1 /**********************************************************************
2 conformersearch.cpp - Conformer searching using genetic algorithm.
3 
4 Copyright (C) 2010 Tim Vandermeersch
5 Some portions Copyright (C) 2016 Torsten Sachse
6 
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.org/>
9 
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19 
20 #include <openbabel/conformersearch.h>
21 #include <openbabel/math/align.h>
22 #include <openbabel/forcefield.h>
23 #include <openbabel/elements.h>
24 #include <openbabel/bond.h>
25 #include "rand.h"
26 #include <algorithm>
27 
28 #if defined(_MSC_VER) && (_MSC_VER < 1800)
29  #define OB_ISNAN _isnan
30 #else
31  #define OB_ISNAN std::isnan
32 #endif
33 
34 namespace OpenBabel {
35 
36   //////////////////////////////////////////////////////////
37   //
38   //  OBConformerFilter(s)
39   //
40   //////////////////////////////////////////////////////////
41 
OBStericConformerFilter()42   OBStericConformerFilter::OBStericConformerFilter ()
43   {
44     m_cutoff = 0.8;
45     m_vdw_factor = 0.5;
46     m_check_hydrogens = true;
47   }
48 
OBStericConformerFilter(double cutoff,double vdw_factor,bool check_hydrogens)49   OBStericConformerFilter::OBStericConformerFilter (double cutoff, double vdw_factor, bool check_hydrogens)
50   {
51     m_cutoff = cutoff * cutoff;
52     m_vdw_factor = vdw_factor;
53     m_check_hydrogens = check_hydrogens;
54   }
55 
~OBConformerFilter()56   OBConformerFilter::~OBConformerFilter() {}
57 
IsGood(const OBMol & mol,const RotorKey & key,double * conformer)58   bool OBStericConformerFilter::IsGood(const OBMol &mol, const RotorKey &key, double *conformer)
59   {
60     unsigned int a1 = 0, a2 = 0;
61     unsigned int numAtoms = mol.NumAtoms();
62     OBAtom *atom1 = nullptr, *atom2 = nullptr;
63     double dx = 0.0, dy = 0.0, dz = 0.0;
64     double distanceSquared = 0.0, vdwCutoff = 0.0;
65 
66     for (a1 = 0; a1 < numAtoms; ++a1) {
67       for (a2 = a1 + 1; a2 < numAtoms; ++a2) {
68         atom1 = mol.GetAtom(a1+1);
69         atom2 = mol.GetAtom(a2+1);
70         // Default should be to recognize H clashes too
71         if (!m_check_hydrogens  && (atom1->GetAtomicNum() == OBElements::Hydrogen || atom2->GetAtomicNum() == OBElements::Hydrogen ))
72           continue;
73 
74         // skip connected atoms
75         if (atom1->IsConnected(atom2))
76           continue;
77         // compute the distance
78         dx = conformer[a1*3  ] - conformer[a2*3  ];
79         dy = conformer[a1*3+1] - conformer[a2*3+1];
80         dz = conformer[a1*3+2] - conformer[a2*3+2];
81         distanceSquared = dx*dx + dy*dy + dz*dz;
82         // As we don't check 1-3 and 1-4 bonded atoms, apply a
83         // factor of to the sum of VdW radii
84         vdwCutoff = m_vdw_factor * (OBElements::GetVdwRad(atom1->GetAtomicNum())
85                                     + OBElements::GetVdwRad(atom2->GetAtomicNum()));
86         vdwCutoff *= vdwCutoff; // compare squared distances
87         //std::cout << vdwCutoff << " " << m_vdw_factor << " " << m_cutoff << " " << distanceSquared << std::endl;
88 
89         // check distance
90         if (distanceSquared < m_cutoff || distanceSquared < vdwCutoff)
91           return false;
92       }
93     }
94 
95     return true;
96   }
97 
98   //////////////////////////////////////////////////////////
99   //
100   //  OBConformerScore(s)
101   //
102   //////////////////////////////////////////////////////////
103 
~OBConformerScore()104   OBConformerScore::~OBConformerScore() {}
105 
Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)106   double OBRMSDConformerScore::Score(OBMol &mol, unsigned int index,
107                                      const RotorKeys &keys, const std::vector<double*> &conformers)
108   {
109     unsigned int numAtoms = mol.NumAtoms();
110 
111     // create the reference vector3 for conformer with index
112     double *conformer_i = conformers[index];
113     std::vector<vector3> vi;
114     for (unsigned int a = 0; a < numAtoms; ++a)
115       vi.push_back(vector3(conformer_i[a*3], conformer_i[a*3+1], conformer_i[a*3+2]));
116 
117     OBAlign align(mol, mol, false, false);
118     align.SetRef(vi);
119 
120     double score_min = 10e10;
121     for (unsigned int j = 0; j < conformers.size(); ++j) {
122       if (index == j)
123         continue;
124       double *conformer_j = conformers[j];
125       // create vector3 conformer
126       std::vector<vector3> vj;
127       for (unsigned int a = 0; a < numAtoms; ++a)
128         vj.push_back(vector3(conformer_j[a*3], conformer_j[a*3+1], conformer_j[a*3+2]));
129 
130       // perform Kabsch alignment
131       align.SetTarget(vj);
132       align.Align();
133 
134       // get the RMSD
135       double rmsd = align.GetRMSD();
136       // store the rmsd if it is lower than any of the previous
137       if (rmsd < score_min)
138         score_min = rmsd;
139     }
140 
141     // return the lowest RMSD
142     return score_min;
143   }
144 
Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)145   double OBEnergyConformerScore::Score(OBMol &mol, unsigned int index,
146                                        const RotorKeys &keys, const std::vector<double*> &conformers)
147   {
148     energy_nrequest++;
149     RotorKey cur_key = keys[index];
150     if (energy_map.size () > 0)
151       {
152         // Check that we haven't already computed this energy);
153         mapRotorEnergy::iterator it = energy_map.find (cur_key);
154         if (it != energy_map.end ())
155           return it->second;
156       }
157     energy_ncompute++;
158 
159     double *origCoords = mol.GetCoordinates();
160     // copy the original coordinates to coords
161     // copy the conformer coordinates to OBMol object
162     std::vector<double> coords(mol.NumAtoms() * 3);
163     for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) {
164       coords[i] = origCoords[i];
165       origCoords[i] = conformers[index][i];
166     }
167 
168     OBForceField *ff = OBForceField::FindType("MMFF94");
169     if (!ff->Setup(mol)) {
170       ff = OBForceField::FindType("UFF");
171       if (!ff->Setup(mol))
172         return 10e10;
173     }
174     double score = ff->Energy(false); // no gradients
175 
176     // copy original coordinates back
177     for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i)
178       origCoords[i] = coords[i];
179 
180     // Save that in the map
181     if (energy_map.size () < 50000)
182       energy_map[cur_key] = score;
183 
184     return score;
185   }
186 
Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)187   double OBMinimizingEnergyConformerScore::Score(OBMol &mol, unsigned int index,
188                                                  const RotorKeys &keys, const std::vector<double*> &conformers)
189   {
190     energy_nrequest++;
191     RotorKey cur_key = keys[index];
192     if (energy_map.size () > 0)
193       {
194         // Check that we haven't already computed this energy);
195         mapRotorEnergy::iterator it = energy_map.find (cur_key);
196         if (it != energy_map.end ())
197           return it->second;
198       }
199     energy_ncompute++;
200 
201     double *origCoords = mol.GetCoordinates();
202     // copy the original coordinates to coords
203     // copy the conformer coordinates to OBMol object
204     std::vector<double> coords(mol.NumAtoms() * 3);
205     for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) {
206       coords[i] = origCoords[i];
207       origCoords[i] = conformers[index][i];
208     }
209 
210     OBForceField *ff = OBForceField::FindType("MMFF94");
211     if (!ff->Setup(mol)) {
212       ff = OBForceField::FindType("UFF");
213       if (!ff->Setup(mol))
214         return 10e10;
215     }
216     ff->ConjugateGradients(50);
217     double score = ff->Energy(false); // no gradients
218 
219     // copy original coordinates back
220     for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i)
221       origCoords[i] = coords[i];
222 
223     // Save that in the map
224     if (energy_map.size () < 50000)
225       energy_map[cur_key] = score;
226 
227     return score;
228   }
229 
Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)230   double OBMinimizingRMSDConformerScore::Score(OBMol &mol, unsigned int index,
231                                                const RotorKeys &keys, const std::vector<double*> &conformers)
232   {
233     unsigned int numAtoms = mol.NumAtoms();
234     double *origCoords = mol.GetCoordinates();
235     // copy the original coordinates to coords
236     // copy the conformer coordinates to OBMol object
237     std::vector<double> coords(mol.NumAtoms() * 3);
238     for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) {
239       coords[i] = origCoords[i];
240       origCoords[i] = conformers[index][i];
241     }
242 
243     OBForceField *ff = OBForceField::FindType("MMFF94");
244     if (!ff->Setup(mol)) {
245       ff = OBForceField::FindType("UFF");
246       if (!ff->Setup(mol))
247         return 10e10;
248     }
249     ff->ConjugateGradients(50);
250     double score = ff->Energy(false); // no gradients
251 
252     // copy original coordinates back
253     for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i)
254       origCoords[i] = coords[i];
255 
256     double *conformer_i = conformers[index];
257     std::vector<vector3> vi;
258     for (unsigned int a = 0; a < numAtoms; ++a)
259       vi.push_back(vector3(conformer_i[a*3], conformer_i[a*3+1], conformer_i[a*3+2]));
260 
261     OBAlign align(mol, mol, false, false);
262     align.SetRef(vi);
263 
264     double score_min = 10e10;
265     for (unsigned int j = 0; j < conformers.size(); ++j) {
266       if (index == j)
267         continue;
268       double *conformer_j = conformers[j];
269       // create vector3 conformer
270       std::vector<vector3> vj;
271       for (unsigned int a = 0; a < numAtoms; ++a)
272         vj.push_back(vector3(conformer_j[a*3], conformer_j[a*3+1], conformer_j[a*3+2]));
273 
274       // perform Kabsch alignment
275       align.SetTarget(vj);
276       align.Align();
277 
278       // get the RMSD
279       double rmsd = align.GetRMSD();
280       // store the rmsd if it is lower than any of the previous
281       if (rmsd < score_min)
282         score_min = rmsd;
283     }
284 
285     // return the lowest RMSD
286     return score_min;
287   }
288 
289   //////////////////////////////////////////////////////////
290   //
291   //  OBConformerSearch
292   //
293   //////////////////////////////////////////////////////////
294 
295 
OBConformerSearch()296   OBConformerSearch::OBConformerSearch()
297   {
298     m_filter = static_cast<OBConformerFilter*>(new OBStericConformerFilter());
299     m_score = static_cast<OBConformerScore*>(new OBRMSDConformerScore());
300     //m_score = static_cast<OBConformerScore*>(new OBEnergyConformerScore());
301     m_numConformers = 30;
302     m_numChildren = 5;
303     m_mutability = 10;
304     // Set some inital values here, but will fine-tune some in the molecule Setup
305     use_sharing = false;
306     alpha_share = 2.0;
307     sigma_share = 2.0;
308     nb_niches = 5;
309     niche_radius = 1.0;
310     p_crossover = 0.7;
311     niche_mating = 0.7;
312     local_opt_rate = 3;
313     // For the moment 'd' is an opaque pointer to an instance of OBRandom*.
314     // In future, it could be a pointer to a structure storing all of the
315     // private variables.
316     d = (void*)new OBRandom();
317     ((OBRandom*)d)->TimeSeed();
318     m_logstream = &std::cout; 	// Default logging send to standard output
319     // m_logstream = NULL;
320     m_printrotors = false;  // By default, do not print rotors but perform the conformer search
321 
322   }
323 
~OBConformerSearch()324   OBConformerSearch::~OBConformerSearch()
325   {
326     delete m_filter;
327     delete m_score;
328     delete (OBRandom*)d;
329   }
330 
331 
Setup(const OBMol & mol,int numConformers,int numChildren,int mutability,int convergence)332   bool OBConformerSearch::Setup(const OBMol &mol, int numConformers, int numChildren, int mutability, int convergence)
333   {
334     int nb_rotors = 0;
335     // copy some variables
336     m_mol = mol;
337     m_numConformers = numConformers;
338     m_numChildren = numChildren;
339     m_mutability = mutability;
340     m_convergence = convergence;
341 
342     if (m_mol.GetCoordinates() == nullptr)
343       return false;
344 
345     // Initialize the OBRotorList
346     m_rotorList.SetFixedBonds(m_fixedBonds);
347     m_rotorList.Setup(m_mol);
348 
349     // Print all available rotors if so desired
350     if (m_printrotors){
351       OBRotorIterator it;
352       OBRotorIterator end_it = m_rotorList.EndRotors();
353       OBRotor* r = m_rotorList.BeginRotor(it);
354       int rotcount = 1;
355       std::cout << "Rotors:" << std::endl;
356       while(r){
357         OBBond* b = r->GetBond();
358         int at1,at2;
359         at1 = b->GetBeginAtomIdx();
360         at2 = b->GetEndAtomIdx();
361         std::cout << at1 << "-" << at2 << "  ";
362         r = m_rotorList.NextRotor(it);
363         if (rotcount%4==0 && r){std::cout << std::endl;}
364         ++rotcount;
365       }
366       std::cout << std::endl;
367       return false;
368     }
369     // Print those that are fixed
370     if (!m_fixedBonds.IsEmpty()){
371       std::cout << "Fixed Rotors: " << std::endl;
372       int end_it = m_fixedBonds.EndBit();
373       int it = m_fixedBonds.FirstBit();
374       int rotcount = 1;
375       while(it!=end_it){
376         OBBond* b = m_mol.GetBond(it);
377         int at1,at2;
378         at1 = b->GetBeginAtomIdx();
379         at2 = b->GetEndAtomIdx();
380         std::cout << at1 << "-" << at2 << "  ";
381         it = m_fixedBonds.NextBit(it);
382         if (rotcount%4==0 && it!=end_it){std::cout << std::endl;}
383         ++rotcount;
384       }
385       std::cout << std::endl;
386     }
387 
388     nb_rotors = m_rotorList.Size();
389     if (!nb_rotors) { // only one conformer
390       return false;
391     }
392 
393     // create initial population
394     OBRandom generator;
395     generator.TimeSeed();
396 
397     RotorKey rotorKey(m_rotorList.Size() + 1, 0); // indexed from 1
398     if (IsGood(rotorKey))
399       m_rotorKeys.push_back(rotorKey);
400     else {
401       if (m_logstream != nullptr)
402         (*m_logstream) << "Initial conformer does not pass filter!" << std::endl;
403     }
404 
405     int tries = 0, ndup = 0, nbad = 0;
406     while (m_rotorKeys.size() < m_numConformers && tries < numConformers * 1000) {
407       tries++;
408       // perform random mutation(s)
409       OBRotorIterator ri;
410       OBRotor *rotor = m_rotorList.BeginRotor(ri);
411       for (unsigned int i = 1; i < m_rotorList.Size() + 1; ++i, rotor = m_rotorList.NextRotor(ri)) {
412         if (generator.NextInt() % m_mutability == 0)
413           rotorKey[i] = generator.NextInt() % rotor->GetResolution().size();
414       }
415       // duplicates are always rejected
416       if (!IsUniqueKey(m_rotorKeys, rotorKey))
417         {
418           ndup++;
419           continue;
420         }
421       // execute filter(s)
422       if (!IsGood(rotorKey))
423         {
424           nbad++;
425           continue;
426         }
427       // add the key
428       m_rotorKeys.push_back(rotorKey);
429     }
430 
431     // print out initial conformers
432     if (m_logstream != nullptr)
433       {
434         (*m_logstream) << "Initial conformer count: " << m_rotorKeys.size() << std::endl;
435         (*m_logstream) << tries << " attempts,  " << ndup << " duplicates, " << nbad << " failed filter." << std::endl;
436         for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) {
437           for (unsigned int j = 1; j < m_rotorKeys[i].size(); ++j)
438             (*m_logstream) << m_rotorKeys[i][j] << " ";
439           (*m_logstream) << std::endl;
440         }
441       }
442 
443     // Setup some default values for dynamic niche sharing according to the molecule.
444     nb_niches = (m_rotorKeys.size()) / 10;
445     if (nb_niches < 3)
446       nb_niches = 3;
447     sigma_share = (double)nb_rotors / 3.0;
448     if (sigma_share < 1.0)
449       sigma_share = 1.0;
450     niche_radius =  (double)nb_rotors / 4.0;
451     if (niche_radius < 1.0)
452       niche_radius = 1.0;
453 
454     return true;
455   }
456 
NextGeneration()457   void OBConformerSearch::NextGeneration()
458   {
459     // create next generation population
460     OBRandom generator;
461     generator.TimeSeed();
462 
463     // generate the children
464     int numConformers = m_rotorKeys.size();
465     for (int c = 0; c < numConformers; ++c) {
466       for (int child = 0; child < m_numChildren; ++child) {
467         bool foundKey = false;
468         int tries = 0;
469         while (!foundKey) {
470           tries++;
471           if (tries > 1000)
472             foundKey = true;
473           RotorKey rotorKey = m_rotorKeys[c]; // copy parent gene
474           // perform random mutation(s)
475           OBRotorIterator ri;
476           OBRotor *rotor = m_rotorList.BeginRotor(ri);
477           for (unsigned int i = 1; i < m_rotorList.Size() + 1; ++i, rotor = m_rotorList.NextRotor(ri)) {
478             if (generator.NextInt() % m_mutability == 0)
479               rotorKey[i] = generator.NextInt() % rotor->GetResolution().size(); // permutate gene
480           }
481           // duplicates are always rejected
482           if (!IsUniqueKey(m_rotorKeys, rotorKey))
483             continue;
484           // execute the filter(s)
485           if (!IsGood(rotorKey))
486             continue;
487           // add the key
488           m_rotorKeys.push_back(rotorKey); // append child to population
489           // set foundKey to generate the next child
490           foundKey = true;
491         }
492       }
493     }
494   }
495 
496   // Helper struct to sort conformers by score
497   struct ConformerScore {
ConformerScoreOpenBabel::ConformerScore498     ConformerScore(const RotorKey &key, double _score) : rotorKey(key), score(_score) {}
499     RotorKey rotorKey;
500     double score;
501   };
502 
503   // Helper struct to compare ConformerScore objects by score
504   struct CompareConformerHighScore {
operator ()OpenBabel::CompareConformerHighScore505     bool operator()(const ConformerScore &cs1, const ConformerScore &cs2) { return cs1.score > cs2.score; }
506   };
507   struct CompareConformerLowScore {
operator ()OpenBabel::CompareConformerLowScore508     bool operator()(const ConformerScore &cs1, const ConformerScore &cs2) { return cs1.score < cs2.score; }
509   };
510 
511 
MakeSelection()512   double OBConformerSearch::MakeSelection()
513   {
514     OBRotamerList rotamers;
515     rotamers.SetBaseCoordinateSets(m_mol);
516     rotamers.Setup(m_mol, m_rotorList);
517 
518     // Add all (parent + children) rotor keys
519     for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) {
520       rotamers.AddRotamer(m_rotorKeys[i]);
521     }
522 
523     // Get conformers for the rotor keys
524     std::vector<double*> conformers;
525     rotamers.ExpandConformerList(m_mol, conformers);
526 
527     // Score each conformer
528     std::vector<ConformerScore> conformer_scores;
529     for (unsigned int i = 0; i < conformers.size(); ++i) {
530       double score = m_score->Score(m_mol, i, m_rotorKeys, conformers);
531       conformer_scores.push_back(ConformerScore(m_rotorKeys[i], score));
532     }
533 
534     // delete the conformers
535     for (unsigned int i = 0; i < conformers.size(); ++i) {
536       delete [] conformers[i];
537     }
538 
539     if (m_score->GetPreferred() == OBConformerScore::HighScore)
540       std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerHighScore());
541     else
542       std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerLowScore());
543 
544     // Rmove the worst scored conformers until we have the disired number of conformers
545     while (conformer_scores.size() > m_numConformers) {
546       conformer_scores.pop_back();
547     }
548 
549     // compute sum of all scores, this is a measure of convergence
550     double sum = 0.0, lowest, highest;
551     m_rotorKeys.clear();
552     for (unsigned int i = 0; i < conformer_scores.size(); ++i) {
553       switch (m_score->GetConvergence()) {
554       case OBConformerScore::Highest:
555         if (!i || conformer_scores[i].score > highest) {
556           highest = conformer_scores[i].score;
557         }
558         break;
559       case OBConformerScore::Lowest:
560         if (!i || conformer_scores[i].score < lowest) {
561           lowest = conformer_scores[i].score;
562         }
563         break;
564       default:
565         sum += conformer_scores[i].score;
566         break;
567       }
568       // store the best keys
569       m_rotorKeys.push_back(conformer_scores[i].rotorKey);
570     }
571 
572     switch (m_score->GetConvergence()) {
573     case OBConformerScore::Highest:
574       return highest;
575     case OBConformerScore::Lowest:
576       return lowest;
577     case OBConformerScore::Sum:
578       return sum;
579     case OBConformerScore::Average:
580     default:
581       return sum / m_rotorKeys.size();
582     }
583   }
584 
585 
586 
Search()587   void OBConformerSearch::Search()
588   {
589     int identicalGenerations = 0;
590     double last_score = 0.0, score = 0.0;
591 
592     if (m_logstream != nullptr)
593       {
594         (*m_logstream) << std::endl << "=====> Starting conformers search with a Genetic Algorithm <=====" << std::endl;
595         if (use_sharing)
596           {
597             (*m_logstream) << "Uses fitness sharing (with dynamic niche identification)" << std::endl;
598             (*m_logstream) << "Population size :" << m_rotorKeys.size() << std::endl;
599             (*m_logstream) << nb_niches << " niches searched, with a key distance radius of " << niche_radius << std::endl;
600             (*m_logstream) << "Fitness sharing parameter alpha: " << alpha_share << " \t sigma:" << sigma_share << std::endl;
601             (*m_logstream) << "Uniform crossover probability: " << p_crossover << std::endl;
602             (*m_logstream) << "Mutation probability: " << (1.0 / (double) m_mutability) << std::endl;
603             (*m_logstream) << "Niche mating probability: " << niche_mating << std::endl;
604             if (local_opt_rate)
605               {
606                 (*m_logstream) << "Trying to improve best indivual with local search every ";
607                 (*m_logstream) << local_opt_rate<< "generations" << std::endl;
608               }
609           }
610         else
611           {
612             (*m_logstream) << "Perform elitist generation replacement with mutation only" << std::endl;
613             (*m_logstream) << "Mutation probability: " << (1.0 / (double) m_mutability) << std::endl;
614           }
615         (*m_logstream) << "Will stop after " << m_convergence << " generations without improvement." << std::endl << std::endl;
616       }
617     if (use_sharing)
618       score_population ();
619 
620     for (int i = 0; i < 1000; i++) {
621       // keep copy of rotor keys if next generation is less fit
622       RotorKeys rotorKeys = m_rotorKeys;
623 
624       if (use_sharing)
625         {
626           if (local_opt_rate && (i % local_opt_rate) == 0)
627             local_opt ();		// Try to locally improve the best from times to times
628           share_fitness ();
629           score = sharing_generation ();
630         }
631       else
632         {
633           // create the children
634           NextGeneration();
635           // make the selection
636           score = MakeSelection();
637         }
638       if (OB_ISNAN(score)) {
639           (*m_logstream) << "The current score is not a number, will not continue." << std::endl << std::endl;
640           return;
641       }
642       if (i == 0)
643         last_score = score;
644 
645       if (IsNear(last_score, score)) {
646         identicalGenerations++;
647         last_score = score;
648       } else {
649         if (m_score->GetPreferred() == OBConformerScore::HighScore)
650           {
651             // Maximize score
652             if (score < last_score) {
653               if (!use_sharing)
654                 m_rotorKeys = rotorKeys;
655               identicalGenerations++;
656             } else {
657               last_score = score;
658               identicalGenerations = 0;
659             }
660           }
661         else
662           {
663             // Minimize score
664             if (score > last_score) {
665               if (!use_sharing)
666                 m_rotorKeys = rotorKeys;
667               identicalGenerations++;
668             } else {
669               last_score = score;
670               identicalGenerations = 0;
671             }
672           }
673       }
674       if (m_logstream != nullptr)
675         {
676           if (vscores.size ())
677             (*m_logstream) << "Generation #" << i + 1 << "  " << last_score << "\t best " << vscores[0] << std::endl;
678           else
679             (*m_logstream) << "Generation #" << i + 1 << "  " << last_score << std::endl;
680         }
681       if (identicalGenerations > m_convergence)
682         break;
683     }
684 
685     if (m_logstream != nullptr)
686       {
687         for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) {
688           for (unsigned int j = 1; j < m_rotorKeys[i].size(); ++j)
689             (*m_logstream) << m_rotorKeys[i][j] << " ";
690           (*m_logstream) << std::endl;
691         }
692       }
693   }
694 
GetConformers(OBMol & mol)695   void OBConformerSearch::GetConformers(OBMol &mol)
696   {
697     OBRotamerList rotamers;
698     rotamers.SetBaseCoordinateSets(mol);
699     rotamers.Setup(mol, m_rotorList);
700 
701     std::cout << "GetConformers:" << std::endl;
702     // Add all (parent + children) unique rotor keys
703     for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) {
704       rotamers.AddRotamer(m_rotorKeys[i]);
705 
706       for (unsigned int j = 1; j < m_rotorKeys[i].size(); ++j)
707         std::cout << m_rotorKeys[i][j] << " ";
708       std::cout << std::endl;
709     }
710 
711     // Get conformers for the rotor keys
712     std::vector<double*> conformers;
713     rotamers.ExpandConformerList(mol, conformers);
714     if (conformers.size())
715       mol.SetConformers(conformers);
716   }
717 
IsUniqueKey(const RotorKeys & keys,const RotorKey & key) const718   bool OBConformerSearch::IsUniqueKey(const RotorKeys &keys, const RotorKey &key) const
719   {
720     for (unsigned int i = 0; i < keys.size(); ++i)
721       if (keys[i] == key)
722         return false;
723     return true;
724   }
725 
IsGood(const RotorKey & key)726   bool OBConformerSearch::IsGood(const RotorKey &key)
727   {
728     // Setup OBRotamerList
729     OBRotamerList rotamers;
730     rotamers.SetBaseCoordinateSets(m_mol);
731     rotamers.Setup(m_mol, m_rotorList);
732     rotamers.AddRotamer(key);
733 
734     // Get conformer for the rotor keys
735     std::vector<double*> conformers;
736     rotamers.ExpandConformerList(m_mol, conformers);
737     double *conformer = conformers[0];
738 
739     // Execute filter(s)
740     bool result = m_filter->IsGood(m_mol, key, conformer);
741 
742     delete [] conformer;
743 
744     return result;
745   }
746 
747 
748   /*! @brief   Genetic similarity measure, i.e. "distance" between two rotor keys.
749 
750     @param key1: compute distance from
751     @param key2: compute distance to
752 
753     @return: number of different alleles, i.e. number of different values in the key vectors
754   */
755   int
key_distance(const RotorKey & key1,const RotorKey & key2)756   OBConformerSearch::key_distance (const RotorKey &key1, const RotorKey &key2)
757   {
758     int dist = 0;
759     //    assert(key1.size() > 1 && key1.size()== key2.size());
760     std::vector<int>::const_iterator it1 = key1.begin ();
761     std::vector<int>::const_iterator it2 = key2.begin ();
762     // Skip first values, since  meaningfull valaues are starting at index 1 (Fortran translation inside ;-))
763     for (++it1, ++it2; it1 != key1.end ();++it1, ++it2)
764       if (*it1 != *it2)
765         dist++;
766     return dist;
767   }
768 
769   /*! @brief Make a local search on the best individual.
770     Randomly change each rotor key in the key vector.
771     This is not a complete coverage of all "neighbors" vectors,
772     since not all possible key values are tested.
773   */
774   int
local_opt()775   OBConformerSearch::local_opt ()
776   {
777     bool max_flag = (m_score->GetPreferred() == OBConformerScore::HighScore);
778     bool flag_improved = false;
779     std::vector<double> backup_scores = vscores;
780     RotorKey best = m_rotorKeys[0], neighbor, opt_key;
781     RotorKeys backup_population = m_rotorKeys;
782     double opt_score = 0.0;
783     int i = 0, new_val = 0;
784     OBRotorIterator ri;
785     OBRotor *rotor = nullptr;
786 
787     // Change 1 and only value, looping on all positions
788     m_rotorKeys.clear ();
789     rotor = m_rotorList.BeginRotor(ri);
790     for (i = 1; i <= m_rotorList.Size(); ++i, rotor = m_rotorList.NextRotor(ri))
791       {
792         neighbor = best;
793         new_val = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size();
794         while (new_val == best[i])
795           new_val = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size();
796         neighbor[i] = new_val;
797         if (IsUniqueKey(backup_population, neighbor) && IsGood(neighbor))
798           m_rotorKeys.push_back (neighbor);
799       }
800     score_population ();
801     opt_score = vscores[0];
802     if ( (max_flag && (opt_score > backup_scores[0]))  || (!max_flag && (opt_score < backup_scores[0])) )
803       {				// Found a better conformer
804         opt_key = m_rotorKeys[0];
805         flag_improved = true;
806         if (m_logstream != nullptr)
807           {
808             (*m_logstream) << "    => Best individual improved with local search: ";
809             (*m_logstream) << backup_scores[0] << "  --> " << opt_score << std::endl;
810           }
811       }
812     // Set back population and score vector
813     m_rotorKeys.clear ();
814     m_rotorKeys = backup_population;
815     vscores.clear ();
816     vscores = backup_scores;
817     if (flag_improved)
818       { // Replace best one
819         m_rotorKeys[0] = opt_key;
820         vscores[0] = opt_score;
821       }
822 
823     return (int) flag_improved;
824   }
825 
826   /*! @brief  Produces one or two offsprings
827 
828     @param key1: reference to the first offspring key
829     @param key2: reference to the second offspring key
830 
831     @return: 0 if no valid offspring, 1 if first only, 2 if second only, and 3 if 2 are valid offsprings.
832   */
833   int
reproduce(RotorKey & key1,RotorKey & key2)834   OBConformerSearch::reproduce (RotorKey &key1, RotorKey &key2)
835   {
836     unsigned int i = 0, iniche = 0, j = 0, pop_size = vscores.size ();
837     unsigned int rnd1 = 0, rnd2 = 0, parent1 = 0, parent2 = 0, nsize = 0;
838     int ret_code = 0;
839     bool flag_crossover = false;
840     OBRotorIterator ri;
841     OBRotor *rotor = nullptr;
842 
843     if (pop_size < 2)
844       return 0;
845 
846     // Make a 2-tournament selection to choose first parent
847     i = ((OBRandom*)d)->NextInt() % pop_size;
848     j = ((OBRandom*)d)->NextInt() % pop_size;
849     parent1 = vshared_fitnes[i] > vshared_fitnes[j] ? i : j;
850     iniche = niche_map[parent1];
851     if (iniche > -1)
852       nsize = dynamic_niches[iniche].size (); // Belongs to a specific niche
853 
854     // Do we apply crossover here?
855     flag_crossover = (((OBRandom*)d)->NextFloat () <= p_crossover);
856     if (flag_crossover && (((OBRandom*)d)->NextFloat () <= niche_mating)  &&  nsize > 1)
857       {
858         // Apply niche mating: draw second parent in the same niche, if its has
859         // at least 2 members. Make a 2-tournament selection whithin this niche
860         rnd1 = ((OBRandom*)d)->NextInt() % nsize;
861         i =  dynamic_niches[iniche][rnd1];
862         rnd2 = ((OBRandom*)d)->NextInt() % nsize;
863         j = dynamic_niches[iniche][rnd2];
864         parent2 = vshared_fitnes[i] > vshared_fitnes[j] ? i : j;
865       }
866     else
867       {
868         // Draw second in the whole population
869         i = ((OBRandom*)d)->NextInt() % pop_size;
870         j = ((OBRandom*)d)->NextInt() % pop_size;
871         parent2 = vshared_fitnes[i] > vshared_fitnes[j] ? i : j;
872       }
873 
874     if (flag_crossover)
875       {
876         // Cross the 2 vectors: toss a coin for each position (i.e. uniform crossover)
877         for (i = 1; i < key1.size(); i++)
878           {
879             if (((OBRandom*)d)->NextInt() % 2)
880               { // Copy parent1 to offspring 1
881                 key1[i] = m_rotorKeys[parent1][i];
882                 key2[i] = m_rotorKeys[parent2][i];
883               }
884             else
885               { // invert
886                 key2[i] = m_rotorKeys[parent1][i];
887                 key1[i] = m_rotorKeys[parent2][i];
888               }
889           }
890       }
891     else
892       {
893         key1 =  m_rotorKeys[parent1];
894         key2 =  m_rotorKeys[parent2];
895       }
896 
897     // perform random mutation(s)
898     rotor = m_rotorList.BeginRotor(ri);
899     for (i = 1; i <= m_rotorList.Size(); ++i, rotor = m_rotorList.NextRotor(ri))
900       {
901         if (((OBRandom*)d)->NextInt() % m_mutability == 0)
902           key1[i] = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size();
903         if (((OBRandom*)d)->NextInt() % m_mutability == 0)
904           key2[i] = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size();
905       }
906     if (IsUniqueKey(m_rotorKeys, key1) && IsGood(key1))
907       ret_code += 1;
908     if (IsUniqueKey(m_rotorKeys, key2) && IsGood(key2))
909       ret_code += 2;
910     return ret_code;		// Returns number of new distinct individuals (i.e. rotor keys)
911   }
912 
913   /*! @brief  Score and sort the current population
914 
915     @return: 0 when OK, 1 or more if not
916   */
917   int
score_population()918   OBConformerSearch::score_population ()
919   {
920     bool max_flag = (m_score->GetPreferred() == OBConformerScore::HighScore);
921     unsigned int i = 0, pop_size = 0;
922     double score = 0.0;
923     std::vector<double*> conformers;
924     std::vector<double>::iterator dit;
925     OBRotamerList rotamers;
926     std::vector<ConformerScore> conformer_scores;
927 
928     rotamers.SetBaseCoordinateSets(m_mol);
929     rotamers.Setup(m_mol, m_rotorList);
930 
931     // Add all (parent + children) rotor keys
932     for (i = 0; i < m_rotorKeys.size(); ++i)
933       rotamers.AddRotamer(m_rotorKeys[i]);
934 
935     // Get conformers for the rotor keys
936     rotamers.ExpandConformerList(m_mol, conformers);
937 
938     // Score each conformer
939     for (i = 0; i < conformers.size(); ++i)
940       {
941         score = m_score->Score(m_mol, i, m_rotorKeys, conformers);
942         conformer_scores.push_back(ConformerScore(m_rotorKeys[i], score));
943       }
944 
945     // delete the conformers
946     for (i = 0; i < conformers.size(); ++i)
947       delete [] conformers[i];
948 
949     // Sort conformers
950     if (max_flag)
951       std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerHighScore());
952     else
953       std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerLowScore());
954     pop_size = conformer_scores.size();
955 
956     // Save the score vector, reorder population
957     vscores.clear ();
958     m_rotorKeys.clear();
959     for (i = 0; i < pop_size; i++)
960       {
961         vscores.push_back (conformer_scores[i].score);
962         m_rotorKeys.push_back(conformer_scores[i].rotorKey);
963       }
964     return 0;
965   }
966 
967 
968   /*! @brief   Compute shared fitness values for a given population.
969     Assume the population was order prior to this call.
970 
971     @return: 0 when OK, 1 or more if not
972   */
973   int
share_fitness()974   OBConformerSearch::share_fitness ()
975   {
976     bool max_flag = (m_score->GetPreferred() == OBConformerScore::HighScore);
977     bool flag_niched = false;
978     unsigned int i = 0, iniche = 0, j = 0, k = 0, pop_size = vscores.size ();
979     unsigned int max_dist = 0, min_dist = 0, imax = 0;
980     int dist = 0;
981     double min_score = 0.0, dtmp = 0.0;
982     double sh_count = 0.0, sh_ij = 0.0;
983     std::vector<double>::iterator dit;
984     std::vector<int>::iterator nit;
985     std::vector<char> vshared;
986 
987     min_score = max_flag?  vscores[pop_size - 1] :  vscores[0];
988 
989     // Compute shared fitness from score: rescale score values so the minimum is 1.0
990     vshared_fitnes.clear ();
991     dtmp = 1.0 - min_score;
992     if (max_flag)
993       for (dit =  vscores.begin (); dit != vscores.end (); ++dit)
994         vshared_fitnes.push_back (*dit + dtmp);
995     else			// Minimization: invert score so best has 1.0, others lower values
996       for (dit =  vscores.begin (); dit != vscores.end (); ++dit)
997         vshared_fitnes.push_back (1.0 / (*dit + dtmp));
998 
999     // Use a vector to keep track of assigned indivivduals
1000     vshared.resize (pop_size);
1001     // Identify niches
1002     dynamic_niches.clear ();
1003     for (k = 0; k < pop_size; k++)
1004       {
1005         imax = -1;
1006         if ((dynamic_niches.size () < nb_niches) && (dynamic_niches.size () %2))
1007           {
1008             // Get the most distant individual to current list heads, wihtin the first 2/3 of the population
1009             // The idea is to find some explicit niches, with maximum disimilarity form existing ones,
1010             // still with some reasonable head fitness score.
1011             max_dist = 0;
1012             for (i = 0; i < ((pop_size * 2) / 3); i++)
1013               {
1014                 if (vshared[i])
1015                   continue;
1016                 min_dist = 1000000;
1017                 for(iniche = 0; iniche < dynamic_niches.size (); iniche++)
1018                   {
1019                     j = dynamic_niches[iniche][0];
1020                     dist = key_distance (m_rotorKeys[j], m_rotorKeys[i]);
1021                     if (dist < min_dist )
1022                       min_dist = dist;
1023                   }
1024                 if (min_dist > max_dist)
1025                   {
1026                     imax = i;
1027                     max_dist = min_dist;
1028                   }
1029               }
1030             i = imax;
1031           }
1032 
1033         // Get the best unassigned individual
1034         if (imax == -1)
1035           for (i = 0; i < pop_size; i++)
1036             if (!vshared[i])
1037               break;
1038 
1039         for (iniche = 0; iniche < dynamic_niches.size (); iniche++)
1040           {
1041             j = dynamic_niches[iniche][0];
1042             dist = key_distance(m_rotorKeys[j], m_rotorKeys[i]);
1043             if ((double)dist <= niche_radius)
1044               {
1045                 dynamic_niches[iniche].push_back (i);
1046                 break;
1047               }
1048           }
1049         if (iniche == dynamic_niches.size ())
1050           {  // Could not find a niche for this one
1051             if (dynamic_niches.size () < nb_niches)
1052               { // Create a new niche.
1053                 iniche = dynamic_niches.size ();
1054                 dynamic_niches.resize (iniche + 1);
1055                 dynamic_niches[iniche].push_back (i);
1056               }
1057             else
1058               { // Do it the hard way: compute sharing factor with all the population
1059                 sh_count = 0.0;
1060                 for (j = 0; j < pop_size; j++)
1061                   {
1062                     dist = key_distance (m_rotorKeys[i], m_rotorKeys[j]);
1063                     if ((double)dist < sigma_share)
1064                       {
1065                         sh_ij = 1.0 - pow (((double) dist) / ((double ) sigma_share), alpha_share);
1066                         sh_count += sh_ij;
1067                       }
1068                   }
1069                 vshared_fitnes[i] /= sh_count; // Classical shared fitness
1070               }
1071           }
1072         vshared[i] = 1;
1073       }
1074 
1075     // Build the niche map: provided an invididual index, provides the niche index it belongs to.
1076     // -1 means out of explicit niches.
1077     niche_map.clear ();
1078     niche_map.resize (pop_size, -1);
1079     for (iniche = 0; iniche < dynamic_niches.size (); iniche++)
1080       {
1081         // Divide each dynamic niche member score by the niche size: i.e. each niche member has the same
1082         // penalty, i.e. the order is unchanged whitin this niche.
1083         dtmp = 1.0 / ((double) dynamic_niches[iniche].size ());
1084         for (nit = dynamic_niches[iniche].begin (); nit != dynamic_niches[iniche].end (); ++nit)
1085           {
1086             vshared_fitnes[*nit] *= dtmp;
1087             niche_map[*nit] = iniche;
1088           }
1089       }
1090 
1091     return 0;
1092   }
1093 
1094   /*! @brief  Create a new generation with fitness sharing
1095 
1096     @return: generation score (lowest, highest, average or sum of fitness)
1097   */
1098   double
sharing_generation()1099   OBConformerSearch::sharing_generation ()
1100   {
1101     RotorKeys new_generation, offsprings;
1102     RotorKey key1, key2;
1103     unsigned int i = 0, j = 0, imax = 0, iniche = 0, nsize = 0, pop_size = 0, half_pop = 0, key_size = 0;
1104     int ret_code = 0,  ninsert = 0, iround = 0, last_ninsert;
1105     int dist = 0, dist_max = 0, dist_min = 0;
1106     double sum = 0.0, lowest = 0.0, highest = 0.0;
1107     std::vector<int>::iterator nit;
1108     std::vector<double>::iterator dit;
1109     std::vector<unsigned char> vflag;
1110     std::vector<int> out_niches; // Vector of all individuals out of the niches
1111 
1112     // key1 and key2 will be used to create new keys for the new generation
1113     key_size = m_rotorKeys[0].size();
1114     key1.resize (key_size);
1115     key2.resize (key_size);
1116 
1117     pop_size = vscores.size ();
1118     vflag.resize (pop_size);
1119     half_pop = pop_size / 2;
1120     if (pop_size % 2)
1121       half_pop++;
1122 
1123     // Build the list of individuals out of the niches
1124     for (i = 0; i < pop_size; i++)
1125       if (niche_map[i] == -1)
1126         out_niches.push_back (i);
1127 
1128     if (m_logstream != nullptr)
1129       {
1130         (*m_logstream) << "  ==> Number of niches: " << dynamic_niches.size ();
1131         (*m_logstream) << "   # out of niches :" << out_niches.size () << "\t Best :" << vscores[0] << std::endl;
1132       }
1133 
1134     // Save each niche 1set element, then 2nd until we have half of the population
1135     // If we still miss some, get from individuals out of the niches (starting from best ones)
1136     new_generation.push_back (m_rotorKeys[0]);
1137     vflag[0] = 1;
1138     ninsert = 1;
1139     iround = 0;
1140     while (ninsert < ((half_pop * 2) / 3))
1141       {
1142         last_ninsert = ninsert;
1143         for (iniche = 0; iniche < dynamic_niches.size (); iniche++)
1144           {
1145             nsize = dynamic_niches[iniche].size ();
1146             imax = nsize / 2;
1147             if (nsize % 2)
1148               imax++;
1149             if (iround < imax)
1150               {
1151                 i = dynamic_niches[iniche][iround];
1152                 if (!vflag[i])
1153                   {
1154                     new_generation.push_back (m_rotorKeys[i]);
1155                     vflag[i] = 1;
1156                     ninsert++;
1157                     if (ninsert >= half_pop)
1158                       break;
1159                   }
1160               }
1161           }
1162         if (ninsert == last_ninsert)
1163           {
1164             for (i = 0; i < out_niches.size () ; i++)
1165               {
1166                 j = out_niches[i];
1167                 if (!vflag[j])
1168                   {
1169                     new_generation.push_back (m_rotorKeys[j]);
1170                     vflag[j] = 1;
1171                     ninsert++;
1172                     break;
1173 
1174                   }
1175               }
1176           }
1177         if (ninsert == last_ninsert)
1178           break;
1179         iround++;
1180       }
1181     // Now add most distant individuals: force genetic diversity in the new population at the cost of higher energies.
1182     while (ninsert < half_pop)
1183       {
1184         dist_max = 0;
1185         imax = 0;
1186         for (i = 0; i < pop_size; i++)
1187           {
1188             if (vflag[i])
1189               continue;
1190             dist_min = 1000000;
1191             for (j = 1; j < pop_size; j++)
1192               {
1193                 // Get closest neigbhor distance for this key
1194                 if (!vflag[j])
1195                   continue;
1196                 dist = key_distance (m_rotorKeys[i], m_rotorKeys[j]);
1197                 if (dist < dist_min)
1198                   dist_min = dist;
1199               }
1200             if (dist_min > dist_max)
1201               {			// Find the most distant to its clostest neighbor
1202                 imax = i;
1203                 dist_max = dist_min;
1204               }
1205           }
1206         new_generation.push_back (m_rotorKeys[imax]);
1207         vflag[imax] = 1;
1208         ninsert++;
1209       }
1210 
1211     // Now generate offsprings
1212     offsprings.clear ();
1213     while (offsprings.size () < pop_size)
1214       {
1215         ret_code = reproduce (key1, key2);
1216         // Add the first offspring if valid... and not already in the new generation
1217         if ((ret_code % 2) && IsUniqueKey(new_generation, key1))
1218           offsprings.push_back (key1);
1219         // Add the second if valid, and enough space in the new population
1220         if ((ret_code > 1) && IsUniqueKey(new_generation, key2))
1221           offsprings.push_back (key2);
1222       }
1223 
1224     // Score the offsprings and the best in the new population
1225     m_rotorKeys.clear ();
1226     for (i = 0; i < offsprings.size(); i++)
1227       m_rotorKeys.push_back(offsprings[i]);
1228     score_population ();
1229     imax = pop_size - new_generation.size ();
1230     for (i = 0; i < imax; i++)
1231       new_generation.push_back(m_rotorKeys[i]);
1232     m_rotorKeys.clear ();
1233     for (i = 0; i < new_generation.size(); i++)
1234       m_rotorKeys.push_back(new_generation[i]);
1235     score_population ();
1236 
1237     // Convergence energy values: average of niches best element
1238     sum = 0.0;
1239     if (0) {
1240       for (iniche = 0; iniche <  dynamic_niches.size (); iniche++)
1241         {
1242           i = dynamic_niches[iniche][0];
1243           sum += vscores[i];
1244         }
1245       return sum / ((double) dynamic_niches.size ());
1246     }
1247     imax = pop_size / 2;
1248     for (i = 0; i < imax; i++)
1249       sum += vscores[i];
1250 
1251     // Convergence criterion: best half population score
1252     return sum / ((double) imax);
1253   }
1254 
1255   /**
1256    * @example obconformersearch_default.cpp
1257    */
1258 
1259 };
1260 
1261 /// @file conformersearch.cpp
1262 /// @brief Conformer searching using genetic algorithm
1263