1 /********************************************************************** 2 conformersearch.cpp - Conformer searching using genetic algorithm. 3 4 Copyright (C) 2010 Tim Vandermeersch 5 Some portions Copyright (C) 2016 Torsten Sachse 6 7 This file is part of the Open Babel project. 8 For more information, see <http://openbabel.org/> 9 10 This program is free software; you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation version 2 of the License. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 ***********************************************************************/ 19 20 #include <openbabel/conformersearch.h> 21 #include <openbabel/math/align.h> 22 #include <openbabel/forcefield.h> 23 #include <openbabel/elements.h> 24 #include <openbabel/bond.h> 25 #include "rand.h" 26 #include <algorithm> 27 28 #if defined(_MSC_VER) && (_MSC_VER < 1800) 29 #define OB_ISNAN _isnan 30 #else 31 #define OB_ISNAN std::isnan 32 #endif 33 34 namespace OpenBabel { 35 36 ////////////////////////////////////////////////////////// 37 // 38 // OBConformerFilter(s) 39 // 40 ////////////////////////////////////////////////////////// 41 OBStericConformerFilter()42 OBStericConformerFilter::OBStericConformerFilter () 43 { 44 m_cutoff = 0.8; 45 m_vdw_factor = 0.5; 46 m_check_hydrogens = true; 47 } 48 OBStericConformerFilter(double cutoff,double vdw_factor,bool check_hydrogens)49 OBStericConformerFilter::OBStericConformerFilter (double cutoff, double vdw_factor, bool check_hydrogens) 50 { 51 m_cutoff = cutoff * cutoff; 52 m_vdw_factor = vdw_factor; 53 m_check_hydrogens = check_hydrogens; 54 } 55 ~OBConformerFilter()56 OBConformerFilter::~OBConformerFilter() {} 57 IsGood(const OBMol & mol,const RotorKey & key,double * conformer)58 bool OBStericConformerFilter::IsGood(const OBMol &mol, const RotorKey &key, double *conformer) 59 { 60 unsigned int a1 = 0, a2 = 0; 61 unsigned int numAtoms = mol.NumAtoms(); 62 OBAtom *atom1 = nullptr, *atom2 = nullptr; 63 double dx = 0.0, dy = 0.0, dz = 0.0; 64 double distanceSquared = 0.0, vdwCutoff = 0.0; 65 66 for (a1 = 0; a1 < numAtoms; ++a1) { 67 for (a2 = a1 + 1; a2 < numAtoms; ++a2) { 68 atom1 = mol.GetAtom(a1+1); 69 atom2 = mol.GetAtom(a2+1); 70 // Default should be to recognize H clashes too 71 if (!m_check_hydrogens && (atom1->GetAtomicNum() == OBElements::Hydrogen || atom2->GetAtomicNum() == OBElements::Hydrogen )) 72 continue; 73 74 // skip connected atoms 75 if (atom1->IsConnected(atom2)) 76 continue; 77 // compute the distance 78 dx = conformer[a1*3 ] - conformer[a2*3 ]; 79 dy = conformer[a1*3+1] - conformer[a2*3+1]; 80 dz = conformer[a1*3+2] - conformer[a2*3+2]; 81 distanceSquared = dx*dx + dy*dy + dz*dz; 82 // As we don't check 1-3 and 1-4 bonded atoms, apply a 83 // factor of to the sum of VdW radii 84 vdwCutoff = m_vdw_factor * (OBElements::GetVdwRad(atom1->GetAtomicNum()) 85 + OBElements::GetVdwRad(atom2->GetAtomicNum())); 86 vdwCutoff *= vdwCutoff; // compare squared distances 87 //std::cout << vdwCutoff << " " << m_vdw_factor << " " << m_cutoff << " " << distanceSquared << std::endl; 88 89 // check distance 90 if (distanceSquared < m_cutoff || distanceSquared < vdwCutoff) 91 return false; 92 } 93 } 94 95 return true; 96 } 97 98 ////////////////////////////////////////////////////////// 99 // 100 // OBConformerScore(s) 101 // 102 ////////////////////////////////////////////////////////// 103 ~OBConformerScore()104 OBConformerScore::~OBConformerScore() {} 105 Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)106 double OBRMSDConformerScore::Score(OBMol &mol, unsigned int index, 107 const RotorKeys &keys, const std::vector<double*> &conformers) 108 { 109 unsigned int numAtoms = mol.NumAtoms(); 110 111 // create the reference vector3 for conformer with index 112 double *conformer_i = conformers[index]; 113 std::vector<vector3> vi; 114 for (unsigned int a = 0; a < numAtoms; ++a) 115 vi.push_back(vector3(conformer_i[a*3], conformer_i[a*3+1], conformer_i[a*3+2])); 116 117 OBAlign align(mol, mol, false, false); 118 align.SetRef(vi); 119 120 double score_min = 10e10; 121 for (unsigned int j = 0; j < conformers.size(); ++j) { 122 if (index == j) 123 continue; 124 double *conformer_j = conformers[j]; 125 // create vector3 conformer 126 std::vector<vector3> vj; 127 for (unsigned int a = 0; a < numAtoms; ++a) 128 vj.push_back(vector3(conformer_j[a*3], conformer_j[a*3+1], conformer_j[a*3+2])); 129 130 // perform Kabsch alignment 131 align.SetTarget(vj); 132 align.Align(); 133 134 // get the RMSD 135 double rmsd = align.GetRMSD(); 136 // store the rmsd if it is lower than any of the previous 137 if (rmsd < score_min) 138 score_min = rmsd; 139 } 140 141 // return the lowest RMSD 142 return score_min; 143 } 144 Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)145 double OBEnergyConformerScore::Score(OBMol &mol, unsigned int index, 146 const RotorKeys &keys, const std::vector<double*> &conformers) 147 { 148 energy_nrequest++; 149 RotorKey cur_key = keys[index]; 150 if (energy_map.size () > 0) 151 { 152 // Check that we haven't already computed this energy); 153 mapRotorEnergy::iterator it = energy_map.find (cur_key); 154 if (it != energy_map.end ()) 155 return it->second; 156 } 157 energy_ncompute++; 158 159 double *origCoords = mol.GetCoordinates(); 160 // copy the original coordinates to coords 161 // copy the conformer coordinates to OBMol object 162 std::vector<double> coords(mol.NumAtoms() * 3); 163 for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) { 164 coords[i] = origCoords[i]; 165 origCoords[i] = conformers[index][i]; 166 } 167 168 OBForceField *ff = OBForceField::FindType("MMFF94"); 169 if (!ff->Setup(mol)) { 170 ff = OBForceField::FindType("UFF"); 171 if (!ff->Setup(mol)) 172 return 10e10; 173 } 174 double score = ff->Energy(false); // no gradients 175 176 // copy original coordinates back 177 for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) 178 origCoords[i] = coords[i]; 179 180 // Save that in the map 181 if (energy_map.size () < 50000) 182 energy_map[cur_key] = score; 183 184 return score; 185 } 186 Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)187 double OBMinimizingEnergyConformerScore::Score(OBMol &mol, unsigned int index, 188 const RotorKeys &keys, const std::vector<double*> &conformers) 189 { 190 energy_nrequest++; 191 RotorKey cur_key = keys[index]; 192 if (energy_map.size () > 0) 193 { 194 // Check that we haven't already computed this energy); 195 mapRotorEnergy::iterator it = energy_map.find (cur_key); 196 if (it != energy_map.end ()) 197 return it->second; 198 } 199 energy_ncompute++; 200 201 double *origCoords = mol.GetCoordinates(); 202 // copy the original coordinates to coords 203 // copy the conformer coordinates to OBMol object 204 std::vector<double> coords(mol.NumAtoms() * 3); 205 for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) { 206 coords[i] = origCoords[i]; 207 origCoords[i] = conformers[index][i]; 208 } 209 210 OBForceField *ff = OBForceField::FindType("MMFF94"); 211 if (!ff->Setup(mol)) { 212 ff = OBForceField::FindType("UFF"); 213 if (!ff->Setup(mol)) 214 return 10e10; 215 } 216 ff->ConjugateGradients(50); 217 double score = ff->Energy(false); // no gradients 218 219 // copy original coordinates back 220 for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) 221 origCoords[i] = coords[i]; 222 223 // Save that in the map 224 if (energy_map.size () < 50000) 225 energy_map[cur_key] = score; 226 227 return score; 228 } 229 Score(OBMol & mol,unsigned int index,const RotorKeys & keys,const std::vector<double * > & conformers)230 double OBMinimizingRMSDConformerScore::Score(OBMol &mol, unsigned int index, 231 const RotorKeys &keys, const std::vector<double*> &conformers) 232 { 233 unsigned int numAtoms = mol.NumAtoms(); 234 double *origCoords = mol.GetCoordinates(); 235 // copy the original coordinates to coords 236 // copy the conformer coordinates to OBMol object 237 std::vector<double> coords(mol.NumAtoms() * 3); 238 for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) { 239 coords[i] = origCoords[i]; 240 origCoords[i] = conformers[index][i]; 241 } 242 243 OBForceField *ff = OBForceField::FindType("MMFF94"); 244 if (!ff->Setup(mol)) { 245 ff = OBForceField::FindType("UFF"); 246 if (!ff->Setup(mol)) 247 return 10e10; 248 } 249 ff->ConjugateGradients(50); 250 double score = ff->Energy(false); // no gradients 251 252 // copy original coordinates back 253 for (unsigned int i = 0; i < mol.NumAtoms() * 3; ++i) 254 origCoords[i] = coords[i]; 255 256 double *conformer_i = conformers[index]; 257 std::vector<vector3> vi; 258 for (unsigned int a = 0; a < numAtoms; ++a) 259 vi.push_back(vector3(conformer_i[a*3], conformer_i[a*3+1], conformer_i[a*3+2])); 260 261 OBAlign align(mol, mol, false, false); 262 align.SetRef(vi); 263 264 double score_min = 10e10; 265 for (unsigned int j = 0; j < conformers.size(); ++j) { 266 if (index == j) 267 continue; 268 double *conformer_j = conformers[j]; 269 // create vector3 conformer 270 std::vector<vector3> vj; 271 for (unsigned int a = 0; a < numAtoms; ++a) 272 vj.push_back(vector3(conformer_j[a*3], conformer_j[a*3+1], conformer_j[a*3+2])); 273 274 // perform Kabsch alignment 275 align.SetTarget(vj); 276 align.Align(); 277 278 // get the RMSD 279 double rmsd = align.GetRMSD(); 280 // store the rmsd if it is lower than any of the previous 281 if (rmsd < score_min) 282 score_min = rmsd; 283 } 284 285 // return the lowest RMSD 286 return score_min; 287 } 288 289 ////////////////////////////////////////////////////////// 290 // 291 // OBConformerSearch 292 // 293 ////////////////////////////////////////////////////////// 294 295 OBConformerSearch()296 OBConformerSearch::OBConformerSearch() 297 { 298 m_filter = static_cast<OBConformerFilter*>(new OBStericConformerFilter()); 299 m_score = static_cast<OBConformerScore*>(new OBRMSDConformerScore()); 300 //m_score = static_cast<OBConformerScore*>(new OBEnergyConformerScore()); 301 m_numConformers = 30; 302 m_numChildren = 5; 303 m_mutability = 10; 304 // Set some inital values here, but will fine-tune some in the molecule Setup 305 use_sharing = false; 306 alpha_share = 2.0; 307 sigma_share = 2.0; 308 nb_niches = 5; 309 niche_radius = 1.0; 310 p_crossover = 0.7; 311 niche_mating = 0.7; 312 local_opt_rate = 3; 313 // For the moment 'd' is an opaque pointer to an instance of OBRandom*. 314 // In future, it could be a pointer to a structure storing all of the 315 // private variables. 316 d = (void*)new OBRandom(); 317 ((OBRandom*)d)->TimeSeed(); 318 m_logstream = &std::cout; // Default logging send to standard output 319 // m_logstream = NULL; 320 m_printrotors = false; // By default, do not print rotors but perform the conformer search 321 322 } 323 ~OBConformerSearch()324 OBConformerSearch::~OBConformerSearch() 325 { 326 delete m_filter; 327 delete m_score; 328 delete (OBRandom*)d; 329 } 330 331 Setup(const OBMol & mol,int numConformers,int numChildren,int mutability,int convergence)332 bool OBConformerSearch::Setup(const OBMol &mol, int numConformers, int numChildren, int mutability, int convergence) 333 { 334 int nb_rotors = 0; 335 // copy some variables 336 m_mol = mol; 337 m_numConformers = numConformers; 338 m_numChildren = numChildren; 339 m_mutability = mutability; 340 m_convergence = convergence; 341 342 if (m_mol.GetCoordinates() == nullptr) 343 return false; 344 345 // Initialize the OBRotorList 346 m_rotorList.SetFixedBonds(m_fixedBonds); 347 m_rotorList.Setup(m_mol); 348 349 // Print all available rotors if so desired 350 if (m_printrotors){ 351 OBRotorIterator it; 352 OBRotorIterator end_it = m_rotorList.EndRotors(); 353 OBRotor* r = m_rotorList.BeginRotor(it); 354 int rotcount = 1; 355 std::cout << "Rotors:" << std::endl; 356 while(r){ 357 OBBond* b = r->GetBond(); 358 int at1,at2; 359 at1 = b->GetBeginAtomIdx(); 360 at2 = b->GetEndAtomIdx(); 361 std::cout << at1 << "-" << at2 << " "; 362 r = m_rotorList.NextRotor(it); 363 if (rotcount%4==0 && r){std::cout << std::endl;} 364 ++rotcount; 365 } 366 std::cout << std::endl; 367 return false; 368 } 369 // Print those that are fixed 370 if (!m_fixedBonds.IsEmpty()){ 371 std::cout << "Fixed Rotors: " << std::endl; 372 int end_it = m_fixedBonds.EndBit(); 373 int it = m_fixedBonds.FirstBit(); 374 int rotcount = 1; 375 while(it!=end_it){ 376 OBBond* b = m_mol.GetBond(it); 377 int at1,at2; 378 at1 = b->GetBeginAtomIdx(); 379 at2 = b->GetEndAtomIdx(); 380 std::cout << at1 << "-" << at2 << " "; 381 it = m_fixedBonds.NextBit(it); 382 if (rotcount%4==0 && it!=end_it){std::cout << std::endl;} 383 ++rotcount; 384 } 385 std::cout << std::endl; 386 } 387 388 nb_rotors = m_rotorList.Size(); 389 if (!nb_rotors) { // only one conformer 390 return false; 391 } 392 393 // create initial population 394 OBRandom generator; 395 generator.TimeSeed(); 396 397 RotorKey rotorKey(m_rotorList.Size() + 1, 0); // indexed from 1 398 if (IsGood(rotorKey)) 399 m_rotorKeys.push_back(rotorKey); 400 else { 401 if (m_logstream != nullptr) 402 (*m_logstream) << "Initial conformer does not pass filter!" << std::endl; 403 } 404 405 int tries = 0, ndup = 0, nbad = 0; 406 while (m_rotorKeys.size() < m_numConformers && tries < numConformers * 1000) { 407 tries++; 408 // perform random mutation(s) 409 OBRotorIterator ri; 410 OBRotor *rotor = m_rotorList.BeginRotor(ri); 411 for (unsigned int i = 1; i < m_rotorList.Size() + 1; ++i, rotor = m_rotorList.NextRotor(ri)) { 412 if (generator.NextInt() % m_mutability == 0) 413 rotorKey[i] = generator.NextInt() % rotor->GetResolution().size(); 414 } 415 // duplicates are always rejected 416 if (!IsUniqueKey(m_rotorKeys, rotorKey)) 417 { 418 ndup++; 419 continue; 420 } 421 // execute filter(s) 422 if (!IsGood(rotorKey)) 423 { 424 nbad++; 425 continue; 426 } 427 // add the key 428 m_rotorKeys.push_back(rotorKey); 429 } 430 431 // print out initial conformers 432 if (m_logstream != nullptr) 433 { 434 (*m_logstream) << "Initial conformer count: " << m_rotorKeys.size() << std::endl; 435 (*m_logstream) << tries << " attempts, " << ndup << " duplicates, " << nbad << " failed filter." << std::endl; 436 for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) { 437 for (unsigned int j = 1; j < m_rotorKeys[i].size(); ++j) 438 (*m_logstream) << m_rotorKeys[i][j] << " "; 439 (*m_logstream) << std::endl; 440 } 441 } 442 443 // Setup some default values for dynamic niche sharing according to the molecule. 444 nb_niches = (m_rotorKeys.size()) / 10; 445 if (nb_niches < 3) 446 nb_niches = 3; 447 sigma_share = (double)nb_rotors / 3.0; 448 if (sigma_share < 1.0) 449 sigma_share = 1.0; 450 niche_radius = (double)nb_rotors / 4.0; 451 if (niche_radius < 1.0) 452 niche_radius = 1.0; 453 454 return true; 455 } 456 NextGeneration()457 void OBConformerSearch::NextGeneration() 458 { 459 // create next generation population 460 OBRandom generator; 461 generator.TimeSeed(); 462 463 // generate the children 464 int numConformers = m_rotorKeys.size(); 465 for (int c = 0; c < numConformers; ++c) { 466 for (int child = 0; child < m_numChildren; ++child) { 467 bool foundKey = false; 468 int tries = 0; 469 while (!foundKey) { 470 tries++; 471 if (tries > 1000) 472 foundKey = true; 473 RotorKey rotorKey = m_rotorKeys[c]; // copy parent gene 474 // perform random mutation(s) 475 OBRotorIterator ri; 476 OBRotor *rotor = m_rotorList.BeginRotor(ri); 477 for (unsigned int i = 1; i < m_rotorList.Size() + 1; ++i, rotor = m_rotorList.NextRotor(ri)) { 478 if (generator.NextInt() % m_mutability == 0) 479 rotorKey[i] = generator.NextInt() % rotor->GetResolution().size(); // permutate gene 480 } 481 // duplicates are always rejected 482 if (!IsUniqueKey(m_rotorKeys, rotorKey)) 483 continue; 484 // execute the filter(s) 485 if (!IsGood(rotorKey)) 486 continue; 487 // add the key 488 m_rotorKeys.push_back(rotorKey); // append child to population 489 // set foundKey to generate the next child 490 foundKey = true; 491 } 492 } 493 } 494 } 495 496 // Helper struct to sort conformers by score 497 struct ConformerScore { ConformerScoreOpenBabel::ConformerScore498 ConformerScore(const RotorKey &key, double _score) : rotorKey(key), score(_score) {} 499 RotorKey rotorKey; 500 double score; 501 }; 502 503 // Helper struct to compare ConformerScore objects by score 504 struct CompareConformerHighScore { operator ()OpenBabel::CompareConformerHighScore505 bool operator()(const ConformerScore &cs1, const ConformerScore &cs2) { return cs1.score > cs2.score; } 506 }; 507 struct CompareConformerLowScore { operator ()OpenBabel::CompareConformerLowScore508 bool operator()(const ConformerScore &cs1, const ConformerScore &cs2) { return cs1.score < cs2.score; } 509 }; 510 511 MakeSelection()512 double OBConformerSearch::MakeSelection() 513 { 514 OBRotamerList rotamers; 515 rotamers.SetBaseCoordinateSets(m_mol); 516 rotamers.Setup(m_mol, m_rotorList); 517 518 // Add all (parent + children) rotor keys 519 for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) { 520 rotamers.AddRotamer(m_rotorKeys[i]); 521 } 522 523 // Get conformers for the rotor keys 524 std::vector<double*> conformers; 525 rotamers.ExpandConformerList(m_mol, conformers); 526 527 // Score each conformer 528 std::vector<ConformerScore> conformer_scores; 529 for (unsigned int i = 0; i < conformers.size(); ++i) { 530 double score = m_score->Score(m_mol, i, m_rotorKeys, conformers); 531 conformer_scores.push_back(ConformerScore(m_rotorKeys[i], score)); 532 } 533 534 // delete the conformers 535 for (unsigned int i = 0; i < conformers.size(); ++i) { 536 delete [] conformers[i]; 537 } 538 539 if (m_score->GetPreferred() == OBConformerScore::HighScore) 540 std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerHighScore()); 541 else 542 std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerLowScore()); 543 544 // Rmove the worst scored conformers until we have the disired number of conformers 545 while (conformer_scores.size() > m_numConformers) { 546 conformer_scores.pop_back(); 547 } 548 549 // compute sum of all scores, this is a measure of convergence 550 double sum = 0.0, lowest, highest; 551 m_rotorKeys.clear(); 552 for (unsigned int i = 0; i < conformer_scores.size(); ++i) { 553 switch (m_score->GetConvergence()) { 554 case OBConformerScore::Highest: 555 if (!i || conformer_scores[i].score > highest) { 556 highest = conformer_scores[i].score; 557 } 558 break; 559 case OBConformerScore::Lowest: 560 if (!i || conformer_scores[i].score < lowest) { 561 lowest = conformer_scores[i].score; 562 } 563 break; 564 default: 565 sum += conformer_scores[i].score; 566 break; 567 } 568 // store the best keys 569 m_rotorKeys.push_back(conformer_scores[i].rotorKey); 570 } 571 572 switch (m_score->GetConvergence()) { 573 case OBConformerScore::Highest: 574 return highest; 575 case OBConformerScore::Lowest: 576 return lowest; 577 case OBConformerScore::Sum: 578 return sum; 579 case OBConformerScore::Average: 580 default: 581 return sum / m_rotorKeys.size(); 582 } 583 } 584 585 586 Search()587 void OBConformerSearch::Search() 588 { 589 int identicalGenerations = 0; 590 double last_score = 0.0, score = 0.0; 591 592 if (m_logstream != nullptr) 593 { 594 (*m_logstream) << std::endl << "=====> Starting conformers search with a Genetic Algorithm <=====" << std::endl; 595 if (use_sharing) 596 { 597 (*m_logstream) << "Uses fitness sharing (with dynamic niche identification)" << std::endl; 598 (*m_logstream) << "Population size :" << m_rotorKeys.size() << std::endl; 599 (*m_logstream) << nb_niches << " niches searched, with a key distance radius of " << niche_radius << std::endl; 600 (*m_logstream) << "Fitness sharing parameter alpha: " << alpha_share << " \t sigma:" << sigma_share << std::endl; 601 (*m_logstream) << "Uniform crossover probability: " << p_crossover << std::endl; 602 (*m_logstream) << "Mutation probability: " << (1.0 / (double) m_mutability) << std::endl; 603 (*m_logstream) << "Niche mating probability: " << niche_mating << std::endl; 604 if (local_opt_rate) 605 { 606 (*m_logstream) << "Trying to improve best indivual with local search every "; 607 (*m_logstream) << local_opt_rate<< "generations" << std::endl; 608 } 609 } 610 else 611 { 612 (*m_logstream) << "Perform elitist generation replacement with mutation only" << std::endl; 613 (*m_logstream) << "Mutation probability: " << (1.0 / (double) m_mutability) << std::endl; 614 } 615 (*m_logstream) << "Will stop after " << m_convergence << " generations without improvement." << std::endl << std::endl; 616 } 617 if (use_sharing) 618 score_population (); 619 620 for (int i = 0; i < 1000; i++) { 621 // keep copy of rotor keys if next generation is less fit 622 RotorKeys rotorKeys = m_rotorKeys; 623 624 if (use_sharing) 625 { 626 if (local_opt_rate && (i % local_opt_rate) == 0) 627 local_opt (); // Try to locally improve the best from times to times 628 share_fitness (); 629 score = sharing_generation (); 630 } 631 else 632 { 633 // create the children 634 NextGeneration(); 635 // make the selection 636 score = MakeSelection(); 637 } 638 if (OB_ISNAN(score)) { 639 (*m_logstream) << "The current score is not a number, will not continue." << std::endl << std::endl; 640 return; 641 } 642 if (i == 0) 643 last_score = score; 644 645 if (IsNear(last_score, score)) { 646 identicalGenerations++; 647 last_score = score; 648 } else { 649 if (m_score->GetPreferred() == OBConformerScore::HighScore) 650 { 651 // Maximize score 652 if (score < last_score) { 653 if (!use_sharing) 654 m_rotorKeys = rotorKeys; 655 identicalGenerations++; 656 } else { 657 last_score = score; 658 identicalGenerations = 0; 659 } 660 } 661 else 662 { 663 // Minimize score 664 if (score > last_score) { 665 if (!use_sharing) 666 m_rotorKeys = rotorKeys; 667 identicalGenerations++; 668 } else { 669 last_score = score; 670 identicalGenerations = 0; 671 } 672 } 673 } 674 if (m_logstream != nullptr) 675 { 676 if (vscores.size ()) 677 (*m_logstream) << "Generation #" << i + 1 << " " << last_score << "\t best " << vscores[0] << std::endl; 678 else 679 (*m_logstream) << "Generation #" << i + 1 << " " << last_score << std::endl; 680 } 681 if (identicalGenerations > m_convergence) 682 break; 683 } 684 685 if (m_logstream != nullptr) 686 { 687 for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) { 688 for (unsigned int j = 1; j < m_rotorKeys[i].size(); ++j) 689 (*m_logstream) << m_rotorKeys[i][j] << " "; 690 (*m_logstream) << std::endl; 691 } 692 } 693 } 694 GetConformers(OBMol & mol)695 void OBConformerSearch::GetConformers(OBMol &mol) 696 { 697 OBRotamerList rotamers; 698 rotamers.SetBaseCoordinateSets(mol); 699 rotamers.Setup(mol, m_rotorList); 700 701 std::cout << "GetConformers:" << std::endl; 702 // Add all (parent + children) unique rotor keys 703 for (unsigned int i = 0; i < m_rotorKeys.size(); ++i) { 704 rotamers.AddRotamer(m_rotorKeys[i]); 705 706 for (unsigned int j = 1; j < m_rotorKeys[i].size(); ++j) 707 std::cout << m_rotorKeys[i][j] << " "; 708 std::cout << std::endl; 709 } 710 711 // Get conformers for the rotor keys 712 std::vector<double*> conformers; 713 rotamers.ExpandConformerList(mol, conformers); 714 if (conformers.size()) 715 mol.SetConformers(conformers); 716 } 717 IsUniqueKey(const RotorKeys & keys,const RotorKey & key) const718 bool OBConformerSearch::IsUniqueKey(const RotorKeys &keys, const RotorKey &key) const 719 { 720 for (unsigned int i = 0; i < keys.size(); ++i) 721 if (keys[i] == key) 722 return false; 723 return true; 724 } 725 IsGood(const RotorKey & key)726 bool OBConformerSearch::IsGood(const RotorKey &key) 727 { 728 // Setup OBRotamerList 729 OBRotamerList rotamers; 730 rotamers.SetBaseCoordinateSets(m_mol); 731 rotamers.Setup(m_mol, m_rotorList); 732 rotamers.AddRotamer(key); 733 734 // Get conformer for the rotor keys 735 std::vector<double*> conformers; 736 rotamers.ExpandConformerList(m_mol, conformers); 737 double *conformer = conformers[0]; 738 739 // Execute filter(s) 740 bool result = m_filter->IsGood(m_mol, key, conformer); 741 742 delete [] conformer; 743 744 return result; 745 } 746 747 748 /*! @brief Genetic similarity measure, i.e. "distance" between two rotor keys. 749 750 @param key1: compute distance from 751 @param key2: compute distance to 752 753 @return: number of different alleles, i.e. number of different values in the key vectors 754 */ 755 int key_distance(const RotorKey & key1,const RotorKey & key2)756 OBConformerSearch::key_distance (const RotorKey &key1, const RotorKey &key2) 757 { 758 int dist = 0; 759 // assert(key1.size() > 1 && key1.size()== key2.size()); 760 std::vector<int>::const_iterator it1 = key1.begin (); 761 std::vector<int>::const_iterator it2 = key2.begin (); 762 // Skip first values, since meaningfull valaues are starting at index 1 (Fortran translation inside ;-)) 763 for (++it1, ++it2; it1 != key1.end ();++it1, ++it2) 764 if (*it1 != *it2) 765 dist++; 766 return dist; 767 } 768 769 /*! @brief Make a local search on the best individual. 770 Randomly change each rotor key in the key vector. 771 This is not a complete coverage of all "neighbors" vectors, 772 since not all possible key values are tested. 773 */ 774 int local_opt()775 OBConformerSearch::local_opt () 776 { 777 bool max_flag = (m_score->GetPreferred() == OBConformerScore::HighScore); 778 bool flag_improved = false; 779 std::vector<double> backup_scores = vscores; 780 RotorKey best = m_rotorKeys[0], neighbor, opt_key; 781 RotorKeys backup_population = m_rotorKeys; 782 double opt_score = 0.0; 783 int i = 0, new_val = 0; 784 OBRotorIterator ri; 785 OBRotor *rotor = nullptr; 786 787 // Change 1 and only value, looping on all positions 788 m_rotorKeys.clear (); 789 rotor = m_rotorList.BeginRotor(ri); 790 for (i = 1; i <= m_rotorList.Size(); ++i, rotor = m_rotorList.NextRotor(ri)) 791 { 792 neighbor = best; 793 new_val = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size(); 794 while (new_val == best[i]) 795 new_val = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size(); 796 neighbor[i] = new_val; 797 if (IsUniqueKey(backup_population, neighbor) && IsGood(neighbor)) 798 m_rotorKeys.push_back (neighbor); 799 } 800 score_population (); 801 opt_score = vscores[0]; 802 if ( (max_flag && (opt_score > backup_scores[0])) || (!max_flag && (opt_score < backup_scores[0])) ) 803 { // Found a better conformer 804 opt_key = m_rotorKeys[0]; 805 flag_improved = true; 806 if (m_logstream != nullptr) 807 { 808 (*m_logstream) << " => Best individual improved with local search: "; 809 (*m_logstream) << backup_scores[0] << " --> " << opt_score << std::endl; 810 } 811 } 812 // Set back population and score vector 813 m_rotorKeys.clear (); 814 m_rotorKeys = backup_population; 815 vscores.clear (); 816 vscores = backup_scores; 817 if (flag_improved) 818 { // Replace best one 819 m_rotorKeys[0] = opt_key; 820 vscores[0] = opt_score; 821 } 822 823 return (int) flag_improved; 824 } 825 826 /*! @brief Produces one or two offsprings 827 828 @param key1: reference to the first offspring key 829 @param key2: reference to the second offspring key 830 831 @return: 0 if no valid offspring, 1 if first only, 2 if second only, and 3 if 2 are valid offsprings. 832 */ 833 int reproduce(RotorKey & key1,RotorKey & key2)834 OBConformerSearch::reproduce (RotorKey &key1, RotorKey &key2) 835 { 836 unsigned int i = 0, iniche = 0, j = 0, pop_size = vscores.size (); 837 unsigned int rnd1 = 0, rnd2 = 0, parent1 = 0, parent2 = 0, nsize = 0; 838 int ret_code = 0; 839 bool flag_crossover = false; 840 OBRotorIterator ri; 841 OBRotor *rotor = nullptr; 842 843 if (pop_size < 2) 844 return 0; 845 846 // Make a 2-tournament selection to choose first parent 847 i = ((OBRandom*)d)->NextInt() % pop_size; 848 j = ((OBRandom*)d)->NextInt() % pop_size; 849 parent1 = vshared_fitnes[i] > vshared_fitnes[j] ? i : j; 850 iniche = niche_map[parent1]; 851 if (iniche > -1) 852 nsize = dynamic_niches[iniche].size (); // Belongs to a specific niche 853 854 // Do we apply crossover here? 855 flag_crossover = (((OBRandom*)d)->NextFloat () <= p_crossover); 856 if (flag_crossover && (((OBRandom*)d)->NextFloat () <= niche_mating) && nsize > 1) 857 { 858 // Apply niche mating: draw second parent in the same niche, if its has 859 // at least 2 members. Make a 2-tournament selection whithin this niche 860 rnd1 = ((OBRandom*)d)->NextInt() % nsize; 861 i = dynamic_niches[iniche][rnd1]; 862 rnd2 = ((OBRandom*)d)->NextInt() % nsize; 863 j = dynamic_niches[iniche][rnd2]; 864 parent2 = vshared_fitnes[i] > vshared_fitnes[j] ? i : j; 865 } 866 else 867 { 868 // Draw second in the whole population 869 i = ((OBRandom*)d)->NextInt() % pop_size; 870 j = ((OBRandom*)d)->NextInt() % pop_size; 871 parent2 = vshared_fitnes[i] > vshared_fitnes[j] ? i : j; 872 } 873 874 if (flag_crossover) 875 { 876 // Cross the 2 vectors: toss a coin for each position (i.e. uniform crossover) 877 for (i = 1; i < key1.size(); i++) 878 { 879 if (((OBRandom*)d)->NextInt() % 2) 880 { // Copy parent1 to offspring 1 881 key1[i] = m_rotorKeys[parent1][i]; 882 key2[i] = m_rotorKeys[parent2][i]; 883 } 884 else 885 { // invert 886 key2[i] = m_rotorKeys[parent1][i]; 887 key1[i] = m_rotorKeys[parent2][i]; 888 } 889 } 890 } 891 else 892 { 893 key1 = m_rotorKeys[parent1]; 894 key2 = m_rotorKeys[parent2]; 895 } 896 897 // perform random mutation(s) 898 rotor = m_rotorList.BeginRotor(ri); 899 for (i = 1; i <= m_rotorList.Size(); ++i, rotor = m_rotorList.NextRotor(ri)) 900 { 901 if (((OBRandom*)d)->NextInt() % m_mutability == 0) 902 key1[i] = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size(); 903 if (((OBRandom*)d)->NextInt() % m_mutability == 0) 904 key2[i] = ((OBRandom*)d)->NextInt() % rotor->GetResolution().size(); 905 } 906 if (IsUniqueKey(m_rotorKeys, key1) && IsGood(key1)) 907 ret_code += 1; 908 if (IsUniqueKey(m_rotorKeys, key2) && IsGood(key2)) 909 ret_code += 2; 910 return ret_code; // Returns number of new distinct individuals (i.e. rotor keys) 911 } 912 913 /*! @brief Score and sort the current population 914 915 @return: 0 when OK, 1 or more if not 916 */ 917 int score_population()918 OBConformerSearch::score_population () 919 { 920 bool max_flag = (m_score->GetPreferred() == OBConformerScore::HighScore); 921 unsigned int i = 0, pop_size = 0; 922 double score = 0.0; 923 std::vector<double*> conformers; 924 std::vector<double>::iterator dit; 925 OBRotamerList rotamers; 926 std::vector<ConformerScore> conformer_scores; 927 928 rotamers.SetBaseCoordinateSets(m_mol); 929 rotamers.Setup(m_mol, m_rotorList); 930 931 // Add all (parent + children) rotor keys 932 for (i = 0; i < m_rotorKeys.size(); ++i) 933 rotamers.AddRotamer(m_rotorKeys[i]); 934 935 // Get conformers for the rotor keys 936 rotamers.ExpandConformerList(m_mol, conformers); 937 938 // Score each conformer 939 for (i = 0; i < conformers.size(); ++i) 940 { 941 score = m_score->Score(m_mol, i, m_rotorKeys, conformers); 942 conformer_scores.push_back(ConformerScore(m_rotorKeys[i], score)); 943 } 944 945 // delete the conformers 946 for (i = 0; i < conformers.size(); ++i) 947 delete [] conformers[i]; 948 949 // Sort conformers 950 if (max_flag) 951 std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerHighScore()); 952 else 953 std::sort(conformer_scores.begin(), conformer_scores.end(), CompareConformerLowScore()); 954 pop_size = conformer_scores.size(); 955 956 // Save the score vector, reorder population 957 vscores.clear (); 958 m_rotorKeys.clear(); 959 for (i = 0; i < pop_size; i++) 960 { 961 vscores.push_back (conformer_scores[i].score); 962 m_rotorKeys.push_back(conformer_scores[i].rotorKey); 963 } 964 return 0; 965 } 966 967 968 /*! @brief Compute shared fitness values for a given population. 969 Assume the population was order prior to this call. 970 971 @return: 0 when OK, 1 or more if not 972 */ 973 int share_fitness()974 OBConformerSearch::share_fitness () 975 { 976 bool max_flag = (m_score->GetPreferred() == OBConformerScore::HighScore); 977 bool flag_niched = false; 978 unsigned int i = 0, iniche = 0, j = 0, k = 0, pop_size = vscores.size (); 979 unsigned int max_dist = 0, min_dist = 0, imax = 0; 980 int dist = 0; 981 double min_score = 0.0, dtmp = 0.0; 982 double sh_count = 0.0, sh_ij = 0.0; 983 std::vector<double>::iterator dit; 984 std::vector<int>::iterator nit; 985 std::vector<char> vshared; 986 987 min_score = max_flag? vscores[pop_size - 1] : vscores[0]; 988 989 // Compute shared fitness from score: rescale score values so the minimum is 1.0 990 vshared_fitnes.clear (); 991 dtmp = 1.0 - min_score; 992 if (max_flag) 993 for (dit = vscores.begin (); dit != vscores.end (); ++dit) 994 vshared_fitnes.push_back (*dit + dtmp); 995 else // Minimization: invert score so best has 1.0, others lower values 996 for (dit = vscores.begin (); dit != vscores.end (); ++dit) 997 vshared_fitnes.push_back (1.0 / (*dit + dtmp)); 998 999 // Use a vector to keep track of assigned indivivduals 1000 vshared.resize (pop_size); 1001 // Identify niches 1002 dynamic_niches.clear (); 1003 for (k = 0; k < pop_size; k++) 1004 { 1005 imax = -1; 1006 if ((dynamic_niches.size () < nb_niches) && (dynamic_niches.size () %2)) 1007 { 1008 // Get the most distant individual to current list heads, wihtin the first 2/3 of the population 1009 // The idea is to find some explicit niches, with maximum disimilarity form existing ones, 1010 // still with some reasonable head fitness score. 1011 max_dist = 0; 1012 for (i = 0; i < ((pop_size * 2) / 3); i++) 1013 { 1014 if (vshared[i]) 1015 continue; 1016 min_dist = 1000000; 1017 for(iniche = 0; iniche < dynamic_niches.size (); iniche++) 1018 { 1019 j = dynamic_niches[iniche][0]; 1020 dist = key_distance (m_rotorKeys[j], m_rotorKeys[i]); 1021 if (dist < min_dist ) 1022 min_dist = dist; 1023 } 1024 if (min_dist > max_dist) 1025 { 1026 imax = i; 1027 max_dist = min_dist; 1028 } 1029 } 1030 i = imax; 1031 } 1032 1033 // Get the best unassigned individual 1034 if (imax == -1) 1035 for (i = 0; i < pop_size; i++) 1036 if (!vshared[i]) 1037 break; 1038 1039 for (iniche = 0; iniche < dynamic_niches.size (); iniche++) 1040 { 1041 j = dynamic_niches[iniche][0]; 1042 dist = key_distance(m_rotorKeys[j], m_rotorKeys[i]); 1043 if ((double)dist <= niche_radius) 1044 { 1045 dynamic_niches[iniche].push_back (i); 1046 break; 1047 } 1048 } 1049 if (iniche == dynamic_niches.size ()) 1050 { // Could not find a niche for this one 1051 if (dynamic_niches.size () < nb_niches) 1052 { // Create a new niche. 1053 iniche = dynamic_niches.size (); 1054 dynamic_niches.resize (iniche + 1); 1055 dynamic_niches[iniche].push_back (i); 1056 } 1057 else 1058 { // Do it the hard way: compute sharing factor with all the population 1059 sh_count = 0.0; 1060 for (j = 0; j < pop_size; j++) 1061 { 1062 dist = key_distance (m_rotorKeys[i], m_rotorKeys[j]); 1063 if ((double)dist < sigma_share) 1064 { 1065 sh_ij = 1.0 - pow (((double) dist) / ((double ) sigma_share), alpha_share); 1066 sh_count += sh_ij; 1067 } 1068 } 1069 vshared_fitnes[i] /= sh_count; // Classical shared fitness 1070 } 1071 } 1072 vshared[i] = 1; 1073 } 1074 1075 // Build the niche map: provided an invididual index, provides the niche index it belongs to. 1076 // -1 means out of explicit niches. 1077 niche_map.clear (); 1078 niche_map.resize (pop_size, -1); 1079 for (iniche = 0; iniche < dynamic_niches.size (); iniche++) 1080 { 1081 // Divide each dynamic niche member score by the niche size: i.e. each niche member has the same 1082 // penalty, i.e. the order is unchanged whitin this niche. 1083 dtmp = 1.0 / ((double) dynamic_niches[iniche].size ()); 1084 for (nit = dynamic_niches[iniche].begin (); nit != dynamic_niches[iniche].end (); ++nit) 1085 { 1086 vshared_fitnes[*nit] *= dtmp; 1087 niche_map[*nit] = iniche; 1088 } 1089 } 1090 1091 return 0; 1092 } 1093 1094 /*! @brief Create a new generation with fitness sharing 1095 1096 @return: generation score (lowest, highest, average or sum of fitness) 1097 */ 1098 double sharing_generation()1099 OBConformerSearch::sharing_generation () 1100 { 1101 RotorKeys new_generation, offsprings; 1102 RotorKey key1, key2; 1103 unsigned int i = 0, j = 0, imax = 0, iniche = 0, nsize = 0, pop_size = 0, half_pop = 0, key_size = 0; 1104 int ret_code = 0, ninsert = 0, iround = 0, last_ninsert; 1105 int dist = 0, dist_max = 0, dist_min = 0; 1106 double sum = 0.0, lowest = 0.0, highest = 0.0; 1107 std::vector<int>::iterator nit; 1108 std::vector<double>::iterator dit; 1109 std::vector<unsigned char> vflag; 1110 std::vector<int> out_niches; // Vector of all individuals out of the niches 1111 1112 // key1 and key2 will be used to create new keys for the new generation 1113 key_size = m_rotorKeys[0].size(); 1114 key1.resize (key_size); 1115 key2.resize (key_size); 1116 1117 pop_size = vscores.size (); 1118 vflag.resize (pop_size); 1119 half_pop = pop_size / 2; 1120 if (pop_size % 2) 1121 half_pop++; 1122 1123 // Build the list of individuals out of the niches 1124 for (i = 0; i < pop_size; i++) 1125 if (niche_map[i] == -1) 1126 out_niches.push_back (i); 1127 1128 if (m_logstream != nullptr) 1129 { 1130 (*m_logstream) << " ==> Number of niches: " << dynamic_niches.size (); 1131 (*m_logstream) << " # out of niches :" << out_niches.size () << "\t Best :" << vscores[0] << std::endl; 1132 } 1133 1134 // Save each niche 1set element, then 2nd until we have half of the population 1135 // If we still miss some, get from individuals out of the niches (starting from best ones) 1136 new_generation.push_back (m_rotorKeys[0]); 1137 vflag[0] = 1; 1138 ninsert = 1; 1139 iround = 0; 1140 while (ninsert < ((half_pop * 2) / 3)) 1141 { 1142 last_ninsert = ninsert; 1143 for (iniche = 0; iniche < dynamic_niches.size (); iniche++) 1144 { 1145 nsize = dynamic_niches[iniche].size (); 1146 imax = nsize / 2; 1147 if (nsize % 2) 1148 imax++; 1149 if (iround < imax) 1150 { 1151 i = dynamic_niches[iniche][iround]; 1152 if (!vflag[i]) 1153 { 1154 new_generation.push_back (m_rotorKeys[i]); 1155 vflag[i] = 1; 1156 ninsert++; 1157 if (ninsert >= half_pop) 1158 break; 1159 } 1160 } 1161 } 1162 if (ninsert == last_ninsert) 1163 { 1164 for (i = 0; i < out_niches.size () ; i++) 1165 { 1166 j = out_niches[i]; 1167 if (!vflag[j]) 1168 { 1169 new_generation.push_back (m_rotorKeys[j]); 1170 vflag[j] = 1; 1171 ninsert++; 1172 break; 1173 1174 } 1175 } 1176 } 1177 if (ninsert == last_ninsert) 1178 break; 1179 iround++; 1180 } 1181 // Now add most distant individuals: force genetic diversity in the new population at the cost of higher energies. 1182 while (ninsert < half_pop) 1183 { 1184 dist_max = 0; 1185 imax = 0; 1186 for (i = 0; i < pop_size; i++) 1187 { 1188 if (vflag[i]) 1189 continue; 1190 dist_min = 1000000; 1191 for (j = 1; j < pop_size; j++) 1192 { 1193 // Get closest neigbhor distance for this key 1194 if (!vflag[j]) 1195 continue; 1196 dist = key_distance (m_rotorKeys[i], m_rotorKeys[j]); 1197 if (dist < dist_min) 1198 dist_min = dist; 1199 } 1200 if (dist_min > dist_max) 1201 { // Find the most distant to its clostest neighbor 1202 imax = i; 1203 dist_max = dist_min; 1204 } 1205 } 1206 new_generation.push_back (m_rotorKeys[imax]); 1207 vflag[imax] = 1; 1208 ninsert++; 1209 } 1210 1211 // Now generate offsprings 1212 offsprings.clear (); 1213 while (offsprings.size () < pop_size) 1214 { 1215 ret_code = reproduce (key1, key2); 1216 // Add the first offspring if valid... and not already in the new generation 1217 if ((ret_code % 2) && IsUniqueKey(new_generation, key1)) 1218 offsprings.push_back (key1); 1219 // Add the second if valid, and enough space in the new population 1220 if ((ret_code > 1) && IsUniqueKey(new_generation, key2)) 1221 offsprings.push_back (key2); 1222 } 1223 1224 // Score the offsprings and the best in the new population 1225 m_rotorKeys.clear (); 1226 for (i = 0; i < offsprings.size(); i++) 1227 m_rotorKeys.push_back(offsprings[i]); 1228 score_population (); 1229 imax = pop_size - new_generation.size (); 1230 for (i = 0; i < imax; i++) 1231 new_generation.push_back(m_rotorKeys[i]); 1232 m_rotorKeys.clear (); 1233 for (i = 0; i < new_generation.size(); i++) 1234 m_rotorKeys.push_back(new_generation[i]); 1235 score_population (); 1236 1237 // Convergence energy values: average of niches best element 1238 sum = 0.0; 1239 if (0) { 1240 for (iniche = 0; iniche < dynamic_niches.size (); iniche++) 1241 { 1242 i = dynamic_niches[iniche][0]; 1243 sum += vscores[i]; 1244 } 1245 return sum / ((double) dynamic_niches.size ()); 1246 } 1247 imax = pop_size / 2; 1248 for (i = 0; i < imax; i++) 1249 sum += vscores[i]; 1250 1251 // Convergence criterion: best half population score 1252 return sum / ((double) imax); 1253 } 1254 1255 /** 1256 * @example obconformersearch_default.cpp 1257 */ 1258 1259 }; 1260 1261 /// @file conformersearch.cpp 1262 /// @brief Conformer searching using genetic algorithm 1263