1 // 2 // Copyright (C) 2017 Novartis Institutes for BioMedical Research 3 // 4 // @@ All Rights Reserved @@ 5 // This file is part of the RDKit. 6 // The contents are covered by the terms of the BSD license 7 // which is included in the file license.txt, found at the root 8 // of the RDKit source tree. 9 // 10 #include <RDGeneral/export.h> 11 #ifndef RDKIT_RGROUPDECOMP_H 12 #define RDKIT_RGROUPDECOMP_H 13 14 #include "../RDKitBase.h" 15 #include <GraphMol/Substruct/SubstructMatch.h> 16 #include <chrono> 17 18 namespace RDKit { 19 20 //! Compute the isomorphic degenerative points in the 21 //! molecule. These are points that are symmetrically 22 //! equivalent. 23 /*! 24 \param mol Molecule to compute the degenerative points 25 26 \return the set of degenerative points (set<unsigned int>) 27 */ 28 29 typedef enum { 30 IsotopeLabels = 0x01, 31 AtomMapLabels = 0x02, 32 AtomIndexLabels = 0x04, 33 RelabelDuplicateLabels = 0x08, 34 MDLRGroupLabels = 0x10, 35 DummyAtomLabels = 0x20, // These are rgroups but will get relabelled 36 AutoDetect = 0xFF, 37 } RGroupLabels; 38 39 typedef enum { 40 Greedy = 0x01, 41 GreedyChunks = 0x02, 42 Exhaustive = 0x04, // not really useful for large sets 43 NoSymmetrization = 0x08, 44 GA = 0x10, 45 } RGroupMatching; 46 47 typedef enum { 48 AtomMap = 0x01, 49 Isotope = 0x02, 50 MDLRGroup = 0x04, 51 } RGroupLabelling; 52 53 typedef enum { 54 // DEPRECATED, remove the following line in release 2021.03 55 None = 0x0, 56 NoAlignment = 0x0, 57 MCS = 0x01, 58 } RGroupCoreAlignment; 59 60 typedef enum { 61 Match = 0x1, 62 FingerprintVariance = 0x4, 63 } RGroupScore; 64 65 struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionProcessResult { 66 const bool success; 67 const double score; RGroupDecompositionProcessResultRGroupDecompositionProcessResult68 RGroupDecompositionProcessResult(const bool success, const double score) 69 : success(success), score(score) {} 70 }; 71 72 struct RGroupMatch; 73 74 struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters { 75 unsigned int labels = AutoDetect; 76 unsigned int matchingStrategy = GreedyChunks; 77 unsigned int scoreMethod = Match; 78 unsigned int rgroupLabelling = AtomMap | MDLRGroup; 79 unsigned int alignment = MCS; 80 81 unsigned int chunkSize = 5; 82 //! only allow rgroup decomposition at the specified rgroups 83 bool onlyMatchAtRGroups = false; 84 //! remove all user-defined rgroups that only have hydrogens 85 bool removeAllHydrogenRGroups = true; 86 //! remove all user-defined rgroups that only have hydrogens, 87 //! and also remove the corresponding labels from the core 88 bool removeAllHydrogenRGroupsAndLabels = true; 89 //! remove all hydrogens from the output molecules 90 bool removeHydrogensPostMatch = true; 91 //! allow labelled Rgroups of degree 2 or more 92 bool allowNonTerminalRGroups = false; 93 94 double timeout = -1.0; ///< timeout in seconds. <=0 indicates no timeout 95 96 // Determine how to assign the rgroup labels from the given core 97 unsigned int autoGetLabels(const RWMol &); 98 99 // Prepare the core for substructure searching and rgroup assignment 100 bool prepareCore(RWMol &, const RWMol *alignCore); 101 102 // Parameters specific to GA 103 104 // GA population size or -1 to use best guess 105 int gaPopulationSize = -1; 106 // GA maximum number of operations or -1 to use best guess 107 int gaMaximumOperations = -1; 108 // GA number of operations permitted without improvement before exiting (-1 109 // for best guess) 110 int gaNumberOperationsWithoutImprovement = -1; 111 // GA random number seed (-1 for default, -2 for random seed) 112 int gaRandomSeed = -1; 113 // Number of runs 114 int gaNumberRuns = 1; 115 // Sequential or parallel runs? 116 #ifdef RDK_THREADSAFE_SSS 117 bool gaParallelRuns = true; 118 #else 119 bool gaParallelRuns = false; 120 #endif 121 // Controls the way substructure matching with the core is done 122 SubstructMatchParameters substructmatchParams; 123 RGroupDecompositionParametersRGroupDecompositionParameters124 RGroupDecompositionParameters() { substructmatchParams.useChirality = true; } 125 126 private: 127 int indexOffset{-1}; 128 void checkNonTerminal(const Atom &atom) const; 129 }; 130 131 typedef std::map<std::string, ROMOL_SPTR> RGroupRow; 132 typedef std::vector<ROMOL_SPTR> RGroupColumn; 133 134 typedef std::vector<RGroupRow> RGroupRows; 135 typedef std::map<std::string, RGroupColumn> RGroupColumns; 136 137 class UsedLabelMap { 138 public: UsedLabelMap(const std::map<int,int> & mapping)139 UsedLabelMap(const std::map<int, int> &mapping) { 140 for (const auto &rl : mapping) { 141 d_map[rl.second] = std::make_pair(false, (rl.first > 0)); 142 } 143 } getIsUsed(int label)144 bool getIsUsed(int label) const { return d_map.at(label).first; } setIsUsed(int label)145 void setIsUsed(int label) { d_map[label].first = true; } isUserDefined(int label)146 bool isUserDefined(int label) const { return d_map.at(label).second; } 147 148 private: 149 std::map<int, std::pair<bool, bool>> d_map; 150 }; 151 152 struct RGroupDecompData; 153 class RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecomposition { 154 private: 155 RGroupDecompData *data; // implementation details 156 RGroupDecomposition(const RGroupDecomposition &); // no copy construct 157 RGroupDecomposition &operator=( 158 const RGroupDecomposition &); // Prevent assignment 159 RWMOL_SPTR outputCoreMolecule(const RGroupMatch &match, 160 const UsedLabelMap &usedRGroupMap) const; 161 std::map<int, bool> getBlankRGroupMap() const; 162 163 public: 164 RGroupDecomposition(const ROMol &core, 165 const RGroupDecompositionParameters ¶ms = 166 RGroupDecompositionParameters()); 167 RGroupDecomposition(const std::vector<ROMOL_SPTR> &cores, 168 const RGroupDecompositionParameters ¶ms = 169 RGroupDecompositionParameters()); 170 171 ~RGroupDecomposition(); 172 173 //! Returns the index of the added molecule in the RGroupDecomposition 174 // or a negative error code 175 // :param mol: Molecule to add to the decomposition 176 // :result: index of the molecle or 177 // -1 if none of the core matches 178 // -2 if the matched molecule has no sidechains, i.e. is the 179 // same as the scaffold 180 int add(const ROMol &mol); 181 RGroupDecompositionProcessResult processAndScore(); 182 bool process(); 183 184 const RGroupDecompositionParameters ¶ms() const; 185 //! return the current group labels 186 std::vector<std::string> getRGroupLabels() const; 187 188 //! return rgroups in row order group[row][attachment_point] = ROMol 189 RGroupRows getRGroupsAsRows() const; 190 //! return rgroups in column order group[attachment_point][row] = ROMol 191 RGroupColumns getRGroupsAsColumns() const; 192 }; 193 194 RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose( 195 const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols, 196 RGroupRows &rows, std::vector<unsigned int> *unmatched = nullptr, 197 const RGroupDecompositionParameters &options = 198 RGroupDecompositionParameters()); 199 200 RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose( 201 const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols, 202 RGroupColumns &columns, std::vector<unsigned int> *unmatched = nullptr, 203 const RGroupDecompositionParameters &options = 204 RGroupDecompositionParameters()); 205 206 inline bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, 207 double timeout, bool throwOnTimeout = true) { 208 if (timeout <= 0) return false; 209 auto t1 = std::chrono::steady_clock::now(); 210 std::chrono::duration<double> elapsed = t1 - t0; 211 if (elapsed.count() >= timeout) { 212 if (throwOnTimeout) { 213 throw std::runtime_error("operation timed out"); 214 } 215 return true; 216 } 217 return false; 218 } 219 220 } // namespace RDKit 221 222 #endif 223