1 //
2 //  Copyright (C) 2017 Novartis Institutes for BioMedical Research
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RDKIT_RGROUPDECOMP_H
12 #define RDKIT_RGROUPDECOMP_H
13 
14 #include "../RDKitBase.h"
15 #include <GraphMol/Substruct/SubstructMatch.h>
16 #include <chrono>
17 
18 namespace RDKit {
19 
20 //! Compute the isomorphic degenerative points in the
21 //!  molecule.  These are points that are symmetrically
22 //!  equivalent.
23 /*!
24    \param mol     Molecule to compute the degenerative points
25 
26    \return the set of degenerative points (set<unsigned int>)
27 */
28 
29 typedef enum {
30   IsotopeLabels = 0x01,
31   AtomMapLabels = 0x02,
32   AtomIndexLabels = 0x04,
33   RelabelDuplicateLabels = 0x08,
34   MDLRGroupLabels = 0x10,
35   DummyAtomLabels = 0x20,  // These are rgroups but will get relabelled
36   AutoDetect = 0xFF,
37 } RGroupLabels;
38 
39 typedef enum {
40   Greedy = 0x01,
41   GreedyChunks = 0x02,
42   Exhaustive = 0x04,  // not really useful for large sets
43   NoSymmetrization = 0x08,
44   GA = 0x10,
45 } RGroupMatching;
46 
47 typedef enum {
48   AtomMap = 0x01,
49   Isotope = 0x02,
50   MDLRGroup = 0x04,
51 } RGroupLabelling;
52 
53 typedef enum {
54   // DEPRECATED, remove the following line in release 2021.03
55   None = 0x0,
56   NoAlignment = 0x0,
57   MCS = 0x01,
58 } RGroupCoreAlignment;
59 
60 typedef enum {
61   Match = 0x1,
62   FingerprintVariance = 0x4,
63 } RGroupScore;
64 
65 struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionProcessResult {
66   const bool success;
67   const double score;
RGroupDecompositionProcessResultRGroupDecompositionProcessResult68   RGroupDecompositionProcessResult(const bool success, const double score)
69       : success(success), score(score) {}
70 };
71 
72 struct RGroupMatch;
73 
74 struct RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecompositionParameters {
75   unsigned int labels = AutoDetect;
76   unsigned int matchingStrategy = GreedyChunks;
77   unsigned int scoreMethod = Match;
78   unsigned int rgroupLabelling = AtomMap | MDLRGroup;
79   unsigned int alignment = MCS;
80 
81   unsigned int chunkSize = 5;
82   //! only allow rgroup decomposition at the specified rgroups
83   bool onlyMatchAtRGroups = false;
84   //! remove all user-defined rgroups that only have hydrogens
85   bool removeAllHydrogenRGroups = true;
86   //! remove all user-defined rgroups that only have hydrogens,
87   //! and also remove the corresponding labels from the core
88   bool removeAllHydrogenRGroupsAndLabels = true;
89   //! remove all hydrogens from the output molecules
90   bool removeHydrogensPostMatch = true;
91   //! allow labelled Rgroups of degree 2 or more
92   bool allowNonTerminalRGroups = false;
93 
94   double timeout = -1.0;  ///< timeout in seconds. <=0 indicates no timeout
95 
96   // Determine how to assign the rgroup labels from the given core
97   unsigned int autoGetLabels(const RWMol &);
98 
99   // Prepare the core for substructure searching and rgroup assignment
100   bool prepareCore(RWMol &, const RWMol *alignCore);
101 
102   // Parameters specific to GA
103 
104   // GA population size or -1 to use best guess
105   int gaPopulationSize = -1;
106   // GA maximum number of operations or -1 to use best guess
107   int gaMaximumOperations = -1;
108   // GA number of operations permitted without improvement before exiting (-1
109   // for best guess)
110   int gaNumberOperationsWithoutImprovement = -1;
111   // GA random number seed (-1 for default, -2 for random seed)
112   int gaRandomSeed = -1;
113   // Number of runs
114   int gaNumberRuns = 1;
115   // Sequential or parallel runs?
116 #ifdef RDK_THREADSAFE_SSS
117   bool gaParallelRuns = true;
118 #else
119   bool gaParallelRuns = false;
120 #endif
121   // Controls the way substructure matching with the core is done
122   SubstructMatchParameters substructmatchParams;
123 
RGroupDecompositionParametersRGroupDecompositionParameters124   RGroupDecompositionParameters() { substructmatchParams.useChirality = true; }
125 
126  private:
127   int indexOffset{-1};
128   void checkNonTerminal(const Atom &atom) const;
129 };
130 
131 typedef std::map<std::string, ROMOL_SPTR> RGroupRow;
132 typedef std::vector<ROMOL_SPTR> RGroupColumn;
133 
134 typedef std::vector<RGroupRow> RGroupRows;
135 typedef std::map<std::string, RGroupColumn> RGroupColumns;
136 
137 class UsedLabelMap {
138  public:
UsedLabelMap(const std::map<int,int> & mapping)139   UsedLabelMap(const std::map<int, int> &mapping) {
140     for (const auto &rl : mapping) {
141       d_map[rl.second] = std::make_pair(false, (rl.first > 0));
142     }
143   }
getIsUsed(int label)144   bool getIsUsed(int label) const { return d_map.at(label).first; }
setIsUsed(int label)145   void setIsUsed(int label) { d_map[label].first = true; }
isUserDefined(int label)146   bool isUserDefined(int label) const { return d_map.at(label).second; }
147 
148  private:
149   std::map<int, std::pair<bool, bool>> d_map;
150 };
151 
152 struct RGroupDecompData;
153 class RDKIT_RGROUPDECOMPOSITION_EXPORT RGroupDecomposition {
154  private:
155   RGroupDecompData *data;                            // implementation details
156   RGroupDecomposition(const RGroupDecomposition &);  // no copy construct
157   RGroupDecomposition &operator=(
158       const RGroupDecomposition &);  // Prevent assignment
159   RWMOL_SPTR outputCoreMolecule(const RGroupMatch &match,
160                                 const UsedLabelMap &usedRGroupMap) const;
161   std::map<int, bool> getBlankRGroupMap() const;
162 
163  public:
164   RGroupDecomposition(const ROMol &core,
165                       const RGroupDecompositionParameters &params =
166                           RGroupDecompositionParameters());
167   RGroupDecomposition(const std::vector<ROMOL_SPTR> &cores,
168                       const RGroupDecompositionParameters &params =
169                           RGroupDecompositionParameters());
170 
171   ~RGroupDecomposition();
172 
173   //! Returns the index of the added molecule in the RGroupDecomposition
174   //   or a negative error code
175   // :param mol: Molecule to add to the decomposition
176   // :result: index of the molecle or
177   //              -1 if none of the core matches
178   //              -2 if the matched molecule has no sidechains, i.e. is the
179   //                 same as the scaffold
180   int add(const ROMol &mol);
181   RGroupDecompositionProcessResult processAndScore();
182   bool process();
183 
184   const RGroupDecompositionParameters &params() const;
185   //! return the current group labels
186   std::vector<std::string> getRGroupLabels() const;
187 
188   //! return rgroups in row order group[row][attachment_point] = ROMol
189   RGroupRows getRGroupsAsRows() const;
190   //! return rgroups in column order group[attachment_point][row] = ROMol
191   RGroupColumns getRGroupsAsColumns() const;
192 };
193 
194 RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose(
195     const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
196     RGroupRows &rows, std::vector<unsigned int> *unmatched = nullptr,
197     const RGroupDecompositionParameters &options =
198         RGroupDecompositionParameters());
199 
200 RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose(
201     const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
202     RGroupColumns &columns, std::vector<unsigned int> *unmatched = nullptr,
203     const RGroupDecompositionParameters &options =
204         RGroupDecompositionParameters());
205 
206 inline bool checkForTimeout(const std::chrono::steady_clock::time_point &t0,
207                             double timeout, bool throwOnTimeout = true) {
208   if (timeout <= 0) return false;
209   auto t1 = std::chrono::steady_clock::now();
210   std::chrono::duration<double> elapsed = t1 - t0;
211   if (elapsed.count() >= timeout) {
212     if (throwOnTimeout) {
213       throw std::runtime_error("operation timed out");
214     }
215     return true;
216   }
217   return false;
218 }
219 
220 }  // namespace RDKit
221 
222 #endif
223