1 //  Copyright (c) 2017, Novartis Institutes for BioMedical Research Inc.
2 //  All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 //       notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 //       copyright notice, this list of conditions and the following
12 //       disclaimer in the documentation and/or other materials provided
13 //       with the distribution.
14 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 //       nor the names of its contributors may be used to endorse or promote
16 //       products derived from this software without specific prior written
17 //       permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #define NO_IMPORT_ARRAY
32 #include <RDBoost/python.h>
33 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
34 #include <numpy/arrayobject.h>
35 #include <boost/python/list.hpp>
36 #include <boost/python/suite/indexing/map_indexing_suite.hpp>
37 #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
38 #include <string>
39 #include <cmath>
40 #include <chrono>
41 
42 #include <RDGeneral/Exceptions.h>
43 #include <GraphMol/SmilesParse/SmilesWrite.h>
44 #include <GraphMol/RDKitBase.h>
45 #include <GraphMol/RGroupDecomposition/RGroupDecomp.h>
46 #include <RDBoost/Wrap.h>
47 #include <RDBoost/python_streambuf.h>
48 
49 namespace python = boost::python;
50 using boost_adaptbx::python::streambuf;
51 
52 namespace RDKit {
53 
54 class RGroupDecompositionHelper {
55   RGroupDecomposition *decomp;
56 
57  public:
~RGroupDecompositionHelper()58   ~RGroupDecompositionHelper() { delete decomp; }
59 
RGroupDecompositionHelper(python::object cores,const RGroupDecompositionParameters & params=RGroupDecompositionParameters ())60   RGroupDecompositionHelper(python::object cores,
61                             const RGroupDecompositionParameters &params =
62                                 RGroupDecompositionParameters()) {
63     python::extract<ROMol> isROMol(cores);
64     if (isROMol.check()) {
65       decomp = new RGroupDecomposition(isROMol(), params);
66     } else {
67       MOL_SPTR_VECT coreMols;
68       python::stl_input_iterator<ROMOL_SPTR> iter(cores), end;
69       while (iter != end) {
70         if (!*iter) {
71           throw_value_error("reaction called with None reactants");
72         }
73         coreMols.push_back(*iter);
74         ++iter;
75       }
76       decomp = new RGroupDecomposition(coreMols, params);
77     }
78   }
79 
Add(const ROMol & mol)80   int Add(const ROMol &mol) {
81     NOGIL gil;
82     return decomp->add(mol);
83   }
Process()84   bool Process() {
85     NOGIL gil;
86     return decomp->process();
87   }
ProcessAndScore()88   python::tuple ProcessAndScore() {
89     NOGIL gil;
90     auto result = decomp->processAndScore();
91     return python::make_tuple(result.success, result.score);
92   }
93 
GetRGroupLabels()94   python::list GetRGroupLabels() {
95     python::list result;
96     std::vector<std::string> labels = decomp->getRGroupLabels();
97     for (auto label : labels) {
98       result.append(label);
99     }
100     return result;
101   }
GetRGroupsAsRows(bool asSmiles=false)102   python::list GetRGroupsAsRows(bool asSmiles = false) {
103     const RGroupRows &groups = decomp->getRGroupsAsRows();
104     python::list result;
105 
106     for (const auto &side_chains : groups) {
107       python::dict dict;
108       for (const auto &side_chain : side_chains) {
109         if (asSmiles) {
110           dict[side_chain.first] = MolToSmiles(*side_chain.second, true);
111         } else {
112           dict[side_chain.first] = side_chain.second;
113         }
114       }
115       result.append(dict);
116     }
117     return result;
118   }
119 
GetRGroupsAsColumn(bool asSmiles=false)120   python::dict GetRGroupsAsColumn(bool asSmiles = false) {
121     python::dict result;
122 
123     RGroupColumns groups = decomp->getRGroupsAsColumns();
124 
125     for (RGroupColumns::const_iterator it = groups.begin(); it != groups.end();
126          ++it) {
127       python::list col;
128 
129       for (const auto &cit : it->second) {
130         if (asSmiles) {
131           col.append(MolToSmiles(*cit, true));
132         } else {
133           col.append(cit);
134         }
135       }
136       result[it->first] = col;
137     }
138     return result;
139   }
140 };
141 
RGroupDecomp(python::object cores,python::object mols,bool asSmiles=false,bool asRows=true,const RGroupDecompositionParameters & options=RGroupDecompositionParameters ())142 python::object RGroupDecomp(python::object cores, python::object mols,
143                             bool asSmiles = false, bool asRows = true,
144                             const RGroupDecompositionParameters &options =
145                                 RGroupDecompositionParameters()) {
146   auto t0 = std::chrono::steady_clock::now();
147   RGroupDecompositionHelper decomp(cores, options);
148   python::list unmatched;
149 
150   python::stl_input_iterator<ROMOL_SPTR> iter(mols), end;
151   unsigned int idx = 0;
152   while (iter != end) {
153     if (!*iter) {
154       throw_value_error("reaction called with None reactants");
155     }
156     if (decomp.Add(*(*iter)) == -1) {
157       unmatched.append(idx);
158     }
159     ++iter;
160     ++idx;
161     checkForTimeout(t0, options.timeout);
162   }
163 
164   decomp.Process();
165   if (asRows) {
166     return make_tuple(decomp.GetRGroupsAsRows(asSmiles), unmatched);
167   } else {
168     return make_tuple(decomp.GetRGroupsAsColumn(asSmiles), unmatched);
169   }
170 }  // namespace RDKit
171 
172 struct rgroupdecomp_wrapper {
wrapRDKit::rgroupdecomp_wrapper173   static void wrap() {
174     // logic from https://stackoverflow.com/a/13017303
175     boost::python::type_info info =
176         boost::python::type_id<RDKit::MOL_SPTR_VECT>();
177     const boost::python::converter::registration *reg =
178         boost::python::converter::registry::query(info);
179     if (reg == nullptr || (*reg).m_to_python == nullptr) {
180       python::class_<RDKit::MOL_SPTR_VECT>("MOL_SPTR_VECT")
181           .def(python::vector_indexing_suite<RDKit::MOL_SPTR_VECT, true>());
182     }
183 
184     std::string docString = "";
185     python::enum_<RDKit::RGroupLabels>("RGroupLabels")
186         .value("IsotopeLabels", RDKit::IsotopeLabels)
187         .value("AtomMapLabels", RDKit::AtomMapLabels)
188         .value("AtomIndexLabels", RDKit::AtomIndexLabels)
189         .value("RelabelDuplicateLabels", RDKit::RelabelDuplicateLabels)
190         .value("MDLRGroupLabels", RDKit::MDLRGroupLabels)
191         .value("DummyAtomLabels", RDKit::DummyAtomLabels)
192         .value("AutoDetect", RDKit::AutoDetect)
193         .export_values();
194 
195     python::enum_<RDKit::RGroupMatching>("RGroupMatching")
196         .value("Greedy", RDKit::Greedy)
197         .value("GreedyChunks", RDKit::GreedyChunks)
198         .value("Exhaustive", RDKit::Exhaustive)
199         .value("NoSymmetrization", RDKit::NoSymmetrization)
200         .value("GA", RDKit::GA)
201         .export_values();
202 
203     python::enum_<RDKit::RGroupLabelling>("RGroupLabelling")
204         .value("AtomMap", RDKit::AtomMap)
205         .value("Isotope", RDKit::Isotope)
206         .value("MDLRGroup", RDKit::MDLRGroup)
207         .export_values();
208 
209     python::enum_<RDKit::RGroupCoreAlignment>("RGroupCoreAlignment")
210         // DEPRECATED, remove the folowing line in release 2021.03
211         .value("None", RDKit::NoAlignment)
212         .value("NoAlignment", RDKit::NoAlignment)
213         .value("MCS", RDKit::MCS)
214         .export_values();
215 
216     python::enum_<RDKit::RGroupScore>("RGroupScore")
217         .value("Match", RDKit::Match)
218         .value("FingerprintVariance", RDKit::FingerprintVariance)
219         .export_values();
220 
221     docString =
222         "RGroupDecompositionParameters controls how the RGroupDecomposition "
223         "sets labelling and matches structures\n"
224         "  OPTIONS:\n"
225         "    - RGroupCoreAlignment: can be one of RGroupCoreAlignment.None_ or "
226         "RGroupCoreAlignment.MCS\n"
227         "                           If set to MCS, cores labels are mapped to "
228         "each other using their\n"
229         "                           Maximum common substructure overlap.\n"
230         "    - RGroupLabels: optionally set where the rgroup labels to use are "
231         "encoded.\n"
232         "                     RGroupLabels.IsotopeLabels - labels are stored "
233         "on isotopes\n"
234         "                     RGroupLabels.AtomMapLabels - labels are stored "
235         "on atommaps\n"
236         "                     RGroupLabels.MDLRGroupLabels - labels are stored "
237         "on MDL R-groups\n"
238         "                     RGroupLabels.DummyAtomLabels - labels are stored "
239         "on dummy atoms\n"
240         "                     RGroupLabels.AtomIndexLabels - use the atom "
241         "index "
242         "as the label\n"
243         "                     RGroupLabels.RelabelDuplicateLabels - fix any "
244         "duplicate labels\n"
245         "                     RGroupLabels.AutoDetect - auto detect the label "
246         "[default]\n"
247         "       Note: in all cases, any rgroups found on unlabelled atoms will "
248         "be automatically\n"
249         "              labelled.\n"
250         "    - RGroupLabelling: choose where the rlabels are stored on the "
251         "decomposition\n"
252         "                        RGroupLabels.AtomMap - store rgroups as atom "
253         "maps (for smiles)\n"
254         "                        RGroupLabels.Isotope - stroe rgroups on the "
255         "isotope\n"
256         "                        RGroupLabels.MDLRGroup - store rgroups as mdl "
257         "rgroups (for molblocks)\n"
258         "                       default: AtomMap | MDLRGroup\n"
259         "    - onlyMatchAtRGroups: only allow rgroup decomposition at the "
260         "specified rgroups\n"
261         "    - removeAllHydrogenRGroups: remove all user-defined rgroups that "
262         "only have hydrogens\n"
263         "    - removeAllHydrogenRGroupsAndLabels: remove all user-defined "
264         "rgroups that only have hydrogens, and also remove the corresponding "
265         "labels from the core\n"
266         "    - removeHydrogensPostMatch: remove all hydrogens from the output "
267         "molecules\n"
268         "    - allowNonTerminalRGroups: allow labelled Rgroups of degree 2 or "
269         "more\n";
270     python::class_<RDKit::RGroupDecompositionParameters>(
271         "RGroupDecompositionParameters", docString.c_str(),
272         python::init<>("Constructor, takes no arguments"))
273 
274         .def_readwrite("labels", &RDKit::RGroupDecompositionParameters::labels)
275         .def_readwrite("matchingStrategy",
276                        &RDKit::RGroupDecompositionParameters::matchingStrategy)
277         .def_readwrite("scoreMethod",
278                        &RDKit::RGroupDecompositionParameters::scoreMethod)
279         .def_readwrite("rgroupLabelling",
280                        &RDKit::RGroupDecompositionParameters::rgroupLabelling)
281         .def_readwrite("alignment",
282                        &RDKit::RGroupDecompositionParameters::alignment)
283         .def_readwrite("chunkSize",
284                        &RDKit::RGroupDecompositionParameters::chunkSize)
285         .def_readwrite(
286             "onlyMatchAtRGroups",
287             &RDKit::RGroupDecompositionParameters::onlyMatchAtRGroups)
288         .def_readwrite(
289             "removeAllHydrogenRGroups",
290             &RDKit::RGroupDecompositionParameters::removeAllHydrogenRGroups)
291         .def_readwrite(
292             "removeHydrogensPostMatch",
293             &RDKit::RGroupDecompositionParameters::removeHydrogensPostMatch)
294         .def_readwrite("timeout",
295                        &RDKit::RGroupDecompositionParameters::timeout)
296         .def_readwrite("gaPopulationSize",
297                        &RDKit::RGroupDecompositionParameters::gaPopulationSize)
298         .def_readwrite(
299             "gaMaximumOperations",
300             &RDKit::RGroupDecompositionParameters::gaMaximumOperations)
301         .def_readwrite("gaNumberOperationsWithoutImprovement",
302                        &RDKit::RGroupDecompositionParameters::
303                            gaNumberOperationsWithoutImprovement)
304         .def_readwrite("gaRandomSeed",
305                        &RDKit::RGroupDecompositionParameters::gaRandomSeed)
306         .def_readwrite("gaNumberRuns",
307                        &RDKit::RGroupDecompositionParameters::gaNumberRuns)
308         .def_readwrite("gaParallelRuns",
309                        &RDKit::RGroupDecompositionParameters::gaParallelRuns)
310         .def_readwrite(
311             "allowNonTerminalRGroups",
312             &RDKit::RGroupDecompositionParameters::allowNonTerminalRGroups)
313         .def_readwrite("removeAllHydrogenRGroupsAndLabels",
314                        &RDKit::RGroupDecompositionParameters::
315                            removeAllHydrogenRGroupsAndLabels);
316 
317     python::class_<RDKit::RGroupDecompositionHelper, boost::noncopyable>(
318         "RGroupDecomposition", docString.c_str(),
319         python::init<python::object>(
320             "Construct from a molecule or sequence of molecules"))
321         .def(
322             python::init<python::object, const RGroupDecompositionParameters &>(
323                 "Construct from a molecule or sequence of molecules and a "
324                 "parameters object"))
325         .def("Add", &RGroupDecompositionHelper::Add)
326         .def("Process", &RGroupDecompositionHelper::Process,
327              "Process the rgroups (must be done prior to "
328              "GetRGroupsAsRows/Columns and GetRGroupLabels)")
329         .def("ProcessAndScore", &RGroupDecompositionHelper::ProcessAndScore,
330              "Process the rgroups and returns the score (must be done prior to "
331              "GetRGroupsAsRows/Columns and GetRGroupLabels)")
332         .def("GetRGroupLabels", &RGroupDecompositionHelper::GetRGroupLabels,
333              "Return the current list of found rgroups.\n"
334              "Note, Process() should be called first")
335         .def("GetRGroupsAsRows", &RGroupDecompositionHelper::GetRGroupsAsRows,
336              python::arg("asSmiles") = false,
337              "Return the rgroups as rows (note: can be fed directrly into a "
338              "pandas datatable)\n"
339              "  ARGUMENTS:\n"
340              "   - asSmiles: if True return smiles strings, otherwise return "
341              "molecules [default: False]\n"
342              "    Row structure:\n"
343              "       rows[idx] = {rgroup_label: molecule_or_smiles}\n")
344         .def("GetRGroupsAsColumns",
345              &RGroupDecompositionHelper::GetRGroupsAsColumn,
346              python::arg("asSmiles") = false,
347              "Return the rgroups as columns (note: can be fed directrly into a "
348              "pandas datatable)\n"
349              "  ARGUMENTS:\n"
350              "   - asSmiles: if True return smiles strings, otherwise return "
351              "molecules [default: False]\n"
352              "    Column structure:\n"
353              "       columns[rgroup_label] = [ mols_or_smiles ]\n");
354 
355     docString =
356         "Decompose a collecion of molecules into their Rgroups\n"
357         "  ARGUMENTS:\n"
358         "    - cores: a set of cores from most to least specific.\n"
359         "             See RGroupDecompositionParameters for more details\n"
360         "             on how the cores can be labelled\n"
361         "    - mols: the molecules to be decomposed\n"
362         "    - asSmiles: if True return smiles strings, otherwise return "
363         "molecules [default: False]\n"
364         "    - asRows: return the results as rows (default) otherwise return "
365         "columns\n"
366         "\n"
367         "  RETURNS: row_or_column_results, unmatched\n"
368         "\n"
369         "    Row structure:\n"
370         "       rows[idx] = {rgroup_label: molecule_or_smiles}\n"
371         "    Column structure:\n"
372         "       columns[rgroup_label] = [ mols_or_smiles ]\n"
373         "\n"
374         "    unmatched is a vector of indices in the input mols that were not "
375         "matched.\n";
376     python::def("RGroupDecompose", RDKit::RGroupDecomp,
377                 (python::arg("cores"), python::arg("mols"),
378                  python::arg("asSmiles") = false, python::arg("asRows") = true,
379                  python::arg("options") = RGroupDecompositionParameters()),
380                 docString.c_str());
381   };
382 };
383 }  // namespace RDKit
384 
// Python module entry point: sets the module docstring on the current scope
// and then registers every wrapped R-group decomposition type and function.
BOOST_PYTHON_MODULE(rdRGroupDecomposition) {
  python::scope moduleScope;
  moduleScope.attr("__doc__") =
      "Module containing RGroupDecomposition classes and functions.";

  RDKit::rgroupdecomp_wrapper::wrap();
}
390