1 // Copyright (c) 2017, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
#define NO_IMPORT_ARRAY
#include <RDBoost/python.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
#include <boost/python/list.hpp>
#include <boost/python/suite/indexing/map_indexing_suite.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <chrono>
#include <cmath>
#include <memory>
#include <string>

#include <RDGeneral/Exceptions.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/RGroupDecomposition/RGroupDecomp.h>
#include <RDBoost/Wrap.h>
#include <RDBoost/python_streambuf.h>
48
49 namespace python = boost::python;
50 using boost_adaptbx::python::streambuf;
51
52 namespace RDKit {
53
54 class RGroupDecompositionHelper {
55 RGroupDecomposition *decomp;
56
57 public:
~RGroupDecompositionHelper()58 ~RGroupDecompositionHelper() { delete decomp; }
59
RGroupDecompositionHelper(python::object cores,const RGroupDecompositionParameters & params=RGroupDecompositionParameters ())60 RGroupDecompositionHelper(python::object cores,
61 const RGroupDecompositionParameters ¶ms =
62 RGroupDecompositionParameters()) {
63 python::extract<ROMol> isROMol(cores);
64 if (isROMol.check()) {
65 decomp = new RGroupDecomposition(isROMol(), params);
66 } else {
67 MOL_SPTR_VECT coreMols;
68 python::stl_input_iterator<ROMOL_SPTR> iter(cores), end;
69 while (iter != end) {
70 if (!*iter) {
71 throw_value_error("reaction called with None reactants");
72 }
73 coreMols.push_back(*iter);
74 ++iter;
75 }
76 decomp = new RGroupDecomposition(coreMols, params);
77 }
78 }
79
Add(const ROMol & mol)80 int Add(const ROMol &mol) {
81 NOGIL gil;
82 return decomp->add(mol);
83 }
Process()84 bool Process() {
85 NOGIL gil;
86 return decomp->process();
87 }
ProcessAndScore()88 python::tuple ProcessAndScore() {
89 NOGIL gil;
90 auto result = decomp->processAndScore();
91 return python::make_tuple(result.success, result.score);
92 }
93
GetRGroupLabels()94 python::list GetRGroupLabels() {
95 python::list result;
96 std::vector<std::string> labels = decomp->getRGroupLabels();
97 for (auto label : labels) {
98 result.append(label);
99 }
100 return result;
101 }
GetRGroupsAsRows(bool asSmiles=false)102 python::list GetRGroupsAsRows(bool asSmiles = false) {
103 const RGroupRows &groups = decomp->getRGroupsAsRows();
104 python::list result;
105
106 for (const auto &side_chains : groups) {
107 python::dict dict;
108 for (const auto &side_chain : side_chains) {
109 if (asSmiles) {
110 dict[side_chain.first] = MolToSmiles(*side_chain.second, true);
111 } else {
112 dict[side_chain.first] = side_chain.second;
113 }
114 }
115 result.append(dict);
116 }
117 return result;
118 }
119
GetRGroupsAsColumn(bool asSmiles=false)120 python::dict GetRGroupsAsColumn(bool asSmiles = false) {
121 python::dict result;
122
123 RGroupColumns groups = decomp->getRGroupsAsColumns();
124
125 for (RGroupColumns::const_iterator it = groups.begin(); it != groups.end();
126 ++it) {
127 python::list col;
128
129 for (const auto &cit : it->second) {
130 if (asSmiles) {
131 col.append(MolToSmiles(*cit, true));
132 } else {
133 col.append(cit);
134 }
135 }
136 result[it->first] = col;
137 }
138 return result;
139 }
140 };
141
RGroupDecomp(python::object cores,python::object mols,bool asSmiles=false,bool asRows=true,const RGroupDecompositionParameters & options=RGroupDecompositionParameters ())142 python::object RGroupDecomp(python::object cores, python::object mols,
143 bool asSmiles = false, bool asRows = true,
144 const RGroupDecompositionParameters &options =
145 RGroupDecompositionParameters()) {
146 auto t0 = std::chrono::steady_clock::now();
147 RGroupDecompositionHelper decomp(cores, options);
148 python::list unmatched;
149
150 python::stl_input_iterator<ROMOL_SPTR> iter(mols), end;
151 unsigned int idx = 0;
152 while (iter != end) {
153 if (!*iter) {
154 throw_value_error("reaction called with None reactants");
155 }
156 if (decomp.Add(*(*iter)) == -1) {
157 unmatched.append(idx);
158 }
159 ++iter;
160 ++idx;
161 checkForTimeout(t0, options.timeout);
162 }
163
164 decomp.Process();
165 if (asRows) {
166 return make_tuple(decomp.GetRGroupsAsRows(asSmiles), unmatched);
167 } else {
168 return make_tuple(decomp.GetRGroupsAsColumn(asSmiles), unmatched);
169 }
170 } // namespace RDKit
171
172 struct rgroupdecomp_wrapper {
wrapRDKit::rgroupdecomp_wrapper173 static void wrap() {
174 // logic from https://stackoverflow.com/a/13017303
175 boost::python::type_info info =
176 boost::python::type_id<RDKit::MOL_SPTR_VECT>();
177 const boost::python::converter::registration *reg =
178 boost::python::converter::registry::query(info);
179 if (reg == nullptr || (*reg).m_to_python == nullptr) {
180 python::class_<RDKit::MOL_SPTR_VECT>("MOL_SPTR_VECT")
181 .def(python::vector_indexing_suite<RDKit::MOL_SPTR_VECT, true>());
182 }
183
184 std::string docString = "";
185 python::enum_<RDKit::RGroupLabels>("RGroupLabels")
186 .value("IsotopeLabels", RDKit::IsotopeLabels)
187 .value("AtomMapLabels", RDKit::AtomMapLabels)
188 .value("AtomIndexLabels", RDKit::AtomIndexLabels)
189 .value("RelabelDuplicateLabels", RDKit::RelabelDuplicateLabels)
190 .value("MDLRGroupLabels", RDKit::MDLRGroupLabels)
191 .value("DummyAtomLabels", RDKit::DummyAtomLabels)
192 .value("AutoDetect", RDKit::AutoDetect)
193 .export_values();
194
195 python::enum_<RDKit::RGroupMatching>("RGroupMatching")
196 .value("Greedy", RDKit::Greedy)
197 .value("GreedyChunks", RDKit::GreedyChunks)
198 .value("Exhaustive", RDKit::Exhaustive)
199 .value("NoSymmetrization", RDKit::NoSymmetrization)
200 .value("GA", RDKit::GA)
201 .export_values();
202
203 python::enum_<RDKit::RGroupLabelling>("RGroupLabelling")
204 .value("AtomMap", RDKit::AtomMap)
205 .value("Isotope", RDKit::Isotope)
206 .value("MDLRGroup", RDKit::MDLRGroup)
207 .export_values();
208
209 python::enum_<RDKit::RGroupCoreAlignment>("RGroupCoreAlignment")
210 // DEPRECATED, remove the folowing line in release 2021.03
211 .value("None", RDKit::NoAlignment)
212 .value("NoAlignment", RDKit::NoAlignment)
213 .value("MCS", RDKit::MCS)
214 .export_values();
215
216 python::enum_<RDKit::RGroupScore>("RGroupScore")
217 .value("Match", RDKit::Match)
218 .value("FingerprintVariance", RDKit::FingerprintVariance)
219 .export_values();
220
221 docString =
222 "RGroupDecompositionParameters controls how the RGroupDecomposition "
223 "sets labelling and matches structures\n"
224 " OPTIONS:\n"
225 " - RGroupCoreAlignment: can be one of RGroupCoreAlignment.None_ or "
226 "RGroupCoreAlignment.MCS\n"
227 " If set to MCS, cores labels are mapped to "
228 "each other using their\n"
229 " Maximum common substructure overlap.\n"
230 " - RGroupLabels: optionally set where the rgroup labels to use are "
231 "encoded.\n"
232 " RGroupLabels.IsotopeLabels - labels are stored "
233 "on isotopes\n"
234 " RGroupLabels.AtomMapLabels - labels are stored "
235 "on atommaps\n"
236 " RGroupLabels.MDLRGroupLabels - labels are stored "
237 "on MDL R-groups\n"
238 " RGroupLabels.DummyAtomLabels - labels are stored "
239 "on dummy atoms\n"
240 " RGroupLabels.AtomIndexLabels - use the atom "
241 "index "
242 "as the label\n"
243 " RGroupLabels.RelabelDuplicateLabels - fix any "
244 "duplicate labels\n"
245 " RGroupLabels.AutoDetect - auto detect the label "
246 "[default]\n"
247 " Note: in all cases, any rgroups found on unlabelled atoms will "
248 "be automatically\n"
249 " labelled.\n"
250 " - RGroupLabelling: choose where the rlabels are stored on the "
251 "decomposition\n"
252 " RGroupLabels.AtomMap - store rgroups as atom "
253 "maps (for smiles)\n"
254 " RGroupLabels.Isotope - stroe rgroups on the "
255 "isotope\n"
256 " RGroupLabels.MDLRGroup - store rgroups as mdl "
257 "rgroups (for molblocks)\n"
258 " default: AtomMap | MDLRGroup\n"
259 " - onlyMatchAtRGroups: only allow rgroup decomposition at the "
260 "specified rgroups\n"
261 " - removeAllHydrogenRGroups: remove all user-defined rgroups that "
262 "only have hydrogens\n"
263 " - removeAllHydrogenRGroupsAndLabels: remove all user-defined "
264 "rgroups that only have hydrogens, and also remove the corresponding "
265 "labels from the core\n"
266 " - removeHydrogensPostMatch: remove all hydrogens from the output "
267 "molecules\n"
268 " - allowNonTerminalRGroups: allow labelled Rgroups of degree 2 or "
269 "more\n";
270 python::class_<RDKit::RGroupDecompositionParameters>(
271 "RGroupDecompositionParameters", docString.c_str(),
272 python::init<>("Constructor, takes no arguments"))
273
274 .def_readwrite("labels", &RDKit::RGroupDecompositionParameters::labels)
275 .def_readwrite("matchingStrategy",
276 &RDKit::RGroupDecompositionParameters::matchingStrategy)
277 .def_readwrite("scoreMethod",
278 &RDKit::RGroupDecompositionParameters::scoreMethod)
279 .def_readwrite("rgroupLabelling",
280 &RDKit::RGroupDecompositionParameters::rgroupLabelling)
281 .def_readwrite("alignment",
282 &RDKit::RGroupDecompositionParameters::alignment)
283 .def_readwrite("chunkSize",
284 &RDKit::RGroupDecompositionParameters::chunkSize)
285 .def_readwrite(
286 "onlyMatchAtRGroups",
287 &RDKit::RGroupDecompositionParameters::onlyMatchAtRGroups)
288 .def_readwrite(
289 "removeAllHydrogenRGroups",
290 &RDKit::RGroupDecompositionParameters::removeAllHydrogenRGroups)
291 .def_readwrite(
292 "removeHydrogensPostMatch",
293 &RDKit::RGroupDecompositionParameters::removeHydrogensPostMatch)
294 .def_readwrite("timeout",
295 &RDKit::RGroupDecompositionParameters::timeout)
296 .def_readwrite("gaPopulationSize",
297 &RDKit::RGroupDecompositionParameters::gaPopulationSize)
298 .def_readwrite(
299 "gaMaximumOperations",
300 &RDKit::RGroupDecompositionParameters::gaMaximumOperations)
301 .def_readwrite("gaNumberOperationsWithoutImprovement",
302 &RDKit::RGroupDecompositionParameters::
303 gaNumberOperationsWithoutImprovement)
304 .def_readwrite("gaRandomSeed",
305 &RDKit::RGroupDecompositionParameters::gaRandomSeed)
306 .def_readwrite("gaNumberRuns",
307 &RDKit::RGroupDecompositionParameters::gaNumberRuns)
308 .def_readwrite("gaParallelRuns",
309 &RDKit::RGroupDecompositionParameters::gaParallelRuns)
310 .def_readwrite(
311 "allowNonTerminalRGroups",
312 &RDKit::RGroupDecompositionParameters::allowNonTerminalRGroups)
313 .def_readwrite("removeAllHydrogenRGroupsAndLabels",
314 &RDKit::RGroupDecompositionParameters::
315 removeAllHydrogenRGroupsAndLabels);
316
317 python::class_<RDKit::RGroupDecompositionHelper, boost::noncopyable>(
318 "RGroupDecomposition", docString.c_str(),
319 python::init<python::object>(
320 "Construct from a molecule or sequence of molecules"))
321 .def(
322 python::init<python::object, const RGroupDecompositionParameters &>(
323 "Construct from a molecule or sequence of molecules and a "
324 "parameters object"))
325 .def("Add", &RGroupDecompositionHelper::Add)
326 .def("Process", &RGroupDecompositionHelper::Process,
327 "Process the rgroups (must be done prior to "
328 "GetRGroupsAsRows/Columns and GetRGroupLabels)")
329 .def("ProcessAndScore", &RGroupDecompositionHelper::ProcessAndScore,
330 "Process the rgroups and returns the score (must be done prior to "
331 "GetRGroupsAsRows/Columns and GetRGroupLabels)")
332 .def("GetRGroupLabels", &RGroupDecompositionHelper::GetRGroupLabels,
333 "Return the current list of found rgroups.\n"
334 "Note, Process() should be called first")
335 .def("GetRGroupsAsRows", &RGroupDecompositionHelper::GetRGroupsAsRows,
336 python::arg("asSmiles") = false,
337 "Return the rgroups as rows (note: can be fed directrly into a "
338 "pandas datatable)\n"
339 " ARGUMENTS:\n"
340 " - asSmiles: if True return smiles strings, otherwise return "
341 "molecules [default: False]\n"
342 " Row structure:\n"
343 " rows[idx] = {rgroup_label: molecule_or_smiles}\n")
344 .def("GetRGroupsAsColumns",
345 &RGroupDecompositionHelper::GetRGroupsAsColumn,
346 python::arg("asSmiles") = false,
347 "Return the rgroups as columns (note: can be fed directrly into a "
348 "pandas datatable)\n"
349 " ARGUMENTS:\n"
350 " - asSmiles: if True return smiles strings, otherwise return "
351 "molecules [default: False]\n"
352 " Column structure:\n"
353 " columns[rgroup_label] = [ mols_or_smiles ]\n");
354
355 docString =
356 "Decompose a collecion of molecules into their Rgroups\n"
357 " ARGUMENTS:\n"
358 " - cores: a set of cores from most to least specific.\n"
359 " See RGroupDecompositionParameters for more details\n"
360 " on how the cores can be labelled\n"
361 " - mols: the molecules to be decomposed\n"
362 " - asSmiles: if True return smiles strings, otherwise return "
363 "molecules [default: False]\n"
364 " - asRows: return the results as rows (default) otherwise return "
365 "columns\n"
366 "\n"
367 " RETURNS: row_or_column_results, unmatched\n"
368 "\n"
369 " Row structure:\n"
370 " rows[idx] = {rgroup_label: molecule_or_smiles}\n"
371 " Column structure:\n"
372 " columns[rgroup_label] = [ mols_or_smiles ]\n"
373 "\n"
374 " unmatched is a vector of indices in the input mols that were not "
375 "matched.\n";
376 python::def("RGroupDecompose", RDKit::RGroupDecomp,
377 (python::arg("cores"), python::arg("mols"),
378 python::arg("asSmiles") = false, python::arg("asRows") = true,
379 python::arg("options") = RGroupDecompositionParameters()),
380 docString.c_str());
381 };
382 };
383 } // namespace RDKit
384
// Entry point of the rdRGroupDecomposition extension module: sets the module
// docstring, then registers every wrapped enum, class and function.
BOOST_PYTHON_MODULE(rdRGroupDecomposition) {
  const char *moduleDoc =
      "Module containing RGroupDecomposition classes and functions.";
  python::scope().attr("__doc__") = moduleDoc;

  RDKit::rgroupdecomp_wrapper::wrap();
}
390