1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32
33 #include <RDGeneral/test.h>
34 #include <RDGeneral/utils.h>
35 #include <RDGeneral/Exceptions.h>
36 #include <GraphMol/RDKitBase.h>
37 #include <GraphMol/RDKitQueries.h>
38 #include <GraphMol/SmilesParse/SmilesParse.h>
39 #include <GraphMol/FileParsers/MolSupplier.h>
40
41 #include <GraphMol/ChemReactions/Enumerate/CartesianProduct.h>
42 #include <GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h>
43 #include <GraphMol/ChemReactions/Enumerate/RandomSample.h>
44 #include <GraphMol/ChemReactions/Enumerate/RandomSampleAllBBs.h>
45 #include <GraphMol/ChemReactions/Enumerate/Enumerate.h>
46
47 #include <GraphMol/ChemReactions/ReactionParser.h>
48 #include <GraphMol/ChemReactions/ReactionUtils.h>
49 #include <GraphMol/ChemReactions/SanitizeRxn.h>
50
51 #ifdef RDK_USE_BOOST_SERIALIZATION
52 #include <RDGeneral/BoostStartInclude.h>
53 #include <boost/archive/text_oarchive.hpp>
54 #include <boost/archive/text_iarchive.hpp>
55 #include <RDGeneral/BoostEndInclude.h>
56 #endif
57
58 using namespace RDKit;
59
60 #ifdef RDK_USE_BOOST_SERIALIZATION
61 // for each starting point check to see that the archive
62 // starts at the same point
pickleTest(EnumerationStrategyBase & en,size_t len)63 void pickleTest(EnumerationStrategyBase &en, size_t len) {
64 boost::shared_ptr<EnumerationStrategyBase> base(en.copy());
65 TEST_ASSERT(std::string(base->type()) == std::string(en.type()));
66
67 for (size_t i = 0; i < len; ++i) {
68 std::stringstream ss;
69 {
70 boost::archive::text_oarchive ar(ss);
71 ar &base;
72 }
73 boost::shared_ptr<EnumerationStrategyBase> copy;
74 {
75 boost::archive::text_iarchive ar(ss);
76 ar ©
77 }
78 TEST_ASSERT(std::string(base->type()) == std::string(copy->type()));
79 TEST_ASSERT(base->next() == copy->next());
80 TEST_ASSERT(base->getPosition() == en.next());
81 }
82 }
83 #endif
84
testSamplers()85 void testSamplers() {
86 EnumerationTypes::BBS bbs;
87 bbs.resize(3);
88 for (int i = 0; i < 10; ++i) {
89 bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
90 }
91
92 for (int i = 0; i < 5; ++i) {
93 bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
94 }
95
96 for (int i = 0; i < 6; ++i) {
97 bbs[2].push_back(
98 boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
99 }
100
101 ChemicalReaction rxn;
102 CartesianProductStrategy cart;
103 cart.initialize(rxn, bbs);
104 RandomSampleStrategy rand;
105 rand.initialize(rxn, bbs);
106 RandomSampleAllBBsStrategy randBBs;
107 randBBs.initialize(rxn, bbs);
108 EvenSamplePairsStrategy even;
109 even.initialize(rxn, bbs);
110 std::vector<boost::shared_ptr<EnumerationStrategyBase>> enumerators;
111 enumerators.emplace_back(cart.copy());
112 enumerators.emplace_back(rand.copy());
113 enumerators.emplace_back(randBBs.copy());
114 enumerators.emplace_back(even.copy());
115 #ifdef RDK_USE_BOOST_SERIALIZATION
116 for (auto &enumerator : enumerators) {
117 TEST_ASSERT(enumerator->getNumPermutations() == 10 * 5 * 6);
118 pickleTest(*enumerator, 10 * 5 * 6);
119 }
120 #endif
121 // for(auto&& i: enumerators) {
122 // TEST_ASSERT(i->getNumPermutations() == 10*5*6);
123 //}
124 }
125
testEvenSamplers()126 void testEvenSamplers() {
127 EnumerationTypes::BBS bbs;
128 bbs.resize(3);
129 boost::uint64_t R1 = 600;
130 boost::uint64_t R2 = 50;
131 boost::uint64_t R3 = 1000;
132
133 boost::shared_ptr<ROMol> m(SmilesToMol("C=CCN=C=S"));
134 boost::shared_ptr<ROMol> m2(SmilesToMol("NCc1ncc(Cl)cc1Br"));
135 boost::shared_ptr<ROMol> m3(SmilesToMol("NCCCc1ncc(Cl)cc1Br"));
136
137 for (unsigned long i = 0; i < R1; ++i) {
138 bbs[0].push_back(m);
139 }
140
141 for (unsigned long i = 0; i < R2; ++i) {
142 bbs[1].push_back(m2);
143 }
144
145 for (unsigned long i = 0; i < R3; ++i) {
146 bbs[2].push_back(m3);
147 }
148
149 ChemicalReaction rxn;
150 EvenSamplePairsStrategy even;
151 even.initialize(rxn, bbs);
152 std::cout << even.getNumPermutations() << " " << R1 * R2 * R3 << std::endl;
153 TEST_ASSERT(even.getNumPermutations() == R1 * R2 * R3);
154
155 for (size_t i = 0; i < 5000; ++i) {
156 even.next();
157 }
158 even.stats();
159 }
160
// Product SMILES that testEnumerations() expects from the default
// EnumerateLibrary walk, in the order they are produced: the first reactant
// slot changes on every step, the second slot on every other step.
const char *smiresults[] = {
    "C=CCNC(=S)NCc1ncc(Cl)cc1Br",    // slot 0: C=CC...,  slot 1: NC...
    "CC=CCNC(=S)NCc1ncc(Cl)cc1Br",   // slot 0: CC=CC..., slot 1: NC...
    "C=CCNC(=S)NCCc1ncc(Cl)cc1Br",   // slot 0: C=CC...,  slot 1: NCC...
    "CC=CCNC(=S)NCCc1ncc(Cl)cc1Br",  // slot 0: CC=CC..., slot 1: NCC...
    "C=CCNC(=S)NCCCc1ncc(Cl)cc1Br",  // slot 0: C=CC...,  slot 1: NCCC...
    "CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br"  // slot 0: CC=CC..., slot 1: NCCC...
};
165
testEnumerations()166 void testEnumerations() {
167 EnumerationTypes::BBS bbs;
168 bbs.resize(2);
169
170 bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
171 bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CC=CCN=C=S")));
172
173 bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
174 bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCc1ncc(Cl)cc1Br")));
175 bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
176
177 ChemicalReaction *rxn = RxnSmartsToChemicalReaction(
178 "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);"
179 "!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:"
180 "2]");
181
182 {
183 EnumerateLibrary en(*rxn, bbs);
184 size_t i = 0;
185 for (; (bool)en; ++i) {
186 std::vector<std::vector<std::string>> res = en.nextSmiles();
187 TEST_ASSERT(res.size() == 1);
188 TEST_ASSERT(res[0].size() == 1);
189 TEST_ASSERT(res[0][0] == smiresults[i]);
190 TEST_ASSERT(i <= 6);
191 }
192 TEST_ASSERT(i == 6);
193 // tests reset
194 en.resetState();
195 i = 0;
196 for (; (bool)en; ++i) {
197 std::vector<std::vector<std::string>> res = en.nextSmiles();
198 TEST_ASSERT(res.size() == 1);
199 TEST_ASSERT(res[0].size() == 1);
200 TEST_ASSERT(res[0][0] == smiresults[i]);
201 TEST_ASSERT(i <= 6);
202 }
203 TEST_ASSERT(i == 6);
204 }
205
206 #ifdef RDK_USE_BOOST_SERIALIZATION
207 {
208 boost::shared_ptr<EnumerateLibrary> en(
209 new EnumerateLibrary(*rxn, bbs, RandomSampleStrategy()));
210
211 std::vector<std::vector<std::vector<std::string>>> smir;
212 for (size_t j = 0; j < 10; ++j) {
213 std::vector<std::vector<std::string>> smiles = en->nextSmiles();
214 smir.push_back(smiles);
215 }
216
217 en->resetState();
218
219 for (size_t i = 0; i < 1000; ++i) {
220 // pickle and unpickle
221 std::stringstream ss;
222 {
223 boost::archive::text_oarchive ar(ss);
224 ar &en;
225 }
226 boost::shared_ptr<EnumerateLibrary> copy;
227 {
228 boost::archive::text_iarchive ar(ss);
229 ar ©
230 }
231
232 for (size_t j = 0; j < 10; ++j) {
233 TEST_ASSERT(en->nextSmiles() == copy->nextSmiles());
234 }
235
236 copy->resetState();
237 for (size_t j = 0; j < 10; ++j) {
238 TEST_ASSERT(smir[j] == copy->nextSmiles());
239 }
240 }
241 }
242 #endif
243 delete rxn;
244 }
245
// RXN block handed to RxnBlockToChemicalReaction() by
// testInsaneEnumerations(). The text holds four $MOL sections; the test reads
// reactant templates 0..2 from the parsed reaction.
//
// NOTE(review): the whitespace inside this literal looks collapsed (every gap
// is a single space, e.g. "M END" where the MDL CTfile terminator is normally
// written "M  END") - confirm against the upstream copy of this test before
// relying on the data.
const char *rxndata =
    "$RXN\nUntitled Document-1\n ChemDraw10291618492D\n\n 3 1\n$MOL\n\n\n\n "
    " 2 1 0 0 0 0 0 0 0 0999 V2000\n 0.4125 0.0000 0.0000 N "
    " 0 0 0 0 0 0 0 0 0 3 0 0\n -0.4125 0.0000 0.0000 R2 0 "
    " 0 0 0 0 0 0 0 0 0 2 0 0\n 1 2 1 0 0\nM "
    "END\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n -0.4125 "
    "0.0000 0.0000 R1 0 0 0 0 0 0 0 0 0 1 0 0\n 0.4125 "
    "0.0000 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 "
    " 0\nM END\n$MOL\n\n\n\n 2 1 0 0 0 0 0 0 0 0999 V2000\n "
    "0.4125 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 5 0 0\n "
    "-0.4125 0.0000 0.0000 R4 0 0 0 0 0 0 0 0 0 4 0 0\n 1 "
    "2 1 0 0\nM END\n$MOL\n\n\n\n 14 15 0 0 0 0 0 0 0 0999 "
    "V2000\n 0.5072 -0.5166 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    " 0\n 0.5072 0.3084 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n 1.2949 -0.7616 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n 1.7817 -0.0880 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n 1.2967 0.5794 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n 1.5558 -1.5443 0.0000 R1 0 0 0 0 0 0 0 0 0 1 0 "
    "0\n -0.2073 0.7208 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n -0.9218 0.3083 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n -0.9217 -0.5167 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n -0.2073 -0.9292 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 "
    "0\n -1.6362 0.7208 0.0000 N 0 0 0 0 0 0 0 0 0 3 0 "
    "0\n 1.5452 1.3661 0.0000 N 0 0 0 0 0 0 0 0 0 5 0 "
    "0\n 2.3507 1.5443 0.0000 R4 0 0 0 0 0 0 0 0 0 4 0 "
    "0\n -2.3507 0.3083 0.0000 R2 0 0 0 0 0 0 0 0 0 2 0 "
    "0\n 1 2 2 0 0\n 1 3 1 0 0\n 3 4 1 0 0\n "
    "4 5 1 0 0\n 5 2 1 0 0\n 3 6 1 0 0\n 2 7 "
    " 1 0 0\n 7 8 2 0 0\n 8 9 1 0 0\n 9 10 2 "
    "0 0\n 10 1 1 0 0\n 8 11 1 0 0\n 12 13 1 0 "
    " 0\n 11 14 1 0 0\n 12 5 1 0 0\nM END\n";
277
testInsaneEnumerations()278 void testInsaneEnumerations() {
279 EnumerationTypes::BBS bbs;
280 bbs.resize(3);
281
282 ChemicalReaction *rxn2 = RxnBlockToChemicalReaction(rxndata);
283 // RxnOps::sanitizeRxn(*rxn2, MolOps::AdjustQueryParameters());
284 MatchVectType tvect;
285
286 bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CCNCC")));
287 bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCC")));
288 std::cerr << "0,0 "
289 << (int)SubstructMatch(*bbs[0][0].get(),
290 *rxn2->getReactants()[0].get(), tvect)
291 << std::endl;
292 std::cerr << "0,1 "
293 << (int)SubstructMatch(*bbs[0][1].get(),
294 *rxn2->getReactants()[0].get(), tvect)
295 << std::endl;
296
297 bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("ClC1CCC1")));
298 bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("ClC1CCC1Cl")));
299 std::cerr << "1,0 "
300 << (int)SubstructMatch(*bbs[1][0].get(),
301 *rxn2->getReactants()[1].get(), tvect)
302 << std::endl;
303 std::cerr << "1,1 "
304 << (int)SubstructMatch(*bbs[1][1].get(),
305 *rxn2->getReactants()[1].get(), tvect)
306 << std::endl;
307
308 bbs[2].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CCNCC")));
309 bbs[2].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCC")));
310 std::cerr << "2,0 "
311 << (int)SubstructMatch(*bbs[2][0].get(),
312 *rxn2->getReactants()[2].get(), tvect)
313 << std::endl;
314 std::cerr << "2,1 "
315 << (int)SubstructMatch(*bbs[2][1].get(),
316 *rxn2->getReactants()[2].get(), tvect)
317 << std::endl;
318
319 {
320 ChemicalReaction *rxn = RxnBlockToChemicalReaction(rxndata);
321 RxnOps::sanitizeRxn(*rxn, MolOps::AdjustQueryParameters());
322 EnumerationParams ThereCanBeOnlyOne;
323 ThereCanBeOnlyOne.reagentMaxMatchCount = 1;
324 EnumerationTypes::BBS bbs2 =
325 removeNonmatchingReagents(*rxn, bbs, ThereCanBeOnlyOne);
326 TEST_ASSERT(bbs2[0].size() == 1);
327 TEST_ASSERT(bbs2[1].size() == 1);
328 TEST_ASSERT(bbs2[2].size() == 1);
329
330 delete rxn;
331 }
332 delete rxn2;
333 }
334
#ifdef RDK_USE_BOOST_SERIALIZATION
/// Regression test for github #1657: serializes an EnumerateLibrary with
/// toStream(), restores it with initFromString(), and checks that calling
/// initFromString() a second time on the same object throws
/// ValueErrorException.
void testGithub1657() {
  BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdInfoLog) << "Testing github #1657: EnumerateLibrary with "
                          "initFromString called twice doesn't clear the "
                          "reaction"
                       << std::endl;
  EnumerationTypes::BBS bbs;
  bbs.resize(2);

  bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
  bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CC=CCN=C=S")));

  bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
  bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCc1ncc(Cl)cc1Br")));
  bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));

  // held by a smart pointer so the reaction is released on every exit path
  boost::shared_ptr<ChemicalReaction> rxn(RxnSmartsToChemicalReaction(
      "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);"
      "!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:"
      "2]"));
  TEST_ASSERT(rxn.get() != nullptr);

  {  // we'll also test that initFromString() works at all
    EnumerateLibrary en1(*rxn, bbs);
    std::stringstream sstr;
    en1.toStream(sstr);
    const std::string pickle = sstr.str();

    // a single initFromString() on a default-constructed library must work
    EnumerateLibrary en2;
    en2.initFromString(pickle);

    // the second call on the same object is expected to be rejected
    EnumerateLibrary en3;
    en3.initFromString(pickle);
    bool ok = false;
    try {
      en3.initFromString(pickle);
    } catch (const ValueErrorException &) {
      ok = true;
    }
    TEST_ASSERT(ok);
  }
  BOOST_LOG(rdInfoLog) << "\tdone" << std::endl;
}
#else
// Without boost serialization there is nothing to test; the empty stub keeps
// the call in main() valid.
void testGithub1657() {}
#endif
380
main()381 int main() {
382 RDLog::InitLogs();
383 #if 1
384 testSamplers();
385 testEvenSamplers();
386 testEnumerations();
387 testInsaneEnumerations();
388 #endif
389 testGithub1657();
390 }
391