1 //
2 //  Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 //  All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 //       notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 //       copyright notice, this list of conditions and the following
13 //       disclaimer in the documentation and/or other materials provided
14 //       with the distribution.
15 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 //       nor the names of its contributors may be used to endorse or promote
17 //       products derived from this software without specific prior written
18 //       permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 
#include <RDGeneral/test.h>
#include <RDGeneral/utils.h>
#include <RDGeneral/Exceptions.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/RDKitQueries.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/FileParsers/MolSupplier.h>

#include <memory>
40 
41 #include <GraphMol/ChemReactions/Enumerate/CartesianProduct.h>
42 #include <GraphMol/ChemReactions/Enumerate/EvenSamplePairs.h>
43 #include <GraphMol/ChemReactions/Enumerate/RandomSample.h>
44 #include <GraphMol/ChemReactions/Enumerate/RandomSampleAllBBs.h>
45 #include <GraphMol/ChemReactions/Enumerate/Enumerate.h>
46 
47 #include <GraphMol/ChemReactions/ReactionParser.h>
48 #include <GraphMol/ChemReactions/ReactionUtils.h>
49 #include <GraphMol/ChemReactions/SanitizeRxn.h>
50 
51 #ifdef RDK_USE_BOOST_SERIALIZATION
52 #include <RDGeneral/BoostStartInclude.h>
53 #include <boost/archive/text_oarchive.hpp>
54 #include <boost/archive/text_iarchive.hpp>
55 #include <RDGeneral/BoostEndInclude.h>
56 #endif
57 
58 using namespace RDKit;
59 
60 #ifdef RDK_USE_BOOST_SERIALIZATION
61 // for each starting point check to see that the archive
62 //  starts at the same point
pickleTest(EnumerationStrategyBase & en,size_t len)63 void pickleTest(EnumerationStrategyBase &en, size_t len) {
64   boost::shared_ptr<EnumerationStrategyBase> base(en.copy());
65   TEST_ASSERT(std::string(base->type()) == std::string(en.type()));
66 
67   for (size_t i = 0; i < len; ++i) {
68     std::stringstream ss;
69     {
70       boost::archive::text_oarchive ar(ss);
71       ar &base;
72     }
73     boost::shared_ptr<EnumerationStrategyBase> copy;
74     {
75       boost::archive::text_iarchive ar(ss);
76       ar &copy;
77     }
78     TEST_ASSERT(std::string(base->type()) == std::string(copy->type()));
79     TEST_ASSERT(base->next() == copy->next());
80     TEST_ASSERT(base->getPosition() == en.next());
81   }
82 }
83 #endif
84 
testSamplers()85 void testSamplers() {
86   EnumerationTypes::BBS bbs;
87   bbs.resize(3);
88   for (int i = 0; i < 10; ++i) {
89     bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
90   }
91 
92   for (int i = 0; i < 5; ++i) {
93     bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
94   }
95 
96   for (int i = 0; i < 6; ++i) {
97     bbs[2].push_back(
98         boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
99   }
100 
101   ChemicalReaction rxn;
102   CartesianProductStrategy cart;
103   cart.initialize(rxn, bbs);
104   RandomSampleStrategy rand;
105   rand.initialize(rxn, bbs);
106   RandomSampleAllBBsStrategy randBBs;
107   randBBs.initialize(rxn, bbs);
108   EvenSamplePairsStrategy even;
109   even.initialize(rxn, bbs);
110   std::vector<boost::shared_ptr<EnumerationStrategyBase>> enumerators;
111   enumerators.emplace_back(cart.copy());
112   enumerators.emplace_back(rand.copy());
113   enumerators.emplace_back(randBBs.copy());
114   enumerators.emplace_back(even.copy());
115 #ifdef RDK_USE_BOOST_SERIALIZATION
116   for (auto &enumerator : enumerators) {
117     TEST_ASSERT(enumerator->getNumPermutations() == 10 * 5 * 6);
118     pickleTest(*enumerator, 10 * 5 * 6);
119   }
120 #endif
121   // for(auto&& i: enumerators) {
122   //  TEST_ASSERT(i->getNumPermutations() == 10*5*6);
123   //}
124 }
125 
testEvenSamplers()126 void testEvenSamplers() {
127   EnumerationTypes::BBS bbs;
128   bbs.resize(3);
129   boost::uint64_t R1 = 600;
130   boost::uint64_t R2 = 50;
131   boost::uint64_t R3 = 1000;
132 
133   boost::shared_ptr<ROMol> m(SmilesToMol("C=CCN=C=S"));
134   boost::shared_ptr<ROMol> m2(SmilesToMol("NCc1ncc(Cl)cc1Br"));
135   boost::shared_ptr<ROMol> m3(SmilesToMol("NCCCc1ncc(Cl)cc1Br"));
136 
137   for (unsigned long i = 0; i < R1; ++i) {
138     bbs[0].push_back(m);
139   }
140 
141   for (unsigned long i = 0; i < R2; ++i) {
142     bbs[1].push_back(m2);
143   }
144 
145   for (unsigned long i = 0; i < R3; ++i) {
146     bbs[2].push_back(m3);
147   }
148 
149   ChemicalReaction rxn;
150   EvenSamplePairsStrategy even;
151   even.initialize(rxn, bbs);
152   std::cout << even.getNumPermutations() << " " << R1 * R2 * R3 << std::endl;
153   TEST_ASSERT(even.getNumPermutations() == R1 * R2 * R3);
154 
155   for (size_t i = 0; i < 5000; ++i) {
156     even.next();
157   }
158   even.stats();
159 }
160 
// Product SMILES that testEnumerations() expects from a full enumeration, in
// the order they must be produced.
const char *smiresults[] = {
    "C=CCNC(=S)NCc1ncc(Cl)cc1Br",
    "CC=CCNC(=S)NCc1ncc(Cl)cc1Br",
    "C=CCNC(=S)NCCc1ncc(Cl)cc1Br",
    "CC=CCNC(=S)NCCc1ncc(Cl)cc1Br",
    "C=CCNC(=S)NCCCc1ncc(Cl)cc1Br",
    "CC=CCNC(=S)NCCCc1ncc(Cl)cc1Br",
};
165 
testEnumerations()166 void testEnumerations() {
167   EnumerationTypes::BBS bbs;
168   bbs.resize(2);
169 
170   bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
171   bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CC=CCN=C=S")));
172 
173   bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
174   bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCc1ncc(Cl)cc1Br")));
175   bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));
176 
177   ChemicalReaction *rxn = RxnSmartsToChemicalReaction(
178       "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);"
179       "!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:"
180       "2]");
181 
182   {
183     EnumerateLibrary en(*rxn, bbs);
184     size_t i = 0;
185     for (; (bool)en; ++i) {
186       std::vector<std::vector<std::string>> res = en.nextSmiles();
187       TEST_ASSERT(res.size() == 1);
188       TEST_ASSERT(res[0].size() == 1);
189       TEST_ASSERT(res[0][0] == smiresults[i]);
190       TEST_ASSERT(i <= 6);
191     }
192     TEST_ASSERT(i == 6);
193     // tests reset
194     en.resetState();
195     i = 0;
196     for (; (bool)en; ++i) {
197       std::vector<std::vector<std::string>> res = en.nextSmiles();
198       TEST_ASSERT(res.size() == 1);
199       TEST_ASSERT(res[0].size() == 1);
200       TEST_ASSERT(res[0][0] == smiresults[i]);
201       TEST_ASSERT(i <= 6);
202     }
203     TEST_ASSERT(i == 6);
204   }
205 
206 #ifdef RDK_USE_BOOST_SERIALIZATION
207   {
208     boost::shared_ptr<EnumerateLibrary> en(
209         new EnumerateLibrary(*rxn, bbs, RandomSampleStrategy()));
210 
211     std::vector<std::vector<std::vector<std::string>>> smir;
212     for (size_t j = 0; j < 10; ++j) {
213       std::vector<std::vector<std::string>> smiles = en->nextSmiles();
214       smir.push_back(smiles);
215     }
216 
217     en->resetState();
218 
219     for (size_t i = 0; i < 1000; ++i) {
220       // pickle and unpickle
221       std::stringstream ss;
222       {
223         boost::archive::text_oarchive ar(ss);
224         ar &en;
225       }
226       boost::shared_ptr<EnumerateLibrary> copy;
227       {
228         boost::archive::text_iarchive ar(ss);
229         ar &copy;
230       }
231 
232       for (size_t j = 0; j < 10; ++j) {
233         TEST_ASSERT(en->nextSmiles() == copy->nextSmiles());
234       }
235 
236       copy->resetState();
237       for (size_t j = 0; j < 10; ++j) {
238         TEST_ASSERT(smir[j] == copy->nextSmiles());
239       }
240     }
241   }
242 #endif
243   delete rxn;
244 }
245 
// Reaction definition in MDL RXN text format, embedded as one string literal.
// The count line in the header ("  3  1") is followed by four $MOL blocks:
// three small two-atom templates and one 14-atom, 15-bond template. The R1,
// R2 and R4 entries in the atom blocks are R-group placeholders.
// Parsed with RxnBlockToChemicalReaction() in testInsaneEnumerations().
// NOTE: the literal is whitespace-sensitive (fixed-width mol-block columns);
// do not reflow it.
const char *rxndata =
    "$RXN\nUntitled Document-1\n  ChemDraw10291618492D\n\n  3  1\n$MOL\n\n\n\n "
    " 2  1  0  0  0  0  0  0  0  0999 V2000\n    0.4125    0.0000    0.0000 N  "
    " 0  0  0  0  0  0  0  0  0  3  0  0\n   -0.4125    0.0000    0.0000 R2  0 "
    " 0  0  0  0  0  0  0  0  2  0  0\n  1  2  1  0        0\nM  "
    "END\n$MOL\n\n\n\n  2  1  0  0  0  0  0  0  0  0999 V2000\n   -0.4125    "
    "0.0000    0.0000 R1  0  0  0  0  0  0  0  0  0  1  0  0\n    0.4125    "
    "0.0000    0.0000 Cl  0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  1  0     "
    "   0\nM  END\n$MOL\n\n\n\n  2  1  0  0  0  0  0  0  0  0999 V2000\n    "
    "0.4125    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  5  0  0\n   "
    "-0.4125    0.0000    0.0000 R4  0  0  0  0  0  0  0  0  0  4  0  0\n  1  "
    "2  1  0        0\nM  END\n$MOL\n\n\n\n 14 15  0  0  0  0  0  0  0  0999 "
    "V2000\n    0.5072   -0.5166    0.0000 C   0  0  0  0  0  0  0  0  0  0  0 "
    " 0\n    0.5072    0.3084    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n    1.2949   -0.7616    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n    1.7817   -0.0880    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n    1.2967    0.5794    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n    1.5558   -1.5443    0.0000 R1  0  0  0  0  0  0  0  0  0  1  0  "
    "0\n   -0.2073    0.7208    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n   -0.9218    0.3083    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n   -0.9217   -0.5167    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n   -0.2073   -0.9292    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  "
    "0\n   -1.6362    0.7208    0.0000 N   0  0  0  0  0  0  0  0  0  3  0  "
    "0\n    1.5452    1.3661    0.0000 N   0  0  0  0  0  0  0  0  0  5  0  "
    "0\n    2.3507    1.5443    0.0000 R4  0  0  0  0  0  0  0  0  0  4  0  "
    "0\n   -2.3507    0.3083    0.0000 R2  0  0  0  0  0  0  0  0  0  2  0  "
    "0\n  1  2  2  0        0\n  1  3  1  0        0\n  3  4  1  0        0\n  "
    "4  5  1  0        0\n  5  2  1  0        0\n  3  6  1  0        0\n  2  7 "
    " 1  0        0\n  7  8  2  0        0\n  8  9  1  0        0\n  9 10  2  "
    "0        0\n 10  1  1  0        0\n  8 11  1  0        0\n 12 13  1  0    "
    "    0\n 11 14  1  0        0\n 12  5  1  0        0\nM  END\n";
277 
testInsaneEnumerations()278 void testInsaneEnumerations() {
279   EnumerationTypes::BBS bbs;
280   bbs.resize(3);
281 
282   ChemicalReaction *rxn2 = RxnBlockToChemicalReaction(rxndata);
283   // RxnOps::sanitizeRxn(*rxn2, MolOps::AdjustQueryParameters());
284   MatchVectType tvect;
285 
286   bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CCNCC")));
287   bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCC")));
288   std::cerr << "0,0 "
289             << (int)SubstructMatch(*bbs[0][0].get(),
290                                    *rxn2->getReactants()[0].get(), tvect)
291             << std::endl;
292   std::cerr << "0,1 "
293             << (int)SubstructMatch(*bbs[0][1].get(),
294                                    *rxn2->getReactants()[0].get(), tvect)
295             << std::endl;
296 
297   bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("ClC1CCC1")));
298   bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("ClC1CCC1Cl")));
299   std::cerr << "1,0 "
300             << (int)SubstructMatch(*bbs[1][0].get(),
301                                    *rxn2->getReactants()[1].get(), tvect)
302             << std::endl;
303   std::cerr << "1,1 "
304             << (int)SubstructMatch(*bbs[1][1].get(),
305                                    *rxn2->getReactants()[1].get(), tvect)
306             << std::endl;
307 
308   bbs[2].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CCNCC")));
309   bbs[2].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCC")));
310   std::cerr << "2,0 "
311             << (int)SubstructMatch(*bbs[2][0].get(),
312                                    *rxn2->getReactants()[2].get(), tvect)
313             << std::endl;
314   std::cerr << "2,1 "
315             << (int)SubstructMatch(*bbs[2][1].get(),
316                                    *rxn2->getReactants()[2].get(), tvect)
317             << std::endl;
318 
319   {
320     ChemicalReaction *rxn = RxnBlockToChemicalReaction(rxndata);
321     RxnOps::sanitizeRxn(*rxn, MolOps::AdjustQueryParameters());
322     EnumerationParams ThereCanBeOnlyOne;
323     ThereCanBeOnlyOne.reagentMaxMatchCount = 1;
324     EnumerationTypes::BBS bbs2 =
325         removeNonmatchingReagents(*rxn, bbs, ThereCanBeOnlyOne);
326     TEST_ASSERT(bbs2[0].size() == 1);
327     TEST_ASSERT(bbs2[1].size() == 1);
328     TEST_ASSERT(bbs2[2].size() == 1);
329 
330     delete rxn;
331   }
332   delete rxn2;
333 }
334 
#ifdef RDK_USE_BOOST_SERIALIZATION
// Regression test for github #1657.
//
// An EnumerateLibrary is written out with toStream(); the resulting string
// must initialize a fresh, default-constructed library via initFromString(),
// and a second initFromString() call on an already initialized library must
// throw ValueErrorException.
void testGithub1657() {
  BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl;
  BOOST_LOG(rdInfoLog) << "Testing github #1657: EnumerateLibrary with "
                          "initFromString called twice doesn't clear the "
                          "reaction"
                       << std::endl;
  EnumerationTypes::BBS bbs;
  bbs.resize(2);

  bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("C=CCN=C=S")));
  bbs[0].push_back(boost::shared_ptr<ROMol>(SmilesToMol("CC=CCN=C=S")));

  bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCc1ncc(Cl)cc1Br")));
  bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCc1ncc(Cl)cc1Br")));
  bbs[1].push_back(boost::shared_ptr<ROMol>(SmilesToMol("NCCCc1ncc(Cl)cc1Br")));

  // Held by a smart pointer so the reaction is released on every exit path,
  // including an unexpected exception from the calls below.
  const std::unique_ptr<ChemicalReaction> rxn(RxnSmartsToChemicalReaction(
      "[N;$(N-[#6]):3]=[C;$(C=S):1].[N;$(N[#6]);!$(N=*);!$([N-]);!$(N#*);"
      "!$([ND3]);!$([ND4]);!$(N[O,N]);!$(N[C,S]=[S,O,N]):2]>>[N:3]-[C:1]-[N+0:"
      "2]"));
  TEST_ASSERT(rxn);

  {  // we'll also test that initFromString() works at all
    EnumerateLibrary en1(*rxn, bbs);
    std::stringstream sstr;
    en1.toStream(sstr);
    const std::string pickle = sstr.str();

    // First initialization of a fresh library must simply succeed.
    EnumerateLibrary en2;
    en2.initFromString(pickle);

    // A repeated initialization must be rejected.
    EnumerateLibrary en3;
    en3.initFromString(pickle);
    bool ok = false;
    try {
      en3.initFromString(pickle);
    } catch (const ValueErrorException &) {
      ok = true;
    }
    TEST_ASSERT(ok);
  }
  BOOST_LOG(rdInfoLog) << "\tdone" << std::endl;
}
#else
// Without boost serialization there is nothing to test; keep a no-op so
// main() can call the function unconditionally.
void testGithub1657() {}
#endif
380 
main()381 int main() {
382   RDLog::InitLogs();
383 #if 1
384   testSamplers();
385   testEvenSamplers();
386   testEnumerations();
387   testInsaneEnumerations();
388 #endif
389   testGithub1657();
390 }
391