1 //  Copyright (c) 2017-2019, Novartis Institutes for BioMedical Research Inc.
2 //  All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 //       notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 //       copyright notice, this list of conditions and the following
12 //       disclaimer in the documentation and/or other materials provided
13 //       with the distribution.
14 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 //       nor the names of its contributors may be used to endorse or promote
16 //       products derived from this software without specific prior written
17 //       permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 
32 // std bits
33 #include <RDGeneral/test.h>
34 #include <iostream>
35 
36 // RD bits
37 #include <GraphMol/RDKitBase.h>
38 #include <GraphMol/RDKitQueries.h>
39 #include <GraphMol/SubstructLibrary/SubstructLibrary.h>
40 #include <GraphMol/SubstructLibrary/PatternFactory.h>
41 
42 #include <GraphMol/Substruct/SubstructMatch.h>
43 
44 #include <GraphMol/SmilesParse/SmilesParse.h>
45 #include <GraphMol/SmilesParse/SmilesWrite.h>
46 #include <GraphMol/FileParsers/FileParsers.h>
47 #include <GraphMol/FileParsers/MolSupplier.h>
48 
49 using namespace RDKit;
50 
51 namespace {
runTest(SubstructLibrary & ssslib,const ROMol & pattern,int nThreads)52 boost::dynamic_bitset<> runTest(SubstructLibrary &ssslib, const ROMol &pattern,
53                                 int nThreads) {
54   std::vector<unsigned int> libMatches = ssslib.getMatches(pattern, nThreads);
55   boost::dynamic_bitset<> hasMatch(ssslib.size());
56   for (auto idx : libMatches) {
57     hasMatch[idx] = 1;
58   }
59 
60   for (unsigned int i = 0; i < ssslib.size(); ++i) {
61     MatchVectType match;
62     bool matched = SubstructMatch(*ssslib.getMol(i), pattern, match);
63     // std::cerr << MolToSmiles(*ssslib.getMol(i), true) << " " << hasMatch[i]
64     //           << " " << matched << std::endl;
65     TEST_ASSERT(hasMatch[i] == matched);
66   }
67   return hasMatch;
68 };
69 
runTest(SubstructLibrary & ssslib,const ROMol & pattern,int nThreads,const boost::dynamic_bitset<> & hasMatch)70 void runTest(SubstructLibrary &ssslib, const ROMol &pattern, int nThreads,
71              const boost::dynamic_bitset<> &hasMatch) {
72   std::vector<unsigned int> libMatches = ssslib.getMatches(pattern, nThreads);
73   boost::dynamic_bitset<> hasMatch2(ssslib.size());
74   for (auto idx : libMatches) {
75     hasMatch2[idx] = 1;
76   }
77   TEST_ASSERT(hasMatch == hasMatch2);
78 
79   for (unsigned int i = 0; i < ssslib.size(); ++i) {
80     MatchVectType match;
81     bool matched = SubstructMatch(*ssslib.getMol(i), pattern, match);
82     // std::cerr << MolToSmiles(*ssslib.getMol(i), true) << " " << hasMatch[i]
83     //           << " " << matched << std::endl;
84     TEST_ASSERT(hasMatch[i] == matched);
85   }
86 };
87 
88 }  // namespace
89 
test1()90 void test1() {
91   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
92   BOOST_LOG(rdErrorLog) << "    Test1" << std::endl;
93 
94   std::string fName = getenv("RDBASE");
95   fName += "/Data/NCI/first_200.props.sdf";
96   SDMolSupplier suppl(fName);
97   SubstructLibrary ssslib;
98   while (!suppl.atEnd()) {
99     ROMol *mol = nullptr;
100     try {
101       mol = suppl.next();
102     } catch (...) {
103       continue;
104     }
105     if (!mol) {
106       continue;
107     }
108     ssslib.addMol(*mol);
109     delete mol;
110   }
111 
112   std::vector<SubstructLibrary *> libs;
113   libs.push_back(&ssslib);
114 
115 #ifdef RDK_USE_BOOST_SERIALIZATION
116   std::string pickle = ssslib.Serialize();
117   SubstructLibrary serialized;
118   serialized.initFromString(pickle);
119   TEST_ASSERT(serialized.size() == ssslib.size());
120   libs.push_back(&serialized);
121 #endif
122 
123   boost::dynamic_bitset<> hasMatch;
124 
125   int i = 0;
126   for (auto lib : libs) {
127     ROMol *query = SmartsToMol("[#6;$([#6]([#6])[!#6])]");
128     if (i == 0) {
129       hasMatch = runTest(*lib, *query, 1);
130     } else {
131       runTest(*lib, *query, 1, hasMatch);
132     }
133 
134 #ifdef RDK_TEST_MULTITHREADED
135     runTest(*lib, *query, -1, hasMatch);
136 #endif
137     delete query;
138     ++i;
139   }
140 
141   i = 0;
142   for (auto lib : libs) {
143     ROMol *query = SmartsToMol("[$([O,S]-[!$(*=O)])]");
144     if (i == 0) {
145       hasMatch = runTest(*lib, *query, 1);
146     } else {
147       runTest(*lib, *query, 1, hasMatch);
148     }
149 
150 #ifdef RDK_TEST_MULTITHREADED
151     runTest(*lib, *query, -1, hasMatch);
152 #endif
153     delete query;
154     ++i;
155   }
156 
157   BOOST_LOG(rdErrorLog) << "  done" << std::endl;
158 }
159 
test2()160 void test2() {
161   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
162   BOOST_LOG(rdErrorLog) << "    Test2" << std::endl;
163 
164   std::string fName = getenv("RDBASE");
165   fName += "/Data/NCI/first_200.props.sdf";
166   SDMolSupplier suppl(fName);
167   auto *mols = new MolHolder();
168   auto *fps = new PatternHolder();
169   boost::shared_ptr<MolHolder> mols_ptr(mols);
170   boost::shared_ptr<PatternHolder> fps_ptr(fps);
171 
172   SubstructLibrary ssslib(mols_ptr, fps_ptr);
173   while (!suppl.atEnd()) {
174     ROMol *mol = nullptr;
175     try {
176       mol = suppl.next();
177     } catch (...) {
178       continue;
179     }
180     if (!mol) {
181       continue;
182     }
183     ssslib.addMol(*mol);
184     delete mol;
185   }
186 
187   std::vector<SubstructLibrary *> libs;
188   libs.push_back(&ssslib);
189 
190 #ifdef RDK_USE_BOOST_SERIALIZATION
191   std::string pickle = ssslib.Serialize();
192   SubstructLibrary serialized;
193   serialized.initFromString(pickle);
194   TEST_ASSERT(serialized.size() == ssslib.size());
195 
196   // check to see if we are still the right base type
197   MolHolderBase *_holder = serialized.getMolHolder().get();
198   TEST_ASSERT(_holder != nullptr);
199   TEST_ASSERT(dynamic_cast<MolHolder *>(_holder) != nullptr);
200   try {
201     serialized.getFingerprints();
202   } catch (...) {
203     TEST_ASSERT(0);
204   }
205 
206   libs.push_back(&serialized);
207 #endif
208 
209   for (auto lib : libs) {
210     ROMol *query = SmartsToMol("[#6]([#6])[!#6]");
211     runTest(*lib, *query, 1);
212 #ifdef RDK_TEST_MULTITHREADED
213     runTest(*lib, *query, -1);
214 #endif
215     delete query;
216   }
217 
218   BOOST_LOG(rdErrorLog) << "  done" << std::endl;
219 }
220 
test3()221 void test3() {
222   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
223   BOOST_LOG(rdErrorLog) << "    Test3 (stereo options)" << std::endl;
224 
225   SubstructLibrary ssslib(boost::make_shared<MolHolder>());
226   for (int i = 0; i < 10; ++i) {
227     ROMol *m1 = SmilesToMol("C1CCO[C@@](N)(O)1");
228     ROMol *m2 = SmilesToMol("C1CCO[C@](N)(O)1");
229     ROMol *m3 = SmilesToMol("C1CCO[C@@](O)(N)1");
230     ROMol *m4 = SmilesToMol("C1CCO[C@](O)(N)1");
231     ssslib.addMol(*m1);
232     ssslib.addMol(*m2);
233     ssslib.addMol(*m3);
234     ssslib.addMol(*m4);
235     delete m1;
236     delete m2;
237     delete m3;
238     delete m4;
239   }
240 
241   std::vector<SubstructLibrary *> libs;
242   libs.push_back(&ssslib);
243 
244 #ifdef RDK_USE_BOOST_SERIALIZATION
245   std::string pickle = ssslib.Serialize();
246   SubstructLibrary serialized;
247   serialized.initFromString(pickle);
248   TEST_ASSERT(serialized.size() == ssslib.size());
249   libs.push_back(&serialized);
250   // check to see if we are still the right base type
251   MolHolderBase *_holder = serialized.getMolHolder().get();
252   TEST_ASSERT(_holder != nullptr);
253   TEST_ASSERT(dynamic_cast<MolHolder *>(_holder) != nullptr);
254 #endif
255 
256   for (auto lib : libs) {
257     ROMol *query = SmartsToMol("C-1-C-C-O-C(-[O])(-[N])1");
258     std::vector<unsigned int> res = lib->getMatches(*query, true, false);
259     TEST_ASSERT(res.size() == 40);
260 
261     delete query;
262     query = SmartsToMol("C-1-C-C-O-[C@@](-[O])(-[N])1");
263 
264     res = lib->getMatches(*query, true, true);
265     TEST_ASSERT(res.size() == 20);
266 
267     res = lib->getMatches(*query, true, false);
268     TEST_ASSERT(res.size() == 40);
269 
270     delete query;
271   }
272   BOOST_LOG(rdErrorLog) << "    Done (stereo options)" << std::endl;
273 }
274 
test4()275 void test4() {
276   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
277   BOOST_LOG(rdErrorLog) << "    Test4 (trusted smiles)" << std::endl;
278 
279   boost::shared_ptr<CachedSmilesMolHolder> holder =
280       boost::make_shared<CachedSmilesMolHolder>();
281   SubstructLibrary ssslib(holder);
282 
283   for (int i = 0; i < 10; ++i) {
284     holder->addSmiles("C1CCO[C@@](N)(O)1");
285     holder->addSmiles("C1CCO[C@](N)(O)1");
286     holder->addSmiles("C1CCO[C@@](O)(N)1");
287     holder->addSmiles("C1CCO[C@](O)(N)1");
288   }
289 
290   std::vector<SubstructLibrary *> libs;
291   libs.push_back(&ssslib);
292 
293 #ifdef RDK_USE_BOOST_SERIALIZATION
294   std::string pickle = ssslib.Serialize();
295   SubstructLibrary serialized;
296   serialized.initFromString(pickle);
297   TEST_ASSERT(serialized.size() == ssslib.size());
298   libs.push_back(&serialized);
299   // check to see if we are still the right base type
300   MolHolderBase *_holder = serialized.getMolHolder().get();
301   TEST_ASSERT(_holder != nullptr);
302   TEST_ASSERT(dynamic_cast<CachedSmilesMolHolder *>(_holder) != nullptr);
303 #endif
304 
305   for (auto lib : libs) {
306     ROMol *query = SmartsToMol("C-1-C-C-O-C(-[O])(-[N])1");
307 
308     std::vector<unsigned int> res = lib->getMatches(*query, true, false);
309     TEST_ASSERT(res.size() == 40);
310 
311     delete query;
312     query = SmartsToMol("C-1-C-C-O-[C@@](-[O])(-[N])1");
313 
314     res = lib->getMatches(*query, true, true);
315     TEST_ASSERT(res.size() == 20);
316 
317     res = lib->getMatches(*query, true, false);
318     TEST_ASSERT(res.size() == 40);
319     delete query;
320   }
321 
322   BOOST_LOG(rdErrorLog) << "    Done (trusted smiles)" << std::endl;
323 }
324 
325 /// Tests the code in the docs
326 //   to make sure it compiles.
docTest()327 void docTest() {
328   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
329   BOOST_LOG(rdErrorLog) << "    Testing C++ docs" << std::endl;
330 
331   ROMol *q = SmartsToMol("C-1-C-C-O-C(-[O])(-[N])1");
332   ROMol *m = SmilesToMol("C1CCO[C@@](N)(O)1");
333   ROMol &query = *q;
334   ROMol &mol = *m;
335 
336   {
337     SubstructLibrary lib;
338     lib.addMol(mol);
339     std::vector<unsigned int> results = lib.getMatches(query);
340     for (std::vector<unsigned int>::const_iterator matchIndex = results.begin();
341          matchIndex != results.end(); ++matchIndex) {
342       boost::shared_ptr<ROMol> match = lib.getMol(*matchIndex);
343     }
344   }
345 
346   {
347     boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder =
348         boost::make_shared<CachedTrustedSmilesMolHolder>();
349     boost::shared_ptr<PatternHolder> patternHolder =
350         boost::make_shared<PatternHolder>();
351 
352     SubstructLibrary lib(molHolder, patternHolder);
353     lib.addMol(mol);
354   }
355 
356   {
357     boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder =
358         boost::make_shared<CachedTrustedSmilesMolHolder>();
359     boost::shared_ptr<PatternHolder> patternHolder =
360         boost::make_shared<PatternHolder>();
361 
362     // the PatternHolder instance is able to make fingerprints.
363     //  These, of course, can be read from a file.  For demonstration
364     //   purposes we construct them here.
365     const std::string trustedSmiles = "c1ccccc1";
366     ROMol *m = SmilesToMol(trustedSmiles);
367     const ExplicitBitVect *bitVector = patternHolder->makeFingerprint(*m);
368 
369     // The trusted smiles and bitVector can be read from any source.
370     //  This is the fastest way to load a substruct library.
371     molHolder->addSmiles(trustedSmiles);
372     patternHolder->addFingerprint(*bitVector);
373     SubstructLibrary lib(molHolder, patternHolder);
374     delete m;
375     delete bitVector;
376   }
377 
378   delete q;
379   delete m;
380   BOOST_LOG(rdErrorLog) << "    Done (C++ doc tests)" << std::endl;
381 }
382 
383 template <class Holder>
ringTest(const std::string & name)384 void ringTest(const std::string &name) {
385   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
386   BOOST_LOG(rdErrorLog) << "    Testing C++ ring query: " << name << std::endl;
387 
388   std::unique_ptr<ROMol> q(SmartsToMol("[C&R1]"));
389   std::unique_ptr<ROMol> q2(SmartsToMol("C@C"));
390 
391   std::unique_ptr<ROMol> m(SmilesToMol("C1CCO[C@@](N)(O)1"));
392 
393   boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder =
394       boost::make_shared<CachedTrustedSmilesMolHolder>();
395   boost::shared_ptr<Holder> patternHolder = boost::make_shared<Holder>();
396 
397   SubstructLibrary lib(molHolder, patternHolder);
398   lib.addMol(*m.get());
399   std::vector<unsigned int> results = lib.getMatches(*q.get());
400   TEST_ASSERT(results.size() == 1);
401   results = lib.getMatches(*q2.get());
402   TEST_ASSERT(results.size() == 1);
403 
404   BOOST_LOG(rdErrorLog) << "    Done (C++ ring query tests)" << std::endl;
405 }
406 
testAddPatterns()407 void testAddPatterns() {
408   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
409   BOOST_LOG(rdErrorLog) << "   Add Patterns " << std::endl;
410   std::vector<std::string> pdb_ligands = {
411       "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
412       "COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
413       "COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
414       "COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
415       "COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
416       "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
417       "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
418       "CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
419       "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
420       "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
421       "COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
422       "CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
423       "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
424       "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
425       "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
426       "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
427       "Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
428       "COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
429       "COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
430       "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
431       "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
432       "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
433       "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
434       "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
435       "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
436       "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
437       "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
438       "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
439       "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21"};
440 
441   boost::shared_ptr<CachedSmilesMolHolder> holder =
442       boost::make_shared<CachedSmilesMolHolder>();
443 
444   for (auto s : pdb_ligands) {
445     holder->addSmiles(s);
446   }
447 
448   SubstructLibrary ssslib(holder);
449   std::vector<int> num_threads = {1, 0};
450   for (auto nthreads : num_threads) {
451     SubstructLibrary ssslib_with_patterns(holder);
452     SubstructLibrary ssslib_with_taut_patterns(holder);
453     addPatterns(ssslib_with_patterns, nthreads);
454     boost::shared_ptr<TautomerPatternHolder> patterns(
455         new TautomerPatternHolder);
456     addPatterns(ssslib_with_taut_patterns, patterns, nthreads);
457     for (unsigned int i = 0; i < ssslib.size(); ++i) {
458       TEST_ASSERT(ssslib.countMatches(*ssslib.getMol(i).get()) ==
459                   ssslib_with_patterns.countMatches(*ssslib.getMol(i).get()));
460       TEST_ASSERT(
461           ssslib.countMatches(*ssslib.getMol(i).get()) ==
462           ssslib_with_taut_patterns.countMatches(*ssslib.getMol(i).get()));
463     }
464   }
465 }
466 
testMaxResultsNumThreads()467 void testMaxResultsNumThreads() {
468   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
469   BOOST_LOG(rdErrorLog) << "   Results do not depend on numThreads "
470                         << std::endl;
471 
472   std::string fName = getenv("RDBASE");
473   fName += "/Data/NCI/first_5K.smi";
474   SmilesMolSupplier suppl(fName, "\t", 0, 1, false);
475   auto *mols = new MolHolder();
476   auto *fps = new PatternHolder();
477   boost::shared_ptr<MolHolder> mols_ptr(mols);
478   boost::shared_ptr<PatternHolder> fps_ptr(fps);
479 
480   SubstructLibrary ssslib(mols_ptr, fps_ptr);
481   boost::logging::disable_logs("rdApp.error");
482   while (!suppl.atEnd()) {
483     ROMol *mol = nullptr;
484     try {
485       mol = suppl.next();
486     } catch (...) {
487       continue;
488     }
489     if (!mol) {
490       continue;
491     }
492     ssslib.addMol(*mol);
493     delete mol;
494   }
495   boost::logging::enable_logs("rdApp.error");
496   std::vector<std::vector<unsigned int>> resVect;
497   ROMOL_SPTR query(SmartsToMol("N"));
498   TEST_ASSERT(query);
499   for (auto numThreads : {1, 2, 4, 8}) {
500     resVect.emplace_back(
501         ssslib.getMatches(*query, true, false, false, numThreads));
502   }
503   for (auto it = resVect.begin() + 1; it != resVect.end(); ++it) {
504     TEST_ASSERT(resVect.front().size() == it->size());
505     for (size_t i = 0; i < resVect.front().size(); ++i) {
506       TEST_ASSERT(resVect.front().at(i) == it->at(i));
507     }
508   }
509   size_t results60 = resVect.front().size() * 0.6;
510   size_t results99 = resVect.front().size() * 0.99;
511   for (auto maxRes : {results60, results99}) {
512     std::vector<std::vector<unsigned int>> resVectPartial;
513     for (auto numThreads : {1, 2, 4, 8}) {
514       resVectPartial.emplace_back(
515           ssslib.getMatches(*query, true, false, false, numThreads, maxRes));
516     }
517     for (auto it = resVectPartial.begin(); it != resVectPartial.end(); ++it) {
518       TEST_ASSERT(it->size() == maxRes);
519       for (size_t i = 0; i < maxRes; ++i) {
520         TEST_ASSERT(resVect.front().at(i) == it->at(i));
521       }
522     }
523   }
524 }
525 
testMaxResultsAllSameNumThreads()526 void testMaxResultsAllSameNumThreads() {
527   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
528   BOOST_LOG(rdErrorLog) << "   Results do not depend on numThreads (all same) "
529                         << std::endl;
530 
531   auto *mols = new MolHolder();
532   auto *fps = new PatternHolder();
533   boost::shared_ptr<MolHolder> mols_ptr(mols);
534   boost::shared_ptr<PatternHolder> fps_ptr(fps);
535 
536   SubstructLibrary ssslib(mols_ptr, fps_ptr);
537   boost::logging::disable_logs("rdApp.error");
538   auto mol = "N"_smiles;
539   for (int i = 0; i < 999; ++i) {
540     ssslib.addMol(*mol);
541   }
542 
543   boost::logging::enable_logs("rdApp.error");
544   std::vector<std::vector<unsigned int>> resVect;
545   ROMOL_SPTR query(SmartsToMol("N"));
546   TEST_ASSERT(query);
547   for (auto numThreads : {1, 2, 4, 8}) {
548     resVect.emplace_back(
549         ssslib.getMatches(*query, true, false, false, numThreads));
550     TEST_ASSERT(resVect.back().size() == 999);
551   }
552   for (auto it = resVect.begin() + 1; it != resVect.end(); ++it) {
553     TEST_ASSERT(resVect.front().size() == it->size());
554     for (size_t i = 0; i < resVect.front().size(); ++i) {
555       TEST_ASSERT(resVect.front().at(i) == it->at(i));
556     }
557   }
558   size_t results60 = resVect.front().size() * 0.6;
559   size_t results99 = resVect.front().size() * 0.99;
560   for (auto maxRes : {results60, results99}) {
561     std::vector<std::vector<unsigned int>> resVectPartial;
562     for (auto numThreads : {1, 2, 4, 8}) {
563       resVectPartial.emplace_back(
564           ssslib.getMatches(*query, true, false, false, numThreads, maxRes));
565     }
566     for (auto it = resVectPartial.begin(); it != resVectPartial.end(); ++it) {
567       TEST_ASSERT(it->size() == maxRes);
568       for (size_t i = 0; i < maxRes; ++i) {
569         TEST_ASSERT(resVect.front().at(i) == it->at(i));
570       }
571     }
572   }
573 }
574 
575 template <class Holder>
testPatternHolder(const std::string & name)576 void testPatternHolder(const std::string &name) {
577   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
578   BOOST_LOG(rdErrorLog) << "   testing " << name << std::endl;
579 
580   std::string fName = getenv("RDBASE");
581   fName += "/Data/NCI/first_5K.smi";
582   SmilesMolSupplier suppl(fName, "\t", 0, 1, false);
583   boost::shared_ptr<CachedTrustedSmilesMolHolder> mols1(
584       new CachedTrustedSmilesMolHolder());
585   boost::shared_ptr<Holder> fps1(new Holder());
586   SubstructLibrary ssslib1(mols1, fps1);
587   boost::shared_ptr<CachedTrustedSmilesMolHolder> mols2(
588       new CachedTrustedSmilesMolHolder());
589   boost::shared_ptr<Holder> fps2(new Holder());
590   SubstructLibrary ssslib2(mols2, fps2);
591 
592   boost::logging::disable_logs("rdApp.error");
593   for (unsigned int i = 0; i < 1000; i += 10) {
594     ROMol *mol = nullptr;
595     try {
596       mol = suppl[i];
597     } catch (...) {
598       continue;
599     }
600     if (!mol) {
601       continue;
602     }
603     mols1->addSmiles(MolToSmiles(*mol));
604     fps1->addFingerprint(fps1->makeFingerprint(*mol));
605     ssslib2.addMol(*mol);
606     delete mol;
607   }
608   boost::logging::enable_logs("rdApp.error");
609   ROMOL_SPTR query(SmartsToMol("N"));
610   TEST_ASSERT(query);
611   {
612     auto matches1 = ssslib1.getMatches(*query);
613     std::sort(matches1.begin(), matches1.end());
614     auto matches2 = ssslib2.getMatches(*query);
615     std::sort(matches2.begin(), matches2.end());
616     TEST_ASSERT(matches1.size() == matches2.size());
617     for (size_t i = 0; i < matches1.size(); ++i) {
618       TEST_ASSERT(matches1.at(i) == matches2.at(i));
619     }
620   }
621 #ifdef RDK_USE_BOOST_SERIALIZATION
622   std::string pickle = ssslib1.Serialize();
623   SubstructLibrary serialized;
624   serialized.initFromString(pickle);
625   TEST_ASSERT(serialized.size() == ssslib1.size());
626   SubstructLibrary serializedLegacy;
627   std::string pklName = getenv("RDBASE");
628   TEST_ASSERT(!pklName.empty());
629   pklName += "/Code/GraphMol/test_data/substructLibV1.pkl";
630   std::ifstream pickle_istream(pklName.c_str(), std::ios_base::binary);
631   serializedLegacy.initFromStream(pickle_istream);
632   pickle_istream.close();
633   TEST_ASSERT(serializedLegacy.size() == serialized.size());
634   {
635     auto matches1 = serializedLegacy.getMatches(*query);
636     std::sort(matches1.begin(), matches1.end());
637     auto matches2 = serialized.getMatches(*query);
638     std::sort(matches2.begin(), matches2.end());
639     TEST_ASSERT(matches1.size() == matches2.size());
640     for (size_t i = 0; i < matches1.size(); ++i) {
641       TEST_ASSERT(matches1.at(i) == matches2.at(i));
642     }
643   }
644   for (size_t i = 0; i < 2; ++i) {
645     auto serialized_pattern_holder =
646         dynamic_cast<Holder *>(serialized.getFpHolder().get());
647     TEST_ASSERT(serialized_pattern_holder);
648     auto orig_pattern_holder =
649         dynamic_cast<Holder *>(ssslib1.getFpHolder().get());
650     TEST_ASSERT(orig_pattern_holder);
651     TEST_ASSERT(serialized_pattern_holder->getNumBits() ==
652                 orig_pattern_holder->getNumBits());
653     if (i) {
654       break;
655     }
656     orig_pattern_holder->getNumBits() = 1024;
657     pickle = ssslib1.Serialize();
658     serialized.initFromString(pickle);
659   }
660 #endif
661 }
662 
testSegFaultInHolder()663 void testSegFaultInHolder() {
664   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
665   BOOST_LOG(rdErrorLog) << "   testSegFaultInHolder" << std::endl;
666 
667   boost::shared_ptr<CachedTrustedSmilesMolHolder> mols1(
668       new CachedTrustedSmilesMolHolder());
669   boost::shared_ptr<CachedSmilesMolHolder> mols2(new CachedSmilesMolHolder());
670   for (int i = 0; i < 100; ++i) {
671     if (i % 2 == 0) {
672       mols1->addSmiles("dsafsdf");
673       mols2->addSmiles("dsafsdf");
674     } else {
675       mols1->addSmiles("c1ccccc1");
676       mols2->addSmiles("c1ccccc1");
677     }
678   }
679   SubstructLibrary sss(mols1);
680   SubstructLibrary sss2(mols2);
681   ROMOL_SPTR query(SmartsToMol("c1ccccc1"));
682   auto matches1 = sss.getMatches(*query);
683   TEST_ASSERT(matches1.size() == 50);
684   matches1 = sss2.getMatches(*query);
685   TEST_ASSERT(matches1.size() == 50);
686 
687   // Check that we don't segfault when adding patterns
688   addPatterns(sss, 2);
689   addPatterns(sss2, 2);
690 }
691 
testTautomerQueries()692 void testTautomerQueries() {
693   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
694   BOOST_LOG(rdErrorLog) << "   testTautomerQueries" << std::endl;
695 
696   boost::shared_ptr<CachedTrustedSmilesMolHolder> mols1(
697       new CachedTrustedSmilesMolHolder());
698   mols1->addSmiles("CN1C2=C(C(=O)Nc3ccccc3)C(=O)CCN2c2ccccc21");
699   SubstructLibrary sss(mols1);
700   auto query = "Cc1nc2ccccc2[nH]1"_smiles;
701   // auto matches1 = sss.getMatches(*query);
702   // TEST_ASSERT(matches1.size() == 0);
703   std::unique_ptr<TautomerQuery> tq(TautomerQuery::fromMol(*query));
704   auto matches2 = sss.getMatches(*tq);
705   TEST_ASSERT(matches2.size() == 1);
706 
707   SubstructLibrary sss2(sss);
708   addPatterns(sss, boost::make_shared<TautomerPatternHolder>());
709   matches2 = sss.getMatches(*tq);
710   TEST_ASSERT(matches2.size() == 1);
711 
712   // should work but throw logging errors
713   addPatterns(sss2);
714   matches2 = sss2.getMatches(*tq);
715   TEST_ASSERT(matches2.size() == 1);
716 }
717 
github3881()718 void github3881() {
719   BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
720   BOOST_LOG(rdErrorLog) << "  github3881 recursive smarts with rings "
721                         << std::endl;
722   boost::shared_ptr<CachedTrustedSmilesMolHolder> mols(
723       new CachedTrustedSmilesMolHolder());
724   mols->addSmiles("c1ccccc1S(=O)(=O)Cl");
725   SubstructLibrary sss(mols);
726   auto pat = "[$(S-!@[#6]):2](=O)(=O)(Cl)"_smarts;
727   TEST_ASSERT(sss.getMatches(*pat).size() == 1);
728 }
729 
main()730 int main() {
731   RDLog::InitLogs();
732 #if 1
733   test1();
734   test2();
735   test3();
736   test4();
737   docTest();
738   ringTest<PatternHolder>("PatternHolder");
739   ringTest<TautomerPatternHolder>("TautomerPatternHolder");
740   testAddPatterns();
741   testPatternHolder<PatternHolder>("PatternHolder");
742   testPatternHolder<TautomerPatternHolder>("TautomerPatternHolder");
743   testSegFaultInHolder();
744 #ifdef RDK_TEST_MULTITHREADED
745   testMaxResultsNumThreads();
746   testMaxResultsAllSameNumThreads();
747   testTautomerQueries();
748 #endif
749   github3881();
750 #endif
751   return 0;
752 }
753