1 // Copyright (c) 2017-2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31
32 // std bits
33 #include <RDGeneral/test.h>
34 #include <iostream>
35
36 // RD bits
37 #include <GraphMol/RDKitBase.h>
38 #include <GraphMol/RDKitQueries.h>
39 #include <GraphMol/SubstructLibrary/SubstructLibrary.h>
40 #include <GraphMol/SubstructLibrary/PatternFactory.h>
41
42 #include <GraphMol/Substruct/SubstructMatch.h>
43
44 #include <GraphMol/SmilesParse/SmilesParse.h>
45 #include <GraphMol/SmilesParse/SmilesWrite.h>
46 #include <GraphMol/FileParsers/FileParsers.h>
47 #include <GraphMol/FileParsers/MolSupplier.h>
48
49 using namespace RDKit;
50
51 namespace {
runTest(SubstructLibrary & ssslib,const ROMol & pattern,int nThreads)52 boost::dynamic_bitset<> runTest(SubstructLibrary &ssslib, const ROMol &pattern,
53 int nThreads) {
54 std::vector<unsigned int> libMatches = ssslib.getMatches(pattern, nThreads);
55 boost::dynamic_bitset<> hasMatch(ssslib.size());
56 for (auto idx : libMatches) {
57 hasMatch[idx] = 1;
58 }
59
60 for (unsigned int i = 0; i < ssslib.size(); ++i) {
61 MatchVectType match;
62 bool matched = SubstructMatch(*ssslib.getMol(i), pattern, match);
63 // std::cerr << MolToSmiles(*ssslib.getMol(i), true) << " " << hasMatch[i]
64 // << " " << matched << std::endl;
65 TEST_ASSERT(hasMatch[i] == matched);
66 }
67 return hasMatch;
68 };
69
runTest(SubstructLibrary & ssslib,const ROMol & pattern,int nThreads,const boost::dynamic_bitset<> & hasMatch)70 void runTest(SubstructLibrary &ssslib, const ROMol &pattern, int nThreads,
71 const boost::dynamic_bitset<> &hasMatch) {
72 std::vector<unsigned int> libMatches = ssslib.getMatches(pattern, nThreads);
73 boost::dynamic_bitset<> hasMatch2(ssslib.size());
74 for (auto idx : libMatches) {
75 hasMatch2[idx] = 1;
76 }
77 TEST_ASSERT(hasMatch == hasMatch2);
78
79 for (unsigned int i = 0; i < ssslib.size(); ++i) {
80 MatchVectType match;
81 bool matched = SubstructMatch(*ssslib.getMol(i), pattern, match);
82 // std::cerr << MolToSmiles(*ssslib.getMol(i), true) << " " << hasMatch[i]
83 // << " " << matched << std::endl;
84 TEST_ASSERT(hasMatch[i] == matched);
85 }
86 };
87
88 } // namespace
89
test1()90 void test1() {
91 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
92 BOOST_LOG(rdErrorLog) << " Test1" << std::endl;
93
94 std::string fName = getenv("RDBASE");
95 fName += "/Data/NCI/first_200.props.sdf";
96 SDMolSupplier suppl(fName);
97 SubstructLibrary ssslib;
98 while (!suppl.atEnd()) {
99 ROMol *mol = nullptr;
100 try {
101 mol = suppl.next();
102 } catch (...) {
103 continue;
104 }
105 if (!mol) {
106 continue;
107 }
108 ssslib.addMol(*mol);
109 delete mol;
110 }
111
112 std::vector<SubstructLibrary *> libs;
113 libs.push_back(&ssslib);
114
115 #ifdef RDK_USE_BOOST_SERIALIZATION
116 std::string pickle = ssslib.Serialize();
117 SubstructLibrary serialized;
118 serialized.initFromString(pickle);
119 TEST_ASSERT(serialized.size() == ssslib.size());
120 libs.push_back(&serialized);
121 #endif
122
123 boost::dynamic_bitset<> hasMatch;
124
125 int i = 0;
126 for (auto lib : libs) {
127 ROMol *query = SmartsToMol("[#6;$([#6]([#6])[!#6])]");
128 if (i == 0) {
129 hasMatch = runTest(*lib, *query, 1);
130 } else {
131 runTest(*lib, *query, 1, hasMatch);
132 }
133
134 #ifdef RDK_TEST_MULTITHREADED
135 runTest(*lib, *query, -1, hasMatch);
136 #endif
137 delete query;
138 ++i;
139 }
140
141 i = 0;
142 for (auto lib : libs) {
143 ROMol *query = SmartsToMol("[$([O,S]-[!$(*=O)])]");
144 if (i == 0) {
145 hasMatch = runTest(*lib, *query, 1);
146 } else {
147 runTest(*lib, *query, 1, hasMatch);
148 }
149
150 #ifdef RDK_TEST_MULTITHREADED
151 runTest(*lib, *query, -1, hasMatch);
152 #endif
153 delete query;
154 ++i;
155 }
156
157 BOOST_LOG(rdErrorLog) << " done" << std::endl;
158 }
159
test2()160 void test2() {
161 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
162 BOOST_LOG(rdErrorLog) << " Test2" << std::endl;
163
164 std::string fName = getenv("RDBASE");
165 fName += "/Data/NCI/first_200.props.sdf";
166 SDMolSupplier suppl(fName);
167 auto *mols = new MolHolder();
168 auto *fps = new PatternHolder();
169 boost::shared_ptr<MolHolder> mols_ptr(mols);
170 boost::shared_ptr<PatternHolder> fps_ptr(fps);
171
172 SubstructLibrary ssslib(mols_ptr, fps_ptr);
173 while (!suppl.atEnd()) {
174 ROMol *mol = nullptr;
175 try {
176 mol = suppl.next();
177 } catch (...) {
178 continue;
179 }
180 if (!mol) {
181 continue;
182 }
183 ssslib.addMol(*mol);
184 delete mol;
185 }
186
187 std::vector<SubstructLibrary *> libs;
188 libs.push_back(&ssslib);
189
190 #ifdef RDK_USE_BOOST_SERIALIZATION
191 std::string pickle = ssslib.Serialize();
192 SubstructLibrary serialized;
193 serialized.initFromString(pickle);
194 TEST_ASSERT(serialized.size() == ssslib.size());
195
196 // check to see if we are still the right base type
197 MolHolderBase *_holder = serialized.getMolHolder().get();
198 TEST_ASSERT(_holder != nullptr);
199 TEST_ASSERT(dynamic_cast<MolHolder *>(_holder) != nullptr);
200 try {
201 serialized.getFingerprints();
202 } catch (...) {
203 TEST_ASSERT(0);
204 }
205
206 libs.push_back(&serialized);
207 #endif
208
209 for (auto lib : libs) {
210 ROMol *query = SmartsToMol("[#6]([#6])[!#6]");
211 runTest(*lib, *query, 1);
212 #ifdef RDK_TEST_MULTITHREADED
213 runTest(*lib, *query, -1);
214 #endif
215 delete query;
216 }
217
218 BOOST_LOG(rdErrorLog) << " done" << std::endl;
219 }
220
test3()221 void test3() {
222 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
223 BOOST_LOG(rdErrorLog) << " Test3 (stereo options)" << std::endl;
224
225 SubstructLibrary ssslib(boost::make_shared<MolHolder>());
226 for (int i = 0; i < 10; ++i) {
227 ROMol *m1 = SmilesToMol("C1CCO[C@@](N)(O)1");
228 ROMol *m2 = SmilesToMol("C1CCO[C@](N)(O)1");
229 ROMol *m3 = SmilesToMol("C1CCO[C@@](O)(N)1");
230 ROMol *m4 = SmilesToMol("C1CCO[C@](O)(N)1");
231 ssslib.addMol(*m1);
232 ssslib.addMol(*m2);
233 ssslib.addMol(*m3);
234 ssslib.addMol(*m4);
235 delete m1;
236 delete m2;
237 delete m3;
238 delete m4;
239 }
240
241 std::vector<SubstructLibrary *> libs;
242 libs.push_back(&ssslib);
243
244 #ifdef RDK_USE_BOOST_SERIALIZATION
245 std::string pickle = ssslib.Serialize();
246 SubstructLibrary serialized;
247 serialized.initFromString(pickle);
248 TEST_ASSERT(serialized.size() == ssslib.size());
249 libs.push_back(&serialized);
250 // check to see if we are still the right base type
251 MolHolderBase *_holder = serialized.getMolHolder().get();
252 TEST_ASSERT(_holder != nullptr);
253 TEST_ASSERT(dynamic_cast<MolHolder *>(_holder) != nullptr);
254 #endif
255
256 for (auto lib : libs) {
257 ROMol *query = SmartsToMol("C-1-C-C-O-C(-[O])(-[N])1");
258 std::vector<unsigned int> res = lib->getMatches(*query, true, false);
259 TEST_ASSERT(res.size() == 40);
260
261 delete query;
262 query = SmartsToMol("C-1-C-C-O-[C@@](-[O])(-[N])1");
263
264 res = lib->getMatches(*query, true, true);
265 TEST_ASSERT(res.size() == 20);
266
267 res = lib->getMatches(*query, true, false);
268 TEST_ASSERT(res.size() == 40);
269
270 delete query;
271 }
272 BOOST_LOG(rdErrorLog) << " Done (stereo options)" << std::endl;
273 }
274
test4()275 void test4() {
276 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
277 BOOST_LOG(rdErrorLog) << " Test4 (trusted smiles)" << std::endl;
278
279 boost::shared_ptr<CachedSmilesMolHolder> holder =
280 boost::make_shared<CachedSmilesMolHolder>();
281 SubstructLibrary ssslib(holder);
282
283 for (int i = 0; i < 10; ++i) {
284 holder->addSmiles("C1CCO[C@@](N)(O)1");
285 holder->addSmiles("C1CCO[C@](N)(O)1");
286 holder->addSmiles("C1CCO[C@@](O)(N)1");
287 holder->addSmiles("C1CCO[C@](O)(N)1");
288 }
289
290 std::vector<SubstructLibrary *> libs;
291 libs.push_back(&ssslib);
292
293 #ifdef RDK_USE_BOOST_SERIALIZATION
294 std::string pickle = ssslib.Serialize();
295 SubstructLibrary serialized;
296 serialized.initFromString(pickle);
297 TEST_ASSERT(serialized.size() == ssslib.size());
298 libs.push_back(&serialized);
299 // check to see if we are still the right base type
300 MolHolderBase *_holder = serialized.getMolHolder().get();
301 TEST_ASSERT(_holder != nullptr);
302 TEST_ASSERT(dynamic_cast<CachedSmilesMolHolder *>(_holder) != nullptr);
303 #endif
304
305 for (auto lib : libs) {
306 ROMol *query = SmartsToMol("C-1-C-C-O-C(-[O])(-[N])1");
307
308 std::vector<unsigned int> res = lib->getMatches(*query, true, false);
309 TEST_ASSERT(res.size() == 40);
310
311 delete query;
312 query = SmartsToMol("C-1-C-C-O-[C@@](-[O])(-[N])1");
313
314 res = lib->getMatches(*query, true, true);
315 TEST_ASSERT(res.size() == 20);
316
317 res = lib->getMatches(*query, true, false);
318 TEST_ASSERT(res.size() == 40);
319 delete query;
320 }
321
322 BOOST_LOG(rdErrorLog) << " Done (trusted smiles)" << std::endl;
323 }
324
325 /// Tests the code in the docs
326 // to make sure it compiles.
docTest()327 void docTest() {
328 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
329 BOOST_LOG(rdErrorLog) << " Testing C++ docs" << std::endl;
330
331 ROMol *q = SmartsToMol("C-1-C-C-O-C(-[O])(-[N])1");
332 ROMol *m = SmilesToMol("C1CCO[C@@](N)(O)1");
333 ROMol &query = *q;
334 ROMol &mol = *m;
335
336 {
337 SubstructLibrary lib;
338 lib.addMol(mol);
339 std::vector<unsigned int> results = lib.getMatches(query);
340 for (std::vector<unsigned int>::const_iterator matchIndex = results.begin();
341 matchIndex != results.end(); ++matchIndex) {
342 boost::shared_ptr<ROMol> match = lib.getMol(*matchIndex);
343 }
344 }
345
346 {
347 boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder =
348 boost::make_shared<CachedTrustedSmilesMolHolder>();
349 boost::shared_ptr<PatternHolder> patternHolder =
350 boost::make_shared<PatternHolder>();
351
352 SubstructLibrary lib(molHolder, patternHolder);
353 lib.addMol(mol);
354 }
355
356 {
357 boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder =
358 boost::make_shared<CachedTrustedSmilesMolHolder>();
359 boost::shared_ptr<PatternHolder> patternHolder =
360 boost::make_shared<PatternHolder>();
361
362 // the PatternHolder instance is able to make fingerprints.
363 // These, of course, can be read from a file. For demonstration
364 // purposes we construct them here.
365 const std::string trustedSmiles = "c1ccccc1";
366 ROMol *m = SmilesToMol(trustedSmiles);
367 const ExplicitBitVect *bitVector = patternHolder->makeFingerprint(*m);
368
369 // The trusted smiles and bitVector can be read from any source.
370 // This is the fastest way to load a substruct library.
371 molHolder->addSmiles(trustedSmiles);
372 patternHolder->addFingerprint(*bitVector);
373 SubstructLibrary lib(molHolder, patternHolder);
374 delete m;
375 delete bitVector;
376 }
377
378 delete q;
379 delete m;
380 BOOST_LOG(rdErrorLog) << " Done (C++ doc tests)" << std::endl;
381 }
382
383 template <class Holder>
ringTest(const std::string & name)384 void ringTest(const std::string &name) {
385 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
386 BOOST_LOG(rdErrorLog) << " Testing C++ ring query: " << name << std::endl;
387
388 std::unique_ptr<ROMol> q(SmartsToMol("[C&R1]"));
389 std::unique_ptr<ROMol> q2(SmartsToMol("C@C"));
390
391 std::unique_ptr<ROMol> m(SmilesToMol("C1CCO[C@@](N)(O)1"));
392
393 boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder =
394 boost::make_shared<CachedTrustedSmilesMolHolder>();
395 boost::shared_ptr<Holder> patternHolder = boost::make_shared<Holder>();
396
397 SubstructLibrary lib(molHolder, patternHolder);
398 lib.addMol(*m.get());
399 std::vector<unsigned int> results = lib.getMatches(*q.get());
400 TEST_ASSERT(results.size() == 1);
401 results = lib.getMatches(*q2.get());
402 TEST_ASSERT(results.size() == 1);
403
404 BOOST_LOG(rdErrorLog) << " Done (C++ ring query tests)" << std::endl;
405 }
406
testAddPatterns()407 void testAddPatterns() {
408 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
409 BOOST_LOG(rdErrorLog) << " Add Patterns " << std::endl;
410 std::vector<std::string> pdb_ligands = {
411 "CCS(=O)(=O)c1ccc(OC)c(Nc2ncc(-c3cccc(-c4ccccn4)c3)o2)c1",
412 "COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1",
413 "COc1ccc(-c2oc3ncnc(N)c3c2-c2ccc(NC(=O)Nc3cc(C(F)(F)F)ccc3F)cc2)cc1",
414 "COC(=O)Nc1nc2ccc(Oc3ccc(NC(=O)Nc4cc(C(F)(F)F)ccc4F)cc3)cc2[nH]1",
415 "COc1cc(Nc2ncnc(-c3cccnc3Nc3ccccc3)n2)cc(OC)c1OC",
416 "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
417 "O=C(Nc1ccc(Oc2ccccc2)cc1)c1cccnc1NCc1ccncc1",
418 "CNC(=O)c1cc(Oc2ccc3[nH]c(Nc4ccc(Cl)c(C(F)(F)F)c4)nc3c2)ccn1",
419 "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
420 "CNC(=O)c1cc(Oc2ccc3oc(Nc4ccc(Cl)c(OCC5CCC[NH+]5C)c4)nc3c2)ccn1",
421 "COc1cc2nccc(Oc3ccc4c(c3)OCCN4C(=O)Nc3ccc(Cl)cc3)c2cc1OC",
422 "CNC(=O)c1c(C)oc2cc(Oc3cc[nH+]c4cc(OCCN5CCOCC5)ccc34)ccc12",
423 "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
424 "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)Nc5ccc(Cl)cc5)cccc4c3)c2cc1OC",
425 "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
426 "COc1cc2[nH+]ccc(Oc3ccc4c(C(=O)NC5CC5)cccc4c3)c2cc1OC",
427 "Cc1ccc(C(=O)Nc2cc(CCC[NH+](C)C)cc(C(F)(F)F)c2)cc1Nc1ncccc1-c1ccncn1",
428 "COc1cc(Nc2nccc(Nc3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
429 "COc1cc(Nc2nccc(N(C)c3ccc4c(C)n[nH]c4c3)n2)cc(OC)c1OC",
430 "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
431 "Cc1ccn(-c2ccc3c(c2)NCC3(C)C)c(=O)c1-c1ccc2nc(N)ncc2c1",
432 "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
433 "Cc1ccc(C(=O)NCCC2CCCC2)cc1C(=O)Nc1ccc(N)nc1",
434 "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
435 "Cc1ccn(-c2cccc(C(F)(F)F)c2)c(=O)c1-c1ccc2nc(N)ncc2c1",
436 "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
437 "O=C(Nc1cncnc1)c1c(Cl)ccc2c(Nc3cccc(C(F)(F)F)c3)noc12",
438 "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21",
439 "CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21"};
440
441 boost::shared_ptr<CachedSmilesMolHolder> holder =
442 boost::make_shared<CachedSmilesMolHolder>();
443
444 for (auto s : pdb_ligands) {
445 holder->addSmiles(s);
446 }
447
448 SubstructLibrary ssslib(holder);
449 std::vector<int> num_threads = {1, 0};
450 for (auto nthreads : num_threads) {
451 SubstructLibrary ssslib_with_patterns(holder);
452 SubstructLibrary ssslib_with_taut_patterns(holder);
453 addPatterns(ssslib_with_patterns, nthreads);
454 boost::shared_ptr<TautomerPatternHolder> patterns(
455 new TautomerPatternHolder);
456 addPatterns(ssslib_with_taut_patterns, patterns, nthreads);
457 for (unsigned int i = 0; i < ssslib.size(); ++i) {
458 TEST_ASSERT(ssslib.countMatches(*ssslib.getMol(i).get()) ==
459 ssslib_with_patterns.countMatches(*ssslib.getMol(i).get()));
460 TEST_ASSERT(
461 ssslib.countMatches(*ssslib.getMol(i).get()) ==
462 ssslib_with_taut_patterns.countMatches(*ssslib.getMol(i).get()));
463 }
464 }
465 }
466
testMaxResultsNumThreads()467 void testMaxResultsNumThreads() {
468 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
469 BOOST_LOG(rdErrorLog) << " Results do not depend on numThreads "
470 << std::endl;
471
472 std::string fName = getenv("RDBASE");
473 fName += "/Data/NCI/first_5K.smi";
474 SmilesMolSupplier suppl(fName, "\t", 0, 1, false);
475 auto *mols = new MolHolder();
476 auto *fps = new PatternHolder();
477 boost::shared_ptr<MolHolder> mols_ptr(mols);
478 boost::shared_ptr<PatternHolder> fps_ptr(fps);
479
480 SubstructLibrary ssslib(mols_ptr, fps_ptr);
481 boost::logging::disable_logs("rdApp.error");
482 while (!suppl.atEnd()) {
483 ROMol *mol = nullptr;
484 try {
485 mol = suppl.next();
486 } catch (...) {
487 continue;
488 }
489 if (!mol) {
490 continue;
491 }
492 ssslib.addMol(*mol);
493 delete mol;
494 }
495 boost::logging::enable_logs("rdApp.error");
496 std::vector<std::vector<unsigned int>> resVect;
497 ROMOL_SPTR query(SmartsToMol("N"));
498 TEST_ASSERT(query);
499 for (auto numThreads : {1, 2, 4, 8}) {
500 resVect.emplace_back(
501 ssslib.getMatches(*query, true, false, false, numThreads));
502 }
503 for (auto it = resVect.begin() + 1; it != resVect.end(); ++it) {
504 TEST_ASSERT(resVect.front().size() == it->size());
505 for (size_t i = 0; i < resVect.front().size(); ++i) {
506 TEST_ASSERT(resVect.front().at(i) == it->at(i));
507 }
508 }
509 size_t results60 = resVect.front().size() * 0.6;
510 size_t results99 = resVect.front().size() * 0.99;
511 for (auto maxRes : {results60, results99}) {
512 std::vector<std::vector<unsigned int>> resVectPartial;
513 for (auto numThreads : {1, 2, 4, 8}) {
514 resVectPartial.emplace_back(
515 ssslib.getMatches(*query, true, false, false, numThreads, maxRes));
516 }
517 for (auto it = resVectPartial.begin(); it != resVectPartial.end(); ++it) {
518 TEST_ASSERT(it->size() == maxRes);
519 for (size_t i = 0; i < maxRes; ++i) {
520 TEST_ASSERT(resVect.front().at(i) == it->at(i));
521 }
522 }
523 }
524 }
525
testMaxResultsAllSameNumThreads()526 void testMaxResultsAllSameNumThreads() {
527 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
528 BOOST_LOG(rdErrorLog) << " Results do not depend on numThreads (all same) "
529 << std::endl;
530
531 auto *mols = new MolHolder();
532 auto *fps = new PatternHolder();
533 boost::shared_ptr<MolHolder> mols_ptr(mols);
534 boost::shared_ptr<PatternHolder> fps_ptr(fps);
535
536 SubstructLibrary ssslib(mols_ptr, fps_ptr);
537 boost::logging::disable_logs("rdApp.error");
538 auto mol = "N"_smiles;
539 for (int i = 0; i < 999; ++i) {
540 ssslib.addMol(*mol);
541 }
542
543 boost::logging::enable_logs("rdApp.error");
544 std::vector<std::vector<unsigned int>> resVect;
545 ROMOL_SPTR query(SmartsToMol("N"));
546 TEST_ASSERT(query);
547 for (auto numThreads : {1, 2, 4, 8}) {
548 resVect.emplace_back(
549 ssslib.getMatches(*query, true, false, false, numThreads));
550 TEST_ASSERT(resVect.back().size() == 999);
551 }
552 for (auto it = resVect.begin() + 1; it != resVect.end(); ++it) {
553 TEST_ASSERT(resVect.front().size() == it->size());
554 for (size_t i = 0; i < resVect.front().size(); ++i) {
555 TEST_ASSERT(resVect.front().at(i) == it->at(i));
556 }
557 }
558 size_t results60 = resVect.front().size() * 0.6;
559 size_t results99 = resVect.front().size() * 0.99;
560 for (auto maxRes : {results60, results99}) {
561 std::vector<std::vector<unsigned int>> resVectPartial;
562 for (auto numThreads : {1, 2, 4, 8}) {
563 resVectPartial.emplace_back(
564 ssslib.getMatches(*query, true, false, false, numThreads, maxRes));
565 }
566 for (auto it = resVectPartial.begin(); it != resVectPartial.end(); ++it) {
567 TEST_ASSERT(it->size() == maxRes);
568 for (size_t i = 0; i < maxRes; ++i) {
569 TEST_ASSERT(resVect.front().at(i) == it->at(i));
570 }
571 }
572 }
573 }
574
575 template <class Holder>
testPatternHolder(const std::string & name)576 void testPatternHolder(const std::string &name) {
577 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
578 BOOST_LOG(rdErrorLog) << " testing " << name << std::endl;
579
580 std::string fName = getenv("RDBASE");
581 fName += "/Data/NCI/first_5K.smi";
582 SmilesMolSupplier suppl(fName, "\t", 0, 1, false);
583 boost::shared_ptr<CachedTrustedSmilesMolHolder> mols1(
584 new CachedTrustedSmilesMolHolder());
585 boost::shared_ptr<Holder> fps1(new Holder());
586 SubstructLibrary ssslib1(mols1, fps1);
587 boost::shared_ptr<CachedTrustedSmilesMolHolder> mols2(
588 new CachedTrustedSmilesMolHolder());
589 boost::shared_ptr<Holder> fps2(new Holder());
590 SubstructLibrary ssslib2(mols2, fps2);
591
592 boost::logging::disable_logs("rdApp.error");
593 for (unsigned int i = 0; i < 1000; i += 10) {
594 ROMol *mol = nullptr;
595 try {
596 mol = suppl[i];
597 } catch (...) {
598 continue;
599 }
600 if (!mol) {
601 continue;
602 }
603 mols1->addSmiles(MolToSmiles(*mol));
604 fps1->addFingerprint(fps1->makeFingerprint(*mol));
605 ssslib2.addMol(*mol);
606 delete mol;
607 }
608 boost::logging::enable_logs("rdApp.error");
609 ROMOL_SPTR query(SmartsToMol("N"));
610 TEST_ASSERT(query);
611 {
612 auto matches1 = ssslib1.getMatches(*query);
613 std::sort(matches1.begin(), matches1.end());
614 auto matches2 = ssslib2.getMatches(*query);
615 std::sort(matches2.begin(), matches2.end());
616 TEST_ASSERT(matches1.size() == matches2.size());
617 for (size_t i = 0; i < matches1.size(); ++i) {
618 TEST_ASSERT(matches1.at(i) == matches2.at(i));
619 }
620 }
621 #ifdef RDK_USE_BOOST_SERIALIZATION
622 std::string pickle = ssslib1.Serialize();
623 SubstructLibrary serialized;
624 serialized.initFromString(pickle);
625 TEST_ASSERT(serialized.size() == ssslib1.size());
626 SubstructLibrary serializedLegacy;
627 std::string pklName = getenv("RDBASE");
628 TEST_ASSERT(!pklName.empty());
629 pklName += "/Code/GraphMol/test_data/substructLibV1.pkl";
630 std::ifstream pickle_istream(pklName.c_str(), std::ios_base::binary);
631 serializedLegacy.initFromStream(pickle_istream);
632 pickle_istream.close();
633 TEST_ASSERT(serializedLegacy.size() == serialized.size());
634 {
635 auto matches1 = serializedLegacy.getMatches(*query);
636 std::sort(matches1.begin(), matches1.end());
637 auto matches2 = serialized.getMatches(*query);
638 std::sort(matches2.begin(), matches2.end());
639 TEST_ASSERT(matches1.size() == matches2.size());
640 for (size_t i = 0; i < matches1.size(); ++i) {
641 TEST_ASSERT(matches1.at(i) == matches2.at(i));
642 }
643 }
644 for (size_t i = 0; i < 2; ++i) {
645 auto serialized_pattern_holder =
646 dynamic_cast<Holder *>(serialized.getFpHolder().get());
647 TEST_ASSERT(serialized_pattern_holder);
648 auto orig_pattern_holder =
649 dynamic_cast<Holder *>(ssslib1.getFpHolder().get());
650 TEST_ASSERT(orig_pattern_holder);
651 TEST_ASSERT(serialized_pattern_holder->getNumBits() ==
652 orig_pattern_holder->getNumBits());
653 if (i) {
654 break;
655 }
656 orig_pattern_holder->getNumBits() = 1024;
657 pickle = ssslib1.Serialize();
658 serialized.initFromString(pickle);
659 }
660 #endif
661 }
662
testSegFaultInHolder()663 void testSegFaultInHolder() {
664 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
665 BOOST_LOG(rdErrorLog) << " testSegFaultInHolder" << std::endl;
666
667 boost::shared_ptr<CachedTrustedSmilesMolHolder> mols1(
668 new CachedTrustedSmilesMolHolder());
669 boost::shared_ptr<CachedSmilesMolHolder> mols2(new CachedSmilesMolHolder());
670 for (int i = 0; i < 100; ++i) {
671 if (i % 2 == 0) {
672 mols1->addSmiles("dsafsdf");
673 mols2->addSmiles("dsafsdf");
674 } else {
675 mols1->addSmiles("c1ccccc1");
676 mols2->addSmiles("c1ccccc1");
677 }
678 }
679 SubstructLibrary sss(mols1);
680 SubstructLibrary sss2(mols2);
681 ROMOL_SPTR query(SmartsToMol("c1ccccc1"));
682 auto matches1 = sss.getMatches(*query);
683 TEST_ASSERT(matches1.size() == 50);
684 matches1 = sss2.getMatches(*query);
685 TEST_ASSERT(matches1.size() == 50);
686
687 // Check that we don't segfault when adding patterns
688 addPatterns(sss, 2);
689 addPatterns(sss2, 2);
690 }
691
testTautomerQueries()692 void testTautomerQueries() {
693 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
694 BOOST_LOG(rdErrorLog) << " testTautomerQueries" << std::endl;
695
696 boost::shared_ptr<CachedTrustedSmilesMolHolder> mols1(
697 new CachedTrustedSmilesMolHolder());
698 mols1->addSmiles("CN1C2=C(C(=O)Nc3ccccc3)C(=O)CCN2c2ccccc21");
699 SubstructLibrary sss(mols1);
700 auto query = "Cc1nc2ccccc2[nH]1"_smiles;
701 // auto matches1 = sss.getMatches(*query);
702 // TEST_ASSERT(matches1.size() == 0);
703 std::unique_ptr<TautomerQuery> tq(TautomerQuery::fromMol(*query));
704 auto matches2 = sss.getMatches(*tq);
705 TEST_ASSERT(matches2.size() == 1);
706
707 SubstructLibrary sss2(sss);
708 addPatterns(sss, boost::make_shared<TautomerPatternHolder>());
709 matches2 = sss.getMatches(*tq);
710 TEST_ASSERT(matches2.size() == 1);
711
712 // should work but throw logging errors
713 addPatterns(sss2);
714 matches2 = sss2.getMatches(*tq);
715 TEST_ASSERT(matches2.size() == 1);
716 }
717
github3881()718 void github3881() {
719 BOOST_LOG(rdErrorLog) << "-------------------------------------" << std::endl;
720 BOOST_LOG(rdErrorLog) << " github3881 recursive smarts with rings "
721 << std::endl;
722 boost::shared_ptr<CachedTrustedSmilesMolHolder> mols(
723 new CachedTrustedSmilesMolHolder());
724 mols->addSmiles("c1ccccc1S(=O)(=O)Cl");
725 SubstructLibrary sss(mols);
726 auto pat = "[$(S-!@[#6]):2](=O)(=O)(Cl)"_smarts;
727 TEST_ASSERT(sss.getMatches(*pat).size() == 1);
728 }
729
main()730 int main() {
731 RDLog::InitLogs();
732 #if 1
733 test1();
734 test2();
735 test3();
736 test4();
737 docTest();
738 ringTest<PatternHolder>("PatternHolder");
739 ringTest<TautomerPatternHolder>("TautomerPatternHolder");
740 testAddPatterns();
741 testPatternHolder<PatternHolder>("PatternHolder");
742 testPatternHolder<TautomerPatternHolder>("TautomerPatternHolder");
743 testSegFaultInHolder();
744 #ifdef RDK_TEST_MULTITHREADED
745 testMaxResultsNumThreads();
746 testMaxResultsAllSameNumThreads();
747 testTautomerQueries();
748 #endif
749 github3881();
750 #endif
751 return 0;
752 }
753