1 //  Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 //  All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 //       notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 //       copyright notice, this list of conditions and the following
12 //       disclaimer in the documentation and/or other materials provided
13 //       with the distribution.
14 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 //       nor the names of its contributors may be used to endorse or promote
16 //       products derived from this software without specific prior written
17 //       permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #include <RDGeneral/test.h>
32 #include <RDGeneral/RDLog.h>
33 #include <GraphMol/RDKitBase.h>
34 #include <GraphMol/FileParsers/MolSupplier.h>
35 #include <GraphMol/FilterCatalog/FilterCatalog.h>
36 #include <GraphMol/SmilesParse/SmilesParse.h>
37 #include <fstream>
38 #include <iostream>
39 #include <map>
40 #include <algorithm>
41 
42 using namespace RDKit;
43 using namespace std;
44 
// Plain aggregate of two ints. Kept free of constructors and default member
// initializers so tables of expected atom-index pairs can be written with
// nested brace initialization, e.g. `const IntPair t[2] = {{0, 1}, {2, 3}};`.
struct IntPair {
  int first;   // first value of the pair
  int second;  // second value of the pair
};
49 
/// Writes a labelled list of pair-like elements to std::cerr (debug aid).
///
/// The output consists of a header line `<name> = { `, then one line per
/// element of the form `<TAB>{first,second},`, then a closing `}` line.
///
/// \param name label printed in front of the list
/// \param v    iterable container whose elements expose `.first` and
///             `.second` members that can be streamed
template <class T>
void dump(const std::string &name, const T &v) {
  std::cerr << name << " = { " << '\n';
  for (const auto &item : v) {
    // Braces are balanced around every pair so the output can be pasted back
    // into a brace-initialized table.
    std::cerr << "\t{" << item.first << "," << item.second << "}," << '\n';
  }
  std::cerr << "}" << '\n';
}
59 
check(MatchVectType v,MatchVectType match)60 bool check(MatchVectType v, MatchVectType match) {
61   dump("v", v);
62   dump("match", match);
63   for (size_t i = 0; i < v.size(); ++i) {
64     if (v[i].first != match[i].first) {
65       return false;
66     }
67     if (v[i].second != match[i].second) {
68       return false;
69     }
70   }
71   return true;
72 }
73 
testFilterCatalog()74 void testFilterCatalog() {
75   BOOST_LOG(rdInfoLog)
76       << "-----------------------\n Testing the filter catalog " << std::endl;
77   {
78     std::string pathName = getenv("RDBASE");
79     pathName += "/Code/GraphMol/test_data/";
80     SmilesMolSupplier suppl(pathName + "pains.smi");
81 
82     FilterCatalogParams params;
83     params.addCatalog(FilterCatalogParams::PAINS_A);
84     params.addCatalog(FilterCatalogParams::PAINS_B);
85     params.addCatalog(FilterCatalogParams::PAINS_C);
86 
87     FilterCatalog catalog(params);
88     boost::scoped_ptr<ROMol> mol;
89     const IntPair match1[10] = {{0, 23}, {1, 22}, {2, 20}, {3, 19}, {4, 25},
90                                 {5, 24}, {6, 18}, {7, 17}, {8, 16}, {9, 21}};
91     MatchVectType matchvec1;
92     for (auto i : match1) {
93       matchvec1.push_back(std::make_pair(i.first, i.second));
94     }
95 
96     const IntPair match2[13] = {{0, 11}, {1, 12},  {2, 13}, {3, 14}, {4, 15},
97                                 {5, 10}, {6, 9},   {7, 8},  {8, 7},  {9, 6},
98                                 {10, 5}, {11, 17}, {12, 16}};
99     MatchVectType matchvec2;
100     for (auto i : match2) {
101       matchvec2.push_back(std::make_pair(i.first, i.second));
102     }
103 
104     const IntPair match3[12] = {{0, 0}, {1, 1},  {2, 2},   {3, 4},
105                                 {4, 5}, {5, 6},  {6, 7},   {7, 8},
106                                 {8, 9}, {9, 14}, {10, 15}, {11, 16}};
107     MatchVectType matchvec3;
108     for (auto i : match3) {
109       matchvec3.push_back(std::make_pair(i.first, i.second));
110     }
111     int count = 0;
112     while (!suppl.atEnd()) {
113       mol.reset(suppl.next());
114       std::string name = mol->getProp<std::string>(common_properties::_Name);
115 
116       TEST_ASSERT(mol.get());
117       if (catalog.hasMatch(*mol)) {
118         std::cerr << "Warning: molecule failed filter " << std::endl;
119       }
120       // More detailed
121       FilterCatalog::CONST_SENTRY entry = catalog.getFirstMatch(*mol);
122       TEST_ASSERT(entry);
123       if (entry) {
124         std::cerr << "Warning: molecule failed filter: reason "
125                   << entry->getDescription() << std::endl;
126         switch (count) {
127           case 0:
128             TEST_ASSERT(entry->getDescription() == "hzone_phenol_A(479)");
129             break;
130           case 1:
131             TEST_ASSERT(entry->getDescription() == "cyano_imine_B(17)");
132             break;
133           case 2:
134             TEST_ASSERT(entry->getDescription() == "keto_keto_gamma(5)");
135             break;
136         }
137         TEST_ASSERT(entry->getDescription() == name);
138 
139         // get the substructure atoms for visualization
140         std::vector<FilterMatch> matches;
141         if (entry->getFilterMatches(*mol, matches)) {
142           for (std::vector<FilterMatch>::const_iterator it = matches.begin();
143                it != matches.end(); ++it) {
144             // Get the FilterMatcherBase that matched
145             const FilterMatch &fm = (*it);
146             boost::shared_ptr<FilterMatcherBase> matchingFilter =
147                 fm.filterMatch;
148 
149             // Get the matching atom indices
150             const MatchVectType &vect = fm.atomPairs;
151             switch (count) {
152               case 0:
153                 TEST_ASSERT(check(vect, matchvec1));
154                 break;
155               case 1:
156                 TEST_ASSERT(check(vect, matchvec2));
157                 break;
158               case 2:
159                 TEST_ASSERT(check(vect, matchvec3));
160                 break;
161             }
162 
163             // do something with these...
164           }
165         }
166       }
167       count++;
168     }  // end while
169   }
170   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
171 }
172 
testFilterCatalogEntry()173 void testFilterCatalogEntry() {
174   SmartsMatcher *sm = new SmartsMatcher("Aromatic carbon chain");
175   boost::shared_ptr<FilterMatcherBase> matcher(sm);
176   TEST_ASSERT(!matcher->isValid());
177   const int debugParse = 0;
178   const bool mergeHs = true;
179   ROMOL_SPTR pattern(SmartsToMol("c:c:c:c:c", debugParse, mergeHs));
180   TEST_ASSERT(pattern.get() != nullptr);
181   sm->setPattern(pattern);
182   sm->setMinCount(1);
183   FilterCatalogEntry entry("Bar", matcher);
184   TEST_ASSERT(entry.getDescription() == "Bar");
185   TEST_ASSERT(sm->getMinCount() == 1);
186   TEST_ASSERT(sm->getMaxCount() == (unsigned int)-1);
187 
188   entry.setDescription("Foo");
189   TEST_ASSERT(entry.getDescription() == "Foo");
190 
191   entry.setProp("foo", "foo");
192   TEST_ASSERT(entry.getProp<std::string>("foo") == "foo");
193   entry.setProp(std::string("bar"), "bar");
194   TEST_ASSERT(entry.getProp<std::string>("bar") == "bar");
195 
196   RWMol *newM = SmilesToMol("c1ccccc1", 0, true);
197   TEST_ASSERT(entry.hasFilterMatch(*newM));
198   delete newM;
199 }
200 
testFilterCatalogThreadedRunner()201 void testFilterCatalogThreadedRunner() {
202   FilterCatalogParams params;
203   params.addCatalog(FilterCatalogParams::PAINS_A);
204   params.addCatalog(FilterCatalogParams::PAINS_B);
205   params.addCatalog(FilterCatalogParams::PAINS_C);
206 
207   FilterCatalog catalog(params);
208 
209   std::string pathName = getenv("RDBASE");
210   pathName += "/Code/GraphMol/test_data/pains.smi";
211 
212   std::ifstream infile(pathName);
213   std::vector<std::string> smiles;
214 
215   std::string line;
216   int count = 0;
217   while (std::getline(infile, line)) {
218     if (count) {
219       std::cerr << line << std::endl;
220       smiles.push_back(line);
221     }
222     count += 1;
223   }
224   TEST_ASSERT(smiles.size() == 3);
225 
226   int numThreads = 3;  // one per entry
227   auto results = RunFilterCatalog(catalog, smiles, numThreads);
228   TEST_ASSERT(results.size() == smiles.size());
229   count = 0;
230   for (auto &entries : results) {
231     TEST_ASSERT(entries.size() > 0);
232     std::cerr << count << " " << entries[0]->getDescription() << std::endl;
233     switch (count) {
234       case 0:
235         TEST_ASSERT(entries[0]->getDescription() == "hzone_phenol_A(479)");
236         break;
237       case 1:
238         TEST_ASSERT(entries[0]->getDescription() == "cyano_imine_B(17)");
239         break;
240       case 2:
241         TEST_ASSERT(entries[0]->getDescription() == "keto_keto_gamma(5)");
242         break;
243     }
244     count += 1;
245   }
246 }
247 
main()248 int main() {
249   RDLog::InitLogs();
250   // boost::logging::enable_logs("rdApp.debug");
251 
252   testFilterCatalog();
253   testFilterCatalogEntry();
254   testFilterCatalogThreadedRunner();
255   return 0;
256 }
257