1 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #include <RDGeneral/test.h>
32 #include <RDGeneral/RDLog.h>
33 #include <GraphMol/RDKitBase.h>
34 #include <GraphMol/FileParsers/MolSupplier.h>
35 #include <GraphMol/FilterCatalog/FilterCatalog.h>
36 #include <GraphMol/SmilesParse/SmilesParse.h>
37 #include <fstream>
38 #include <iostream>
39 #include <map>
40 #include <algorithm>
41
42 using namespace RDKit;
43 using namespace std;
44
// Plain aggregate describing one expected index pair.
//
// It deliberately has no constructors so that tables of expected matches can
// be written as brace-initialized arrays; the two members are later copied,
// in order, into the (first, second) slots of a MatchVectType element.
struct IntPair {
  int first;   // copied into the pair's `first` slot
  int second;  // copied into the pair's `second` slot
};
49
// Debug helper: writes a labelled listing of index pairs to std::cerr.
//
// name - label printed in front of the listing
// v    - any iterable container whose elements expose public `first` and
//        `second` members that can be streamed
//
// Each element is printed on its own line as a balanced "{first,second},"
// entry, so the output can be pasted straight into a brace-initialized table.
template <class T>
void dump(const std::string &name, const T &v) {
  std::cerr << name << " = { " << std::endl;
  for (const auto &entry : v) {
    std::cerr << "\t{" << entry.first << "," << entry.second << "},"
              << std::endl;
  }
  std::cerr << "}" << std::endl;
}
59
check(MatchVectType v,MatchVectType match)60 bool check(MatchVectType v, MatchVectType match) {
61 dump("v", v);
62 dump("match", match);
63 for (size_t i = 0; i < v.size(); ++i) {
64 if (v[i].first != match[i].first) {
65 return false;
66 }
67 if (v[i].second != match[i].second) {
68 return false;
69 }
70 }
71 return true;
72 }
73
testFilterCatalog()74 void testFilterCatalog() {
75 BOOST_LOG(rdInfoLog)
76 << "-----------------------\n Testing the filter catalog " << std::endl;
77 {
78 std::string pathName = getenv("RDBASE");
79 pathName += "/Code/GraphMol/test_data/";
80 SmilesMolSupplier suppl(pathName + "pains.smi");
81
82 FilterCatalogParams params;
83 params.addCatalog(FilterCatalogParams::PAINS_A);
84 params.addCatalog(FilterCatalogParams::PAINS_B);
85 params.addCatalog(FilterCatalogParams::PAINS_C);
86
87 FilterCatalog catalog(params);
88 boost::scoped_ptr<ROMol> mol;
89 const IntPair match1[10] = {{0, 23}, {1, 22}, {2, 20}, {3, 19}, {4, 25},
90 {5, 24}, {6, 18}, {7, 17}, {8, 16}, {9, 21}};
91 MatchVectType matchvec1;
92 for (auto i : match1) {
93 matchvec1.push_back(std::make_pair(i.first, i.second));
94 }
95
96 const IntPair match2[13] = {{0, 11}, {1, 12}, {2, 13}, {3, 14}, {4, 15},
97 {5, 10}, {6, 9}, {7, 8}, {8, 7}, {9, 6},
98 {10, 5}, {11, 17}, {12, 16}};
99 MatchVectType matchvec2;
100 for (auto i : match2) {
101 matchvec2.push_back(std::make_pair(i.first, i.second));
102 }
103
104 const IntPair match3[12] = {{0, 0}, {1, 1}, {2, 2}, {3, 4},
105 {4, 5}, {5, 6}, {6, 7}, {7, 8},
106 {8, 9}, {9, 14}, {10, 15}, {11, 16}};
107 MatchVectType matchvec3;
108 for (auto i : match3) {
109 matchvec3.push_back(std::make_pair(i.first, i.second));
110 }
111 int count = 0;
112 while (!suppl.atEnd()) {
113 mol.reset(suppl.next());
114 std::string name = mol->getProp<std::string>(common_properties::_Name);
115
116 TEST_ASSERT(mol.get());
117 if (catalog.hasMatch(*mol)) {
118 std::cerr << "Warning: molecule failed filter " << std::endl;
119 }
120 // More detailed
121 FilterCatalog::CONST_SENTRY entry = catalog.getFirstMatch(*mol);
122 TEST_ASSERT(entry);
123 if (entry) {
124 std::cerr << "Warning: molecule failed filter: reason "
125 << entry->getDescription() << std::endl;
126 switch (count) {
127 case 0:
128 TEST_ASSERT(entry->getDescription() == "hzone_phenol_A(479)");
129 break;
130 case 1:
131 TEST_ASSERT(entry->getDescription() == "cyano_imine_B(17)");
132 break;
133 case 2:
134 TEST_ASSERT(entry->getDescription() == "keto_keto_gamma(5)");
135 break;
136 }
137 TEST_ASSERT(entry->getDescription() == name);
138
139 // get the substructure atoms for visualization
140 std::vector<FilterMatch> matches;
141 if (entry->getFilterMatches(*mol, matches)) {
142 for (std::vector<FilterMatch>::const_iterator it = matches.begin();
143 it != matches.end(); ++it) {
144 // Get the FilterMatcherBase that matched
145 const FilterMatch &fm = (*it);
146 boost::shared_ptr<FilterMatcherBase> matchingFilter =
147 fm.filterMatch;
148
149 // Get the matching atom indices
150 const MatchVectType &vect = fm.atomPairs;
151 switch (count) {
152 case 0:
153 TEST_ASSERT(check(vect, matchvec1));
154 break;
155 case 1:
156 TEST_ASSERT(check(vect, matchvec2));
157 break;
158 case 2:
159 TEST_ASSERT(check(vect, matchvec3));
160 break;
161 }
162
163 // do something with these...
164 }
165 }
166 }
167 count++;
168 } // end while
169 }
170 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
171 }
172
testFilterCatalogEntry()173 void testFilterCatalogEntry() {
174 SmartsMatcher *sm = new SmartsMatcher("Aromatic carbon chain");
175 boost::shared_ptr<FilterMatcherBase> matcher(sm);
176 TEST_ASSERT(!matcher->isValid());
177 const int debugParse = 0;
178 const bool mergeHs = true;
179 ROMOL_SPTR pattern(SmartsToMol("c:c:c:c:c", debugParse, mergeHs));
180 TEST_ASSERT(pattern.get() != nullptr);
181 sm->setPattern(pattern);
182 sm->setMinCount(1);
183 FilterCatalogEntry entry("Bar", matcher);
184 TEST_ASSERT(entry.getDescription() == "Bar");
185 TEST_ASSERT(sm->getMinCount() == 1);
186 TEST_ASSERT(sm->getMaxCount() == (unsigned int)-1);
187
188 entry.setDescription("Foo");
189 TEST_ASSERT(entry.getDescription() == "Foo");
190
191 entry.setProp("foo", "foo");
192 TEST_ASSERT(entry.getProp<std::string>("foo") == "foo");
193 entry.setProp(std::string("bar"), "bar");
194 TEST_ASSERT(entry.getProp<std::string>("bar") == "bar");
195
196 RWMol *newM = SmilesToMol("c1ccccc1", 0, true);
197 TEST_ASSERT(entry.hasFilterMatch(*newM));
198 delete newM;
199 }
200
testFilterCatalogThreadedRunner()201 void testFilterCatalogThreadedRunner() {
202 FilterCatalogParams params;
203 params.addCatalog(FilterCatalogParams::PAINS_A);
204 params.addCatalog(FilterCatalogParams::PAINS_B);
205 params.addCatalog(FilterCatalogParams::PAINS_C);
206
207 FilterCatalog catalog(params);
208
209 std::string pathName = getenv("RDBASE");
210 pathName += "/Code/GraphMol/test_data/pains.smi";
211
212 std::ifstream infile(pathName);
213 std::vector<std::string> smiles;
214
215 std::string line;
216 int count = 0;
217 while (std::getline(infile, line)) {
218 if (count) {
219 std::cerr << line << std::endl;
220 smiles.push_back(line);
221 }
222 count += 1;
223 }
224 TEST_ASSERT(smiles.size() == 3);
225
226 int numThreads = 3; // one per entry
227 auto results = RunFilterCatalog(catalog, smiles, numThreads);
228 TEST_ASSERT(results.size() == smiles.size());
229 count = 0;
230 for (auto &entries : results) {
231 TEST_ASSERT(entries.size() > 0);
232 std::cerr << count << " " << entries[0]->getDescription() << std::endl;
233 switch (count) {
234 case 0:
235 TEST_ASSERT(entries[0]->getDescription() == "hzone_phenol_A(479)");
236 break;
237 case 1:
238 TEST_ASSERT(entries[0]->getDescription() == "cyano_imine_B(17)");
239 break;
240 case 2:
241 TEST_ASSERT(entries[0]->getDescription() == "keto_keto_gamma(5)");
242 break;
243 }
244 count += 1;
245 }
246 }
247
main()248 int main() {
249 RDLog::InitLogs();
250 // boost::logging::enable_logs("rdApp.debug");
251
252 testFilterCatalog();
253 testFilterCatalogEntry();
254 testFilterCatalogThreadedRunner();
255 return 0;
256 }
257