1 //
2 // Copyright (C) 2018 Susan H. Leung
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include "Tautomer.h"
11 #include <GraphMol/RDKitBase.h>
12 #include <GraphMol/SmilesParse/SmilesParse.h>
13 #include <GraphMol/SmilesParse/SmilesWrite.h>
14 #include <chrono>
15 #include <ctime>
16
17 using namespace RDKit;
18 using namespace MolStandardize;
19
testEnumerator()20 void testEnumerator() {
21 BOOST_LOG(rdInfoLog)
22 << "-----------------------\n Testing tautomer enumeration" << std::endl;
23
24 std::string rdbase = getenv("RDBASE");
25 std::string tautomerFile =
26 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
27 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
28 new TautomerCatalogParams(tautomerFile));
29
30 // DEPRECATED, remove from here in release 2021.01
31 {
32 unsigned int ntransforms = tautparams->getNumTautomers();
33 TEST_ASSERT(ntransforms == 36);
34 }
35 // DEPRECATED, remove until here in release 2021.01
36
37 unsigned int ntransforms = tautparams->getTransforms().size();
38 TEST_ASSERT(ntransforms == 36);
39
40 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
41
42 std::function<void(const std::string &, const std::vector<std::string> &)>
43 checkAns([te](const std::string &smi,
44 const std::vector<std::string> &ans) {
45 ROMOL_SPTR m(SmilesToMol(smi));
46 TautomerEnumeratorResult res = te.enumerate(*m);
47 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
48 size_t sz = std::max(res.size(), ans.size());
49 bool exceedingTautomer = false;
50 bool missingTautomer = false;
51 bool wrongTautomer = false;
52 for (size_t i = 0; i < sz; ++i) {
53 if (i >= res.size()) {
54 missingTautomer = true;
55 std::cerr << "missingTautomer, ans " << ans[i] << std::endl;
56 } else if (i >= ans.size()) {
57 exceedingTautomer = true;
58 std::cerr << "exceedingTautomer, taut " << MolToSmiles(*res.at(i))
59 << std::endl;
60 } else if (MolToSmiles(*res[i]) != ans[i]) {
61 wrongTautomer = true;
62 std::cerr << "wrongTautomer, taut " << MolToSmiles(*res[i])
63 << ", ans " << ans[i] << std::endl;
64 }
65 }
66 TEST_ASSERT(!(missingTautomer || exceedingTautomer || wrongTautomer));
67 });
68
69 // Enumerate 1,3 keto/enol tautomer.
70 checkAns("C1(=CCCCC1)O", {"O=C1CCCCC1", "OC1=CCCCC1"});
71
72 // Enumerate 1,3 keto/enol tautomer.
73 checkAns("C1(=CCCCC1)O", {"O=C1CCCCC1", "OC1=CCCCC1"});
74
75 // Enumerate 1,3 keto/enol tautomer.
76 checkAns("C1(CCCCC1)=O", {"O=C1CCCCC1", "OC1=CCCCC1"});
77
78 // Enumerate acetophenone keto/enol tautomer.
79 checkAns("C(=C)(O)C1=CC=CC=C1", {"C=C(O)c1ccccc1", "CC(=O)c1ccccc1"});
80
81 // Enumerate acetone keto/enol tautomer.
82 checkAns("CC(C)=O", {"C=C(C)O", "CC(C)=O"});
83
84 // keto/enol tautomer
85 checkAns("OC(C)=C(C)C", {"C=C(O)C(C)C", "CC(=O)C(C)C", "CC(C)=C(C)O"});
86
87 // 1-phenyl-2-propanone enol/keto
88 checkAns("c1(ccccc1)CC(=O)C",
89 {"C=C(O)Cc1ccccc1", "CC(=O)Cc1ccccc1", "CC(O)=Cc1ccccc1"});
90
91 // 1,5 keto/enol tautomer
92 checkAns("Oc1nccc2cc[nH]c(=N)c12",
93 {"N=c1[nH]ccc2cc[nH]c(=O)c12", "N=c1[nH]ccc2ccnc(O)c12",
94 "N=c1nccc2cc[nH]c(O)c1-2", "Nc1[nH]ccc2ccnc(=O)c1-2",
95 "Nc1nccc2cc[nH]c(=O)c12", "Nc1nccc2ccnc(O)c12"});
96
97 // 1,5 keto/enol tautomer
98 checkAns("C1(C=CCCC1)=O", {"O=C1C=CCCC1", "O=C1CC=CCC1", "OC1=CC=CCC1",
99 "OC1=CCC=CC1", "OC1=CCCC=C1"});
100
101 // 1,5 keto/enol tautomer
102 checkAns("C1(=CC=CCC1)O", {"O=C1C=CCCC1", "O=C1CC=CCC1", "OC1=CC=CCC1",
103 "OC1=CCC=CC1", "OC1=CCCC=C1"});
104
105 // aliphatic imine tautomer
106 checkAns("C1(CCCCC1)=N", {"N=C1CCCCC1", "NC1=CCCCC1"});
107
108 // aliphatic imine tautomer
109 checkAns("C1(=CCCCC1)N", {"N=C1CCCCC1", "NC1=CCCCC1"});
110
111 // special imine tautomer
112 checkAns("C1(C=CC=CN1)=CC", {"CC=C1C=CC=CN1", "CC=C1C=CCC=N1", "CCc1ccccn1"});
113
114 // special imine tautomer
115 checkAns("C1(=NC=CC=C1)CC", {"CC=C1C=CC=CN1", "CC=C1C=CCC=N1", "CCc1ccccn1"});
116
117 // 1,3 aromatic heteroatom H shift
118 checkAns("O=c1cccc[nH]1", {"O=c1cccc[nH]1", "Oc1ccccn1"});
119
120 // 1,3 aromatic heteroatom H shift
121 checkAns("Oc1ccccn1", {"O=c1cccc[nH]1", "Oc1ccccn1"});
122
123 // 1,3 aromatic heteroatom H shift
124 checkAns("Oc1ncc[nH]1", {"O=c1[nH]cc[nH]1", "Oc1ncc[nH]1"});
125
126 // 1,3 heteroatom H shift
127 checkAns("OC(C)=NC", {"C=C(O)NC", "CN=C(C)O", "CNC(C)=O"});
128
129 // 1,3 heteroatom H shift
130 checkAns("CNC(C)=O", {"C=C(O)NC", "CN=C(C)O", "CNC(C)=O"});
131
132 // 1,3 heteroatom H shift
133 checkAns("S=C(N)N", {"N=C(N)S", "NC(N)=S"});
134
135 // 1,3 heteroatom H shift
136 checkAns("SC(N)=N", {"N=C(N)S", "NC(N)=S"});
137
138 // 1,3 heteroatom H shift
139 checkAns("N=c1[nH]ccn(C)1", {"Cn1cc[nH]c1=N", "Cn1ccnc1N"});
140
141 // 1,3 heteroatom H shift
142 checkAns("CN=c1[nH]cncc1",
143 {"CN=c1cc[nH]cn1", "CN=c1ccnc[nH]1", "CNc1ccncn1"});
144
145 // 1,5 aromatic heteroatom H shift
146 checkAns("Oc1cccc2ccncc12", {"O=c1cccc2cc[nH]cc1-2", "Oc1cccc2ccncc12"});
147
148 // 1,5 aromatic heteroatom H shift
149 checkAns("O=c1cccc2cc[nH]cc1-2", {"O=c1cccc2cc[nH]cc1-2", "Oc1cccc2ccncc12"});
150
151 // 1,5 aromatic heteroatom H shift
152 checkAns("Cc1n[nH]c2ncnn12",
153 {"C=C1NN=C2N=CNN12", "C=C1NN=C2NC=NN12", "C=C1NNc2ncnn21",
154 "Cc1n[nH]c2ncnn12", "Cc1nnc2[nH]cnn12", "Cc1nnc2nc[nH]n12"});
155
156 // 1,5 aromatic heteroatom H shift
157 checkAns("Cc1nnc2nc[nH]n12",
158 {"C=C1NN=C2N=CNN12", "C=C1NN=C2NC=NN12", "C=C1NNc2ncnn21",
159 "Cc1n[nH]c2ncnn12", "Cc1nnc2[nH]cnn12", "Cc1nnc2nc[nH]n12"});
160
161 // 1,5 aromatic heteroatom H shift
162 checkAns("Oc1ccncc1", {"O=c1cc[nH]cc1", "Oc1ccncc1"});
163
164 // 1,5 aromatic heteroatom H shift
165 checkAns("Oc1c(cccc3)c3nc2ccncc12",
166 {"O=c1c2c[nH]ccc-2nc2ccccc12", "O=c1c2ccccc2[nH]c2ccncc12",
167 "Oc1c2ccccc2nc2ccncc12"});
168
169 // 1,3 and 1,5 aromatic heteroatom H shift
170 checkAns("Oc1ncncc1", {"O=c1cc[nH]cn1", "O=c1ccnc[nH]1", "Oc1ccncn1"});
171
172 // 1,5 aromatic heteroatom H shift
173 checkAns("C2(=C1C(=NC=N1)[NH]C(=N2)N)O",
174 {"N=c1[nH]c(=O)c2[nH]cnc2[nH]1", "N=c1[nH]c(=O)c2nc[nH]c2[nH]1",
175 "N=c1[nH]c2ncnc-2c(O)[nH]1", "N=c1nc(O)c2[nH]cnc2[nH]1",
176 "N=c1nc(O)c2nc[nH]c2[nH]1", "N=c1nc2[nH]cnc2c(O)[nH]1",
177 "N=c1nc2nc[nH]c2c(O)[nH]1", "Nc1nc(=O)c2[nH]cnc2[nH]1",
178 "Nc1nc(=O)c2nc[nH]c2[nH]1", "Nc1nc(O)c2[nH]cnc2n1",
179 "Nc1nc(O)c2nc[nH]c2n1", "Nc1nc(O)c2ncnc-2[nH]1",
180 "Nc1nc2[nH]cnc2c(=O)[nH]1", "Nc1nc2nc[nH]c2c(=O)[nH]1",
181 "Nc1nc2ncnc-2c(O)[nH]1"});
182
183 // 1,5 aromatic heteroatom H shift
184 checkAns("C2(C1=C([NH]C=N1)[NH]C(=N2)N)=O",
185 {"N=c1[nH]c(=O)c2[nH]cnc2[nH]1", "N=c1[nH]c(=O)c2nc[nH]c2[nH]1",
186 "N=c1[nH]c2ncnc-2c(O)[nH]1", "N=c1nc(O)c2[nH]cnc2[nH]1",
187 "N=c1nc(O)c2nc[nH]c2[nH]1", "N=c1nc2[nH]cnc2c(O)[nH]1",
188 "N=c1nc2nc[nH]c2c(O)[nH]1", "Nc1nc(=O)c2[nH]cnc2[nH]1",
189 "Nc1nc(=O)c2nc[nH]c2[nH]1", "Nc1nc(O)c2[nH]cnc2n1",
190 "Nc1nc(O)c2nc[nH]c2n1", "Nc1nc(O)c2ncnc-2[nH]1",
191 "Nc1nc2[nH]cnc2c(=O)[nH]1", "Nc1nc2nc[nH]c2c(=O)[nH]1",
192 "Nc1nc2ncnc-2c(O)[nH]1"});
193
194 // 1,5 aromatic heteroatom H shift
195 checkAns("Oc1n(C)ncc1", {"CN1N=CCC1=O", "Cn1[nH]ccc1=O", "Cn1nccc1O"});
196
197 // 1,5 aromatic heteroatom H shift
198 checkAns("O=c1nc2[nH]ccn2cc1",
199 {"O=c1ccn2cc[nH]c2n1", "O=c1ccn2ccnc2[nH]1", "Oc1ccn2ccnc2n1"});
200
201 // 1,5 aromatic heteroatom H shift
202 checkAns("N=c1nc[nH]cc1", {"N=c1cc[nH]cn1", "N=c1ccnc[nH]1", "Nc1ccncn1"});
203
204 // 1,5 aromatic heteroatom H shift
205 checkAns("N=c(c1)ccn2cc[nH]c12", {"N=c1ccn2cc[nH]c2c1", "Nc1ccn2ccnc2c1"});
206
207 // 1,5 aromatic heteroatom H shift
208 checkAns("CN=c1nc[nH]cc1",
209 {"CN=c1cc[nH]cn1", "CN=c1ccnc[nH]1", "CNc1ccncn1"});
210
211 // 1,7 aromatic heteroatom H shift
212 checkAns("c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
213 {"c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
214 "c1ccc2c(c1)=NC(c1nc3ccccc3[nH]1)N=2",
215 "c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2"});
216
217 // 1,7 aromatic heteroatom H shift
218 checkAns("c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2",
219 {"c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
220 "c1ccc2c(c1)=NC(c1nc3ccccc3[nH]1)N=2",
221 "c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2"});
222
223 // 1,9 aromatic heteroatom H shift
224 checkAns("CNc1ccnc2ncnn21", {"CN=c1cc[nH]c2ncnn12", "CN=c1ccnc2[nH]cnn12",
225 "CN=c1ccnc2nc[nH]n12", "CNc1ccnc2ncnn12"});
226
227 // 1,9 aromatic heteroatom H shift
228 checkAns("CN=c1ccnc2nc[nH]n21", {"CN=c1cc[nH]c2ncnn12", "CN=c1ccnc2[nH]cnn12",
229 "CN=c1ccnc2nc[nH]n12", "CNc1ccnc2ncnn12"});
230
231 // 1,11 aromatic heteroatom H shift
232 checkAns("Nc1ccc(C=C2C=CC(=O)C=C2)cc1",
233 {"N=C1C=CC(=CC2C=CC(=O)C=C2)C=C1", "N=C1C=CC(=Cc2ccc(O)cc2)C=C1",
234 "N=C1C=CC(C=C2C=CC(=O)C=C2)C=C1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"});
235
236 // 1,11 aromatic heteroatom H shift
237 checkAns("N=C1C=CC(=Cc2ccc(O)cc2)C=C1",
238 {"N=C1C=CC(=CC2C=CC(=O)C=C2)C=C1", "N=C1C=CC(=Cc2ccc(O)cc2)C=C1",
239 "N=C1C=CC(C=C2C=CC(=O)C=C2)C=C1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"});
240
241 // heterocyclic tautomer
242 checkAns("n1ccc2ccc[nH]c12", {"c1c[nH]c2nccc-2c1", "c1cnc2[nH]ccc2c1"});
243
244 // heterocyclic tautomer
245 checkAns("c1cc(=O)[nH]c2nccn12",
246 {"O=c1ccn2cc[nH]c2n1", "O=c1ccn2ccnc2[nH]1", "Oc1ccn2ccnc2n1"});
247
248 // heterocyclic tautomer
249 checkAns("c1cnc2c[nH]ccc12", {"c1cc2cc[nH]c2cn1", "c1cc2cc[nH]cc-2n1"});
250
251 // heterocyclic tautomer
252 checkAns("n1ccc2c[nH]ccc12", {"c1cc2[nH]ccc2cn1", "c1cc2c[nH]ccc-2n1"});
253
254 // heterocyclic tautomer
255 checkAns("c1cnc2ccc[nH]c12", {"c1c[nH]c2ccnc-2c1", "c1cnc2cc[nH]c2c1"});
256
257 // furanone tautomer
258 checkAns("C1=CC=C(O1)O", {"O=C1CC=CO1", "Oc1ccco1"});
259
260 // furanone tautomer
261 checkAns("O=C1CC=CO1", {"O=C1CC=CO1", "Oc1ccco1"});
262
263 // keten/ynol tautomer
264 checkAns("CC=C=O", {"CC#CO", "CC=C=O"});
265
266 // keten/ynol tautomer
267 checkAns("CC#CO", {"CC#CO", "CC=C=O"});
268
269 // ionic nitro/aci-nitro tautomer
270 checkAns("C([N+](=O)[O-])C", {"CC=[N+]([O-])O", "CC[N+](=O)[O-]"});
271
272 // ionic nitro/aci-nitro tautomer
273 checkAns("C(=[N+](O)[O-])C", {"CC=[N+]([O-])O", "CC[N+](=O)[O-]"});
274
275 // oxim nitroso tautomer
276 checkAns("CC(C)=NO", {"C=C(C)NO", "CC(C)=NO", "CC(C)N=O"});
277
278 // oxim nitroso tautomer
279 checkAns("CC(C)N=O", {"C=C(C)NO", "CC(C)=NO", "CC(C)N=O"});
280
281 // oxim/nitroso tautomer via phenol
282 checkAns("O=Nc1ccc(O)cc1",
283 {"O=C1C=CC(=NO)C=C1", "O=NC1C=CC(=O)C=C1", "O=Nc1ccc(O)cc1"});
284
285 // oxim/nitroso tautomer via phenol
286 checkAns("O=C1C=CC(=NO)C=C1",
287 {"O=C1C=CC(=NO)C=C1", "O=NC1C=CC(=O)C=C1", "O=Nc1ccc(O)cc1"});
288
289 // cyano/iso-cyanic acid tautomer
290 checkAns("C(#N)O", {"N#CO", "N=C=O"});
291
292 // cyano/iso-cyanic acid tautomer
293 checkAns("C(=N)=O", {"N#CO", "N=C=O"});
294
295 // formamidinesulfinic acid tautomer
296 checkAns("NC(N)=S(=O)=O",
297 {"N=C(N)S(=O)O", "N=C(N)[SH](=O)=O", "NC(N)=S(=O)=O"});
298
299 // formamidinesulfinic acid tautomer
300 checkAns("NC(=N)S(=O)O",
301 {"N=C(N)S(=O)O", "N=C(N)[SH](=O)=O", "NC(N)=S(=O)=O"});
302
303 // formamidinesulfonic acid tautomer
304 checkAns("NC(=N)S(=O)(=O)O", {"N=C(N)S(=O)(=O)O"});
305
306 // isocyanide tautomer
307 checkAns("C#N", {"C#N", "[C-]#[NH+]"});
308
309 // isocyanide tautomer
310 checkAns("[C-]#[NH+]", {"C#N", "[C-]#[NH+]"});
311
312 // phosphonic acid tautomer
313 checkAns("[PH](=O)(O)(O)", {"O=[PH](O)O", "OP(O)O"});
314
315 // phosphonic acid tautomer
316 checkAns("P(O)(O)O", {"O=[PH](O)O", "OP(O)O"});
317
318 // Remove stereochemistry from mobile double bonds
319 checkAns("c1(ccccc1)/C=C(/O)\\C",
320 {"C=C(O)Cc1ccccc1", "CC(=O)Cc1ccccc1", "CC(O)=Cc1ccccc1"});
321
322 // Remove stereochemistry from mobile double bonds
323 checkAns("C/C=C/C(C)=O", {"C=C(O)C=CC", "C=CC=C(C)O", "C=CCC(=C)O",
324 "C=CCC(C)=O", "CC=CC(C)=O"});
325
326 // Remove stereochemistry from mobile double bonds
327 std::string smi66 = "C/C=C\\C(C)=O";
328 ROMOL_SPTR m66(SmilesToMol(smi66));
329 TautomerEnumeratorResult res66 = te.enumerate(*m66);
330 std::vector<std::string> ans66 = {"C=C(O)C=CC", "C=CC=C(C)O", "C=CCC(=C)O",
331 "C=CCC(C)=O", "CC=CC(C)=O"};
332 TEST_ASSERT(res66.size() == ans66.size());
333 TEST_ASSERT(res66.status() == TautomerEnumeratorStatus::Completed);
334
335 std::vector<std::string> sm66;
336 for (const auto &r : res66) {
337 sm66.push_back(MolToSmiles(*r));
338 }
339 // sort both for alphabetical order
340 std::sort(sm66.begin(), sm66.end());
341 std::sort(ans66.begin(), ans66.end());
342 TEST_ASSERT(sm66 == ans66);
343
344 // Gaunine tautomers
345 std::string smi67 = "N1C(N)=NC=2N=CNC2C1=O";
346 ROMOL_SPTR m67(SmilesToMol(smi67));
347 TautomerEnumeratorResult res67 = te.enumerate(*m67);
348 std::vector<std::string> ans67 = {
349 "N=c1[nH]c(=O)c2[nH]cnc2[nH]1", "N=c1[nH]c(=O)c2nc[nH]c2[nH]1",
350 "N=c1[nH]c2ncnc-2c(O)[nH]1", "N=c1nc(O)c2[nH]cnc2[nH]1",
351 "N=c1nc(O)c2nc[nH]c2[nH]1", "N=c1nc2[nH]cnc2c(O)[nH]1",
352 "N=c1nc2nc[nH]c2c(O)[nH]1", "Nc1nc(=O)c2[nH]cnc2[nH]1",
353 "Nc1nc(=O)c2nc[nH]c2[nH]1", "Nc1nc(O)c2[nH]cnc2n1",
354 "Nc1nc(O)c2nc[nH]c2n1", "Nc1nc(O)c2ncnc-2[nH]1",
355 "Nc1nc2[nH]cnc2c(=O)[nH]1", "Nc1nc2nc[nH]c2c(=O)[nH]1",
356 "Nc1nc2ncnc-2c(O)[nH]1"};
357 TEST_ASSERT(res67.size() == ans67.size());
358 TEST_ASSERT(res67.status() == TautomerEnumeratorStatus::Completed);
359 std::vector<std::string> sm67;
360 for (const auto &r : res67) {
361 sm67.push_back(MolToSmiles(*r));
362 }
363 // sort both by alphabetical order
364 std::sort(sm67.begin(), sm67.end());
365 std::sort(ans67.begin(), ans67.end());
366 TEST_ASSERT(sm67 == ans67);
367
368 // Test a structure with hundreds of tautomers.
369 std::string smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O";
370 ROMOL_SPTR m68(SmilesToMol(smi68));
371 TautomerEnumeratorResult res68 = te.enumerate(*m68);
372 // the maxTransforms limit is hit before the maxTautomers one
373 TEST_ASSERT(res68.size() == 292);
374 TEST_ASSERT(res68.status() == TautomerEnumeratorStatus::MaxTransformsReached);
375 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
376 }
377
testEnumeratorParams()378 void testEnumeratorParams() {
379 BOOST_LOG(rdInfoLog)
380 << "-----------------------\n Testing TautomerEnumerator params"
381 << std::endl;
382
383 // Test a structure with hundreds of tautomers.
384 std::string smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O";
385 ROMOL_SPTR m68(SmilesToMol(smi68));
386
387 {
388 TautomerEnumerator te;
389 TautomerEnumeratorResult res68 = te.enumerate(*m68);
390 TEST_ASSERT(res68.size() == 292);
391 TEST_ASSERT(res68.status() ==
392 TautomerEnumeratorStatus::MaxTransformsReached);
393 }
394 {
395 CleanupParameters params;
396 params.maxTautomers = 50;
397 TautomerEnumerator te(params);
398 TautomerEnumeratorResult res68 = te.enumerate(*m68);
399 TEST_ASSERT(res68.size() == 50);
400 TEST_ASSERT(res68.status() ==
401 TautomerEnumeratorStatus::MaxTautomersReached);
402 }
403 std::string sAlaSmi = "C[C@H](N)C(=O)O";
404 ROMOL_SPTR sAla(SmilesToMol(sAlaSmi));
405 {
406 // test remove (S)-Ala stereochemistry
407 TEST_ASSERT(sAla->getAtomWithIdx(1)->getChiralTag() ==
408 Atom::CHI_TETRAHEDRAL_CCW);
409 TEST_ASSERT(sAla->getAtomWithIdx(1)->getProp<std::string>(
410 common_properties::_CIPCode) == "S");
411 CleanupParameters params;
412 params.tautomerRemoveSp3Stereo = true;
413 TautomerEnumerator te(params);
414 TautomerEnumeratorResult res = te.enumerate(*sAla);
415 for (const auto &taut : res) {
416 TEST_ASSERT(taut->getAtomWithIdx(1)->getChiralTag() ==
417 Atom::CHI_UNSPECIFIED);
418 TEST_ASSERT(
419 !taut->getAtomWithIdx(1)->hasProp(common_properties::_CIPCode));
420 }
421 }
422 {
423 // test retain (S)-Ala stereochemistry
424 TEST_ASSERT(sAla->getAtomWithIdx(1)->getChiralTag() ==
425 Atom::CHI_TETRAHEDRAL_CCW);
426 TEST_ASSERT(sAla->getAtomWithIdx(1)->getProp<std::string>(
427 common_properties::_CIPCode) == "S");
428 CleanupParameters params;
429 params.tautomerRemoveSp3Stereo = false;
430 TautomerEnumerator te(params);
431 TautomerEnumeratorResult res = te.enumerate(*sAla);
432 for (const auto &taut : res) {
433 const auto tautAtom = taut->getAtomWithIdx(1);
434 if (tautAtom->getHybridization() == Atom::SP3) {
435 TEST_ASSERT(tautAtom->hasProp(common_properties::_CIPCode));
436 TEST_ASSERT(
437 tautAtom->getProp<std::string>(common_properties::_CIPCode) == "S");
438 TEST_ASSERT(tautAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
439 } else {
440 TEST_ASSERT(!tautAtom->hasProp(common_properties::_CIPCode));
441 TEST_ASSERT(tautAtom->getChiralTag() == Atom::CHI_UNSPECIFIED);
442 }
443 }
444 }
445 std::string eEnolSmi = "C/C=C/O";
446 ROMOL_SPTR eEnol(SmilesToMol(eEnolSmi));
447 TEST_ASSERT(eEnol->getBondWithIdx(1)->getStereo() == Bond::STEREOE);
448 {
449 // test remove enol E stereochemistry
450 CleanupParameters params;
451 params.tautomerRemoveBondStereo = true;
452 TautomerEnumerator te(params);
453 TautomerEnumeratorResult res = te.enumerate(*eEnol);
454 for (const auto &taut : res) {
455 TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREONONE);
456 }
457 }
458 {
459 // test retain enol E stereochemistry
460 CleanupParameters params;
461 params.tautomerRemoveBondStereo = false;
462 TautomerEnumerator te(params);
463 TautomerEnumeratorResult res = te.enumerate(*eEnol);
464 for (const auto &taut : res) {
465 if (taut->getBondWithIdx(1)->getBondType() == Bond::DOUBLE) {
466 TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREOE);
467 }
468 }
469 }
470 ROMOL_SPTR zEnol = "C/C=C\\O"_smiles;
471 TEST_ASSERT(zEnol->getBondWithIdx(1)->getStereo() == Bond::STEREOZ);
472 {
473 // test remove enol Z stereochemistry
474 CleanupParameters params;
475 params.tautomerRemoveBondStereo = true;
476 TautomerEnumerator te(params);
477 TautomerEnumeratorResult res = te.enumerate(*zEnol);
478 for (const auto &taut : res) {
479 TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREONONE);
480 }
481 }
482 {
483 // test retain enol Z stereochemistry
484 CleanupParameters params;
485 params.tautomerRemoveBondStereo = false;
486 TautomerEnumerator te(params);
487 TautomerEnumeratorResult res = te.enumerate(*zEnol);
488 for (const auto &taut : res) {
489 if (taut->getBondWithIdx(1)->getBondType() == Bond::DOUBLE) {
490 TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREOZ);
491 }
492 }
493 }
494 ROMOL_SPTR chembl2024142 =
495 "[2H]C1=C(C(=C2C(=C1[2H])C(=O)C(=C(C2=O)C([2H])([2H])[2H])C/C=C(\\C)/CC([2H])([2H])/C=C(/CC/C=C(\\C)/CCC=C(C)C)\\C([2H])([2H])[2H])[2H])[2H]"_smiles;
496 MolOps::RemoveHsParameters hparams;
497 hparams.removeAndTrackIsotopes = true;
498 chembl2024142.reset(MolOps::removeHs(*chembl2024142, hparams));
499 TEST_ASSERT(chembl2024142->getAtomWithIdx(12)->hasProp(
500 common_properties::_isotopicHs));
501 {
502 // test remove isotopic Hs involved in tautomerism
503 CleanupParameters params;
504 params.tautomerRemoveIsotopicHs = true;
505 TautomerEnumerator te(params);
506 TautomerEnumeratorResult res = te.enumerate(*chembl2024142);
507 for (const auto &taut : res) {
508 const auto tautAtom = taut->getAtomWithIdx(12);
509 TEST_ASSERT(!tautAtom->hasProp(common_properties::_isotopicHs));
510 }
511 }
512 {
513 // test retain isotopic Hs involved in tautomerism
514 CleanupParameters params;
515 params.tautomerRemoveIsotopicHs = false;
516 TautomerEnumerator te(params);
517 TautomerEnumeratorResult res = te.enumerate(*chembl2024142);
518 for (const auto &taut : res) {
519 const auto tautAtom = taut->getAtomWithIdx(12);
520 TEST_ASSERT(tautAtom->hasProp(common_properties::_isotopicHs));
521 }
522 }
523 ROMOL_SPTR enolexample = "[2H]OC=C"_smiles;
524 enolexample.reset(MolOps::removeHs(*enolexample, hparams));
525 TEST_ASSERT(
526 enolexample->getAtomWithIdx(0)->hasProp(common_properties::_isotopicHs));
527 {
528 CleanupParameters params;
529 params.tautomerRemoveIsotopicHs = true;
530 TautomerEnumerator te(params);
531 TautomerEnumeratorResult res = te.enumerate(*enolexample);
532 for (const auto &taut : res) {
533 const auto tautAtom = taut->getAtomWithIdx(0);
534 TEST_ASSERT(!(tautAtom->hasProp(common_properties::_isotopicHs) &&
535 !tautAtom->getTotalNumHs()));
536 }
537 }
538 {
539 CleanupParameters params;
540 params.tautomerRemoveIsotopicHs = false;
541 TautomerEnumerator te(params);
542 TautomerEnumeratorResult res = te.enumerate(*enolexample);
543 for (const auto &taut : res) {
544 const auto tautAtom = taut->getAtomWithIdx(0);
545 TEST_ASSERT(!(tautAtom->hasProp(common_properties::_isotopicHs) &&
546 !tautAtom->getTotalNumHs()));
547 }
548 }
549 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
550 }
551
testEnumeratorCallback()552 void testEnumeratorCallback() {
553 class MyTautomerEnumeratorCallback : public TautomerEnumeratorCallback {
554 public:
555 MyTautomerEnumeratorCallback(double timeoutMs)
556 : d_timeoutMs(timeoutMs), d_start(std::chrono::system_clock::now()) {}
557 bool operator()(const ROMol &, const TautomerEnumeratorResult &) override {
558 double elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
559 std::chrono::system_clock::now() - d_start)
560 .count();
561 return (elapsedMs < d_timeoutMs);
562 }
563
564 private:
565 double d_timeoutMs;
566 std::chrono::time_point<std::chrono::system_clock> d_start;
567 };
568
569 BOOST_LOG(rdInfoLog)
570 << "-----------------------\n Testing TautomerEnumerator callback"
571 << std::endl;
572
573 // Test a structure with hundreds of tautomers.
574 std::string smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O";
575 ROMOL_SPTR m68(SmilesToMol(smi68));
576
577 CleanupParameters params;
578 params.maxTransforms = 10000;
579 params.maxTautomers = 10000;
580 {
581 TautomerEnumerator te(params);
582 te.setCallback(new MyTautomerEnumeratorCallback(50.0));
583 TautomerEnumeratorResult res68 = te.enumerate(*m68);
584 // either the enumeration was canceled due to timeout
585 // or it has completed very quickly
586 bool hasReachedTimeout =
587 (res68.size() < 375 &&
588 res68.status() == TautomerEnumeratorStatus::Canceled);
589 bool hasCompleted = (res68.size() == 375 &&
590 res68.status() == TautomerEnumeratorStatus::Completed);
591 if (hasReachedTimeout) {
592 std::cerr << "Enumeration was canceled due to timeout (50 ms)"
593 << std::endl;
594 }
595 if (hasCompleted) {
596 std::cerr << "Enumeration has completed" << std::endl;
597 }
598 TEST_ASSERT(hasReachedTimeout || hasCompleted);
599 TEST_ASSERT(hasReachedTimeout ^ hasCompleted);
600 }
601 {
602 TautomerEnumerator te(params);
603 te.setCallback(new MyTautomerEnumeratorCallback(10000.0));
604 TautomerEnumeratorResult res68 = te.enumerate(*m68);
605 // either the enumeration completed
606 // or it ran very slowly and was canceled due to timeout
607 bool hasReachedTimeout =
608 (res68.size() < 375 &&
609 res68.status() == TautomerEnumeratorStatus::Canceled);
610 bool hasCompleted = (res68.size() == 375 &&
611 res68.status() == TautomerEnumeratorStatus::Completed);
612 if (hasReachedTimeout) {
613 std::cerr << "Enumeration was canceled due to timeout (10 s)"
614 << std::endl;
615 }
616 if (hasCompleted) {
617 std::cerr << "Enumeration has completed" << std::endl;
618 }
619 TEST_ASSERT(hasReachedTimeout || hasCompleted);
620 TEST_ASSERT(hasReachedTimeout ^ hasCompleted);
621 }
622
623 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
624 }
625
626 // tests from the molvs repo:
627 // https://github.com/mcs07/MolVS/blob/456f2fe723acfedbf634a8bcfe943b83ea7d4c20/tests/test_tautomer.py
628 std::vector<std::pair<std::string, std::string>> canonTautomerData{
629 {"C1(=CCCCC1)O", "O=C1CCCCC1"},
630 {"C1(CCCCC1)=O", "O=C1CCCCC1"},
631 {"C(=C)(O)C1=CC=CC=C1", "CC(=O)c1ccccc1"},
632 {"CC(C)=O", "CC(C)=O"},
633 {"OC(C)=C(C)C", "CC(=O)C(C)C"},
634 {"c1(ccccc1)CC(=O)C", "CC(=O)Cc1ccccc1"},
635 {"Oc1nccc2cc[nH]c(=N)c12", "Nc1nccc2cc[nH]c(=O)c12"},
636 {"C1(C=CCCC1)=O", "O=C1C=CCCC1"},
637 {"C1(CCCCC1)=N", "N=C1CCCCC1"},
638 {"C1(=CCCCC1)N", "N=C1CCCCC1"},
639 {"C1(C=CC=CN1)=CC", "CCc1ccccn1"},
640 {"C1(=NC=CC=C1)CC", "CCc1ccccn1"},
641 {"O=c1cccc[nH]1", "O=c1cccc[nH]1"},
642 {"Oc1ccccn1", "O=c1cccc[nH]1"},
643 {"Oc1ncc[nH]1", "O=c1[nH]cc[nH]1"},
644 {"OC(C)=NC", "CNC(C)=O"},
645 {"CNC(C)=O", "CNC(C)=O"},
646 {"S=C(N)N", "NC(N)=S"},
647 {"SC(N)=N", "NC(N)=S"},
648 {"N=c1[nH]ccn(C)1", "Cn1ccnc1N"},
649 {"CN=c1[nH]cncc1", "CNc1ccncn1"},
650 {"Oc1cccc2ccncc12", "Oc1cccc2ccncc12"},
651 {"O=c1cccc2cc[nH]cc1-2", "Oc1cccc2ccncc12"},
652 {"Cc1n[nH]c2ncnn12", "Cc1n[nH]c2ncnn12"},
653 {"Cc1nnc2nc[nH]n12", "Cc1n[nH]c2ncnn12"},
654 {"Oc1ccncc1", "O=c1cc[nH]cc1"},
655 {"Oc1c(cccc3)c3nc2ccncc12", "O=c1c2ccccc2[nH]c2ccncc12"},
656 {"Oc1ncncc1", "O=c1cc[nH]cn1"},
657 {"C2(=C1C(=NC=N1)[NH]C(=N2)N)O", "Nc1nc(=O)c2[nH]cnc2[nH]1"},
658 {"C2(C1=C([NH]C=N1)[NH]C(=N2)N)=O", "Nc1nc(=O)c2[nH]cnc2[nH]1"},
659 {"Oc1n(C)ncc1", "Cn1[nH]ccc1=O"},
660 {"O=c1nc2[nH]ccn2cc1", "O=c1ccn2cc[nH]c2n1"},
661 {"N=c1nc[nH]cc1", "Nc1ccncn1"},
662 {"N=c(c1)ccn2cc[nH]c12", "Nc1ccn2ccnc2c1"},
663 {"CN=c1nc[nH]cc1", "CNc1ccncn1"},
664 {"c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
665 "c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1"},
666 {"c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2",
667 "c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1"},
668 {"CNc1ccnc2ncnn21", "CNc1ccnc2ncnn12"},
669 {"CN=c1ccnc2nc[nH]n21", "CNc1ccnc2ncnn12"},
670 {"Nc1ccc(C=C2C=CC(=O)C=C2)cc1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"},
671 {"N=C1C=CC(=Cc2ccc(O)cc2)C=C1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"},
672 {"n1ccc2ccc[nH]c12", "c1cnc2[nH]ccc2c1"},
673 {"c1cc(=O)[nH]c2nccn12", "O=c1ccn2cc[nH]c2n1"},
674 {"c1cnc2c[nH]ccc12", "c1cc2cc[nH]c2cn1"},
675 {"n1ccc2c[nH]ccc12", "c1cc2[nH]ccc2cn1"},
676 {"c1cnc2ccc[nH]c12", "c1cnc2cc[nH]c2c1"},
677 {"C1=CC=C(O1)O", "Oc1ccco1"},
678 {"O=C1CC=CO1", "Oc1ccco1"},
679 {"CC=C=O", "CC=C=O"},
680 {"CC#CO", "CC=C=O"},
681 {"C([N+](=O)[O-])C", "CC[N+](=O)[O-]"},
682 {"C(=[N+](O)[O-])C", "CC[N+](=O)[O-]"},
683 {"CC(C)=NO", "CC(C)=NO"},
684 {"CC(C)N=O", "CC(C)=NO"},
685 {"O=Nc1ccc(O)cc1", "O=Nc1ccc(O)cc1"},
686 {"O=C1C=CC(=NO)C=C1", "O=Nc1ccc(O)cc1"},
687 {"C(#N)O", "N=C=O"},
688 {"C(=N)=O", "N=C=O"},
689 {"N=C(N)S(=O)O", "N=C(N)S(=O)O"},
690 {"C#N", "C#N"},
691 {"[C-]#[NH+]", "C#N"},
692 {"[PH](=O)(O)(O)", "O=[PH](O)O"},
693 {"P(O)(O)O", "O=[PH](O)O"}};
694
testCanonicalize()695 void testCanonicalize() {
696 BOOST_LOG(rdInfoLog)
697 << "-----------------------\n Testing tautomer canonicalization"
698 << std::endl;
699
700 std::string rdbase = getenv("RDBASE");
701 std::string tautomerFile =
702 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
703 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
704 new TautomerCatalogParams(tautomerFile));
705 // DEPRECATED, remove from here in release 2021.01
706 {
707 unsigned int ntransforms = tautparams->getNumTautomers();
708 TEST_ASSERT(ntransforms == 36);
709 }
710 // DEPRECATED, remove until here in release 2021.01
711
712 unsigned int ntransforms = tautparams->getTransforms().size();
713 TEST_ASSERT(ntransforms == 36);
714
715 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
716
717 for (const auto &itm : canonTautomerData) {
718 std::unique_ptr<ROMol> mol{SmilesToMol(itm.first)};
719 TEST_ASSERT(mol);
720 std::unique_ptr<ROMol> res{te.canonicalize(*mol)};
721 TEST_ASSERT(res);
722 TEST_ASSERT(MolToSmiles(*res) == itm.second);
723 }
724 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
725 }
726
testPickCanonical()727 void testPickCanonical() {
728 BOOST_LOG(rdInfoLog) << "-----------------------\n Testing pickCanonical"
729 << std::endl;
730
731 std::string rdbase = getenv("RDBASE");
732 std::string tautomerFile =
733 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
734 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
735 new TautomerCatalogParams(tautomerFile));
736 // DEPRECATED, remove from here in release 2021.01
737 {
738 unsigned int ntransforms = tautparams->getNumTautomers();
739 TEST_ASSERT(ntransforms == 36);
740 }
741 // DEPRECATED, remove until here in release 2021.01
742
743 unsigned int ntransforms = tautparams->getTransforms().size();
744 TEST_ASSERT(ntransforms == 36);
745
746 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
747
748 for (const auto &itm : canonTautomerData) {
749 std::unique_ptr<ROMol> mol{SmilesToMol(itm.first)};
750 TEST_ASSERT(mol);
751 auto tautRes = te.enumerate(*mol);
752 std::unique_ptr<ROMol> res{te.pickCanonical(tautRes)};
753 TEST_ASSERT(res);
754 // std::cerr << itm.first<<" -> "<<MolToSmiles(*res)<<std::endl;
755 TEST_ASSERT(MolToSmiles(*res) == itm.second);
756 }
757 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
758 }
759
testCustomScoreFunc()760 void testCustomScoreFunc() {
761 BOOST_LOG(rdInfoLog)
762 << "-----------------------\n Testing custom scoring functions"
763 << std::endl;
764
765 std::string rdbase = getenv("RDBASE");
766 std::string tautomerFile =
767 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
768 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
769 new TautomerCatalogParams(tautomerFile));
770 // DEPRECATED, remove from here in release 2021.01
771 {
772 unsigned int ntransforms = tautparams->getNumTautomers();
773 TEST_ASSERT(ntransforms == 36);
774 }
775 // DEPRECATED, remove until here in release 2021.01
776
777 unsigned int ntransforms = tautparams->getTransforms().size();
778 TEST_ASSERT(ntransforms == 36);
779
780 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
781
782 // silly examples just using the scoreRings() function
783 std::vector<std::pair<std::string, std::string>> subsetTautomerData{
784 {"C1(=CCCCC1)O", "O=C1CCCCC1"},
785 {"C1(CCCCC1)=O", "O=C1CCCCC1"},
786 {"C(=C)(O)C1=CC=CC=C1", "C=C(O)c1ccccc1"},
787 {"CC(C)=O", "C=C(C)O"},
788 {"OC(C)=C(C)C", "C=C(O)C(C)C"},
789 };
790 for (const auto &itm : subsetTautomerData) {
791 std::unique_ptr<ROMol> mol{SmilesToMol(itm.first)};
792 TEST_ASSERT(mol);
793 {
794 // this uses the non-templated pickCanonical() function
795 std::unique_ptr<ROMol> res{
796 te.canonicalize(*mol, [](const ROMol &m) -> int {
797 return MolStandardize::TautomerScoringFunctions::scoreRings(m);
798 })};
799 TEST_ASSERT(res);
800 TEST_ASSERT(MolToSmiles(*res) == itm.second);
801 }
802 {
803 // this uses the non-templated pickCanonical() overload
804 auto tautRes = te.enumerate(*mol);
805 std::unique_ptr<ROMol> res{
806 te.pickCanonical(tautRes, [](const ROMol &m) -> int {
807 return MolStandardize::TautomerScoringFunctions::scoreRings(m);
808 })};
809 TEST_ASSERT(res);
810 TEST_ASSERT(MolToSmiles(*res) == itm.second);
811 }
812 {
813 // this tests the templated pickCanonical() overload on a std::vector
814 auto tautRes = te.enumerate(*mol);
815 std::unique_ptr<ROMol> res{
816 te.pickCanonical(tautRes.tautomers(), [](const ROMol &m) -> int {
817 return MolStandardize::TautomerScoringFunctions::scoreRings(m);
818 })};
819 TEST_ASSERT(res);
820 TEST_ASSERT(MolToSmiles(*res) == itm.second);
821 }
822 {
823 // this tests the templated pickCanonical() overload
824 // with a different iterable container
825 auto tautRes = te.enumerate(*mol);
826 std::set<ROMOL_SPTR> tautomerSet(tautRes.begin(), tautRes.end());
827 std::unique_ptr<ROMol> res{
828 te.pickCanonical(tautomerSet, [](const ROMol &m) -> int {
829 return MolStandardize::TautomerScoringFunctions::scoreRings(m);
830 })};
831 TEST_ASSERT(res);
832 TEST_ASSERT(MolToSmiles(*res) == itm.second);
833 }
834 }
835 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
836 }
837
testEnumerationProblems()838 void testEnumerationProblems() {
839 BOOST_LOG(rdInfoLog)
840 << "-----------------------\n Testing tautomer enumeration problems"
841 << std::endl;
842
843 std::string rdbase = getenv("RDBASE");
844 std::string tautomerFile =
845 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
846 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
847 new TautomerCatalogParams(tautomerFile));
848 // DEPRECATED, remove from here in release 2021.01
849 {
850 unsigned int ntransforms = tautparams->getNumTautomers();
851 TEST_ASSERT(ntransforms == 36);
852 }
853 // DEPRECATED, remove until here in release 2021.01
854
855 unsigned int ntransforms = tautparams->getTransforms().size();
856 TEST_ASSERT(ntransforms == 36);
857
858 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
859 #if 1
860 { // from the discussion of #2908
861 auto mol = "O=C(C1=C[NH+]=CC=C1)[O-]"_smiles;
862 auto tautRes = te.enumerate(*mol);
863 TEST_ASSERT(tautRes.size() == 1);
864 }
865 #endif
866 { // one of the examples from the tautobase paper
867 auto m =
868 "[S:1]=[c:2]1[nH+:3][c:5]([NH2:9])[nH:8][c:7]2[c:4]1[n:6][nH:10][n:11]2"_smiles;
869 TEST_ASSERT(m);
870
871 auto tautRes = te.enumerate(*m);
872 // for (auto taut : tauts) {
873 // std::cerr << MolToSmiles(*taut) << std::endl;
874 // }
875 TEST_ASSERT(tautRes.size() == 12);
876 }
877
878 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
879 }
880
testPickCanonical2()881 void testPickCanonical2() {
882 BOOST_LOG(rdInfoLog) << "-----------------------\n Testing pickCanonical"
883 << std::endl;
884
885 std::string rdbase = getenv("RDBASE");
886 std::string tautomerFile =
887 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
888 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
889 new TautomerCatalogParams(tautomerFile));
890 // DEPRECATED, remove from here in release 2021.01
891 {
892 unsigned int ntransforms = tautparams->getNumTautomers();
893 TEST_ASSERT(ntransforms == 36);
894 }
895 // DEPRECATED, remove until here in release 2021.01
896
897 unsigned int ntransforms = tautparams->getTransforms().size();
898 TEST_ASSERT(ntransforms == 36);
899
900 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
901 {
902 auto mol = "CN=c1nc[nH]cc1"_smiles;
903 TEST_ASSERT(mol);
904 auto tautRes = te.enumerate(*mol);
905 for (const auto &taut : tautRes) {
906 std::cerr << MolToSmiles(*taut) << std::endl;
907 }
908 std::unique_ptr<ROMol> canon{te.pickCanonical(tautRes)};
909 std::cerr << "res: " << MolToSmiles(*canon) << std::endl;
910 }
911 {
912 auto mol = "CN=c1[nH]cccc1"_smiles;
913 TEST_ASSERT(mol);
914 auto tautRes = te.enumerate(*mol);
915 for (const auto &taut : tautRes) {
916 std::cerr << MolToSmiles(*taut) << std::endl;
917 }
918 std::unique_ptr<ROMol> canon{te.pickCanonical(tautRes)};
919 std::cerr << "res: " << MolToSmiles(*canon) << std::endl;
920 }
921 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
922 }
923
testEnumerateDetails()924 void testEnumerateDetails() {
925 BOOST_LOG(rdInfoLog)
926 << "-----------------------\n Testing getting details back "
927 "from tautomer enumeration"
928 << std::endl;
929 std::string rdbase = getenv("RDBASE");
930 std::string tautomerFile =
931 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
932 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
933 new TautomerCatalogParams(tautomerFile));
934 // DEPRECATED, remove from here in release 2021.01
935 {
936 unsigned int ntransforms = tautparams->getNumTautomers();
937 TEST_ASSERT(ntransforms == 36);
938 }
939 // DEPRECATED, remove until here in release 2021.01
940
941 unsigned int ntransforms = tautparams->getTransforms().size();
942 TEST_ASSERT(ntransforms == 36);
943 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
944 {
945 auto mol = "c1ccccc1CN=c1[nH]cccc1"_smiles;
946 TEST_ASSERT(mol);
947
948 auto tautRes = te.enumerate(*mol);
949 TEST_ASSERT(tautRes.size() == 2);
950 TEST_ASSERT(tautRes.modifiedAtoms().count() == 2);
951 TEST_ASSERT(tautRes.modifiedBonds().count() == 7);
952 TEST_ASSERT(tautRes.modifiedAtoms().test(7));
953 TEST_ASSERT(tautRes.modifiedAtoms().test(9));
954 TEST_ASSERT(!tautRes.modifiedBonds().test(0));
955 TEST_ASSERT(tautRes.modifiedBonds().test(7));
956 TEST_ASSERT(tautRes.modifiedBonds().test(8));
957 TEST_ASSERT(tautRes.modifiedBonds().test(14));
958 }
959 {
960 // test the deprecated form
961 auto mol = "c1ccccc1CN=c1[nH]cccc1"_smiles;
962 TEST_ASSERT(mol);
963 boost::dynamic_bitset<> atomsModified(mol->getNumAtoms());
964 boost::dynamic_bitset<> bondsModified(mol->getNumBonds());
965
966 #if defined(_MSC_VER)
967 #pragma warning(suppress : 4996)
968 #elif defined(__GNUC__)
969 #pragma GCC diagnostic push
970 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
971 #endif
972 auto tauts = te.enumerate(*mol, &atomsModified, &bondsModified);
973 #if defined(__GNUC__)
974 #pragma GCC diagnostic pop
975 #endif
976 TEST_ASSERT(tauts.size() == 2);
977 TEST_ASSERT(atomsModified.count() == 2);
978 TEST_ASSERT(bondsModified.count() == 7);
979 TEST_ASSERT(atomsModified[7]);
980 TEST_ASSERT(atomsModified[9]);
981 TEST_ASSERT(!bondsModified[0]);
982 TEST_ASSERT(bondsModified[7]);
983 TEST_ASSERT(bondsModified[8]);
984 TEST_ASSERT(bondsModified[14]);
985 }
986 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
987 }
988
testGithub2990()989 void testGithub2990() {
990 BOOST_LOG(rdInfoLog) << "-----------------------\n Testing Github #2990: "
991 "Tautomer enumeration "
992 "should remove stereo in all tautomers"
993 << std::endl;
994 std::string rdbase = getenv("RDBASE");
995 std::string tautomerFile =
996 rdbase + "/Data/MolStandardize/tautomerTransforms.in";
997 auto tautparams = std::unique_ptr<TautomerCatalogParams>(
998 new TautomerCatalogParams(tautomerFile));
999 // DEPRECATED, remove from here in release 2021.01
1000 {
1001 unsigned int ntransforms = tautparams->getNumTautomers();
1002 TEST_ASSERT(ntransforms == 36);
1003 }
1004 // DEPRECATED, remove until here in release 2021.01
1005
1006 unsigned int ntransforms = tautparams->getTransforms().size();
1007 TEST_ASSERT(ntransforms == 36);
1008 TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
1009 {
1010 // atom stereo
1011 auto mol = "COC(=O)[C@@H](N)CO"_smiles;
1012 TEST_ASSERT(mol);
1013 auto res = te.enumerate(*mol);
1014 for (const auto &taut : res) {
1015 auto smi = MolToSmiles(*taut);
1016 // std::cerr << smi << std::endl;
1017 TEST_ASSERT(smi.find("@H") == std::string::npos);
1018 }
1019 }
1020 {
1021 // atom stereo, atoms not in the tautomer zone are still ok
1022 auto mol = "C[C@](Cl)(F)COC(=O)[C@@H](N)CO"_smiles;
1023 TEST_ASSERT(mol);
1024 auto res = te.enumerate(*mol);
1025 for (const auto &taut : res) {
1026 auto smi = MolToSmiles(*taut);
1027 // std::cerr << smi << std::endl;
1028 TEST_ASSERT(smi.find("@H") == std::string::npos);
1029 TEST_ASSERT(smi.find("@]") != std::string::npos);
1030 }
1031 }
1032 {
1033 // bond stereo
1034 auto mol = "C/C=C/C/N=c1/[nH]cccc1"_smiles;
1035 TEST_ASSERT(mol);
1036 TEST_ASSERT(mol->getBondBetweenAtoms(0, 1)->getBondDir() !=
1037 Bond::BondDir::NONE);
1038 TEST_ASSERT(mol->getBondBetweenAtoms(2, 3)->getBondDir() !=
1039 Bond::BondDir::NONE);
1040 TEST_ASSERT(mol->getBondBetweenAtoms(3, 4)->getBondDir() !=
1041 Bond::BondDir::NONE);
1042 TEST_ASSERT(mol->getBondBetweenAtoms(5, 6)->getBondDir() !=
1043 Bond::BondDir::NONE);
1044 TEST_ASSERT(mol->getBondBetweenAtoms(1, 2)->getStereo() >
1045 Bond::BondStereo::STEREOANY);
1046 TEST_ASSERT(mol->getBondBetweenAtoms(4, 5)->getStereo() >
1047 Bond::BondStereo::STEREOANY);
1048
1049 auto res = te.enumerate(*mol);
1050 for (const auto &taut : res) {
1051 TEST_ASSERT(taut->getBondBetweenAtoms(0, 1)->getBondDir() !=
1052 Bond::BondDir::NONE);
1053 TEST_ASSERT(taut->getBondBetweenAtoms(2, 3)->getBondDir() !=
1054 Bond::BondDir::NONE);
1055 TEST_ASSERT(taut->getBondBetweenAtoms(3, 4)->getBondDir() ==
1056 Bond::BondDir::NONE);
1057 TEST_ASSERT(taut->getBondBetweenAtoms(5, 6)->getBondDir() ==
1058 Bond::BondDir::NONE);
1059 TEST_ASSERT(taut->getBondBetweenAtoms(1, 2)->getStereo() >
1060 Bond::BondStereo::STEREOANY);
1061 TEST_ASSERT(taut->getBondBetweenAtoms(4, 5)->getStereo() ==
1062 Bond::BondStereo::STEREONONE);
1063 }
1064 }
1065
1066 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
1067 }
1068
testPickCanonicalCIPChangeOnChiralCenter()1069 void testPickCanonicalCIPChangeOnChiralCenter() {
1070 BOOST_LOG(rdInfoLog)
1071 << "-----------------------\n testPickCanonicalCIPChangeOnChiralCenter"
1072 << std::endl;
1073
1074 struct CanonicalTaut {
1075 static ROMOL_SPTR get(const TautomerEnumeratorResult &res) {
1076 std::vector<int> scores;
1077 scores.reserve(res.size());
1078 std::transform(res.begin(), res.end(), std::back_inserter(scores),
1079 [](const ROMOL_SPTR &m) {
1080 return TautomerScoringFunctions::scoreTautomer(*m);
1081 });
1082 std::vector<size_t> indices(res.size());
1083 std::iota(indices.begin(), indices.end(), 0);
1084 int bestIdx =
1085 *std::max_element(indices.begin(), indices.end(),
1086 [scores](const size_t &a, const size_t &b) {
1087 if (scores.at(a) != scores.at(b)) {
1088 return (scores.at(a) < scores.at(b));
1089 }
1090 return (a < b);
1091 });
1092 TEST_ASSERT(*std::max_element(scores.begin(), scores.end()) ==
1093 scores.at(bestIdx));
1094 return res.at(bestIdx);
1095 }
1096 };
1097
1098 auto mol = "CC\\C=C(/O)[C@@H](C)C(C)=O"_smiles;
1099 TEST_ASSERT(mol.get());
1100 TEST_ASSERT(mol->getAtomWithIdx(5)->getChiralTag() ==
1101 Atom::CHI_TETRAHEDRAL_CW);
1102 TEST_ASSERT(mol->getAtomWithIdx(5)->getProp<std::string>(
1103 common_properties::_CIPCode) == "R");
1104 {
1105 // here the chirality disappears as the chiral center is itself involved in
1106 // tautomerism
1107 TautomerEnumerator te;
1108 ROMOL_SPTR canTaut(te.canonicalize(*mol));
1109 TEST_ASSERT(canTaut.get());
1110 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1111 Atom::CHI_UNSPECIFIED);
1112 TEST_ASSERT(
1113 !canTaut->getAtomWithIdx(5)->hasProp(common_properties::_CIPCode));
1114 TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)C(C)C(C)=O");
1115 }
1116 {
1117 // here the chirality stays even if the chiral center is itself involved in
1118 // tautomerism because of the tautomerRemoveSp3Stereo parameter being set to
1119 // false
1120 CleanupParameters params;
1121 params.tautomerRemoveSp3Stereo = false;
1122 TautomerEnumerator te(params);
1123 ROMOL_SPTR canTaut(te.canonicalize(*mol));
1124 TEST_ASSERT(canTaut.get());
1125 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1126 Atom::CHI_TETRAHEDRAL_CW);
1127 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getProp<std::string>(
1128 common_properties::_CIPCode) == "S");
1129 TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)[C@@H](C)C(C)=O");
1130 }
1131 {
1132 // here the chirality disappears as the chiral center is itself involved in
1133 // tautomerism; the reassignStereo setting has no influence
1134 TautomerEnumerator te;
1135 auto res = te.enumerate(*mol);
1136 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1137 TEST_ASSERT(res.size() == 8);
1138 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1139 TEST_ASSERT(bestTaut.get());
1140 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1141 Atom::CHI_UNSPECIFIED);
1142 TEST_ASSERT(
1143 !bestTaut->getAtomWithIdx(5)->hasProp(common_properties::_CIPCode));
1144 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)C(C)C(C)=O");
1145 }
1146 {
1147 // here the chirality disappears as the chiral center is itself involved in
1148 // tautomerism; the reassignStereo setting has no influence
1149 CleanupParameters params;
1150 params.tautomerReassignStereo = false;
1151 TautomerEnumerator te(params);
1152 auto res = te.enumerate(*mol);
1153 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1154 TEST_ASSERT(res.size() == 8);
1155 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1156 TEST_ASSERT(bestTaut.get());
1157 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1158 Atom::CHI_UNSPECIFIED);
1159 TEST_ASSERT(
1160 !bestTaut->getAtomWithIdx(5)->hasProp(common_properties::_CIPCode));
1161 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)C(C)C(C)=O");
1162 }
1163 {
1164 // here the chirality stays even if the chiral center is itself involved in
1165 // tautomerism because of the tautomerRemoveSp3Stereo parameter being set to
1166 // false. As reassignStereo by default is true, the CIP code has been
1167 // recomputed and therefore it is now S (correct)
1168 CleanupParameters params;
1169 params.tautomerRemoveSp3Stereo = false;
1170 TautomerEnumerator te(params);
1171 auto res = te.enumerate(*mol);
1172 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1173 TEST_ASSERT(res.size() == 8);
1174 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1175 TEST_ASSERT(bestTaut.get());
1176 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1177 Atom::CHI_TETRAHEDRAL_CW);
1178 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1179 common_properties::_CIPCode) == "S");
1180 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@@H](C)C(C)=O");
1181 }
1182 {
1183 // here the chirality stays even if the chiral center is itself involved in
1184 // tautomerism because of the tautomerRemoveSp3Stereo parameter being set to
1185 // false. As reassignStereo is false, the CIP code has not been recomputed
1186 // and therefore it is still R (incorrect)
1187 CleanupParameters params;
1188 params.tautomerRemoveSp3Stereo = false;
1189 params.tautomerReassignStereo = false;
1190 TautomerEnumerator te(params);
1191 auto res = te.enumerate(*mol);
1192 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1193 TEST_ASSERT(res.size() == 8);
1194 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1195 TEST_ASSERT(bestTaut.get());
1196 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1197 Atom::CHI_TETRAHEDRAL_CW);
1198 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1199 common_properties::_CIPCode) == "R");
1200 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@@H](C)C(C)=O");
1201 }
1202
1203 mol = "CC\\C=C(/O)[C@@](CC)(C)C(C)=O"_smiles;
1204 TEST_ASSERT(mol.get());
1205 TEST_ASSERT(mol->getAtomWithIdx(5)->getChiralTag() ==
1206 Atom::CHI_TETRAHEDRAL_CW);
1207 TEST_ASSERT(mol->getAtomWithIdx(5)->getProp<std::string>(
1208 common_properties::_CIPCode) == "S");
1209 // here the chirality stays no matter how tautomerRemoveSp3Stereo
1210 // is set as the chiral center is not involved in tautomerism
1211 {
1212 TautomerEnumerator te;
1213 ROMOL_SPTR canTaut(te.canonicalize(*mol));
1214 TEST_ASSERT(canTaut.get());
1215 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1216 Atom::CHI_TETRAHEDRAL_CW);
1217 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getProp<std::string>(
1218 common_properties::_CIPCode) == "R");
1219 TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1220 }
1221 {
1222 CleanupParameters params;
1223 params.tautomerRemoveSp3Stereo = false;
1224 TautomerEnumerator te(params);
1225 ROMOL_SPTR canTaut(te.canonicalize(*mol));
1226 TEST_ASSERT(canTaut.get());
1227 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1228 Atom::CHI_TETRAHEDRAL_CW);
1229 TEST_ASSERT(canTaut->getAtomWithIdx(5)->getProp<std::string>(
1230 common_properties::_CIPCode) == "R");
1231 TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1232 }
1233 {
1234 // as reassignStereo by default is true, the CIP code has been recomputed
1235 // and therefore it is now R (correct)
1236 TautomerEnumerator te;
1237 auto res = te.enumerate(*mol);
1238 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1239 TEST_ASSERT(res.size() == 4);
1240 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1241 TEST_ASSERT(bestTaut.get());
1242 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1243 Atom::CHI_TETRAHEDRAL_CW);
1244 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1245 common_properties::_CIPCode) == "R");
1246 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1247 }
1248 {
1249 // as reassignStereo is false, the CIP code has not been recomputed
1250 // and therefore it is still S (incorrect)
1251 CleanupParameters params;
1252 params.tautomerReassignStereo = false;
1253 TautomerEnumerator te(params);
1254 auto res = te.enumerate(*mol);
1255 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1256 TEST_ASSERT(res.size() == 4);
1257 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1258 TEST_ASSERT(bestTaut.get());
1259 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1260 Atom::CHI_TETRAHEDRAL_CW);
1261 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1262 common_properties::_CIPCode) == "S");
1263 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1264 }
1265 {
1266 // as reassignStereo by default is true, the CIP code has been recomputed
1267 // and therefore it is now R (correct)
1268 CleanupParameters params;
1269 params.tautomerRemoveSp3Stereo = false;
1270 TautomerEnumerator te(params);
1271 auto res = te.enumerate(*mol);
1272 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1273 TEST_ASSERT(res.size() == 4);
1274 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1275 TEST_ASSERT(bestTaut.get());
1276 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1277 Atom::CHI_TETRAHEDRAL_CW);
1278 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1279 common_properties::_CIPCode) == "R");
1280 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1281 }
1282 {
1283 // here the chirality stays even if the tautomerRemoveSp3Stereo parameter
1284 // is set to false as the chiral center is not involved in tautomerism.
1285 // As reassignStereo is false, the CIP code has not been recomputed
1286 // and therefore it is still S (incorrect)
1287 CleanupParameters params;
1288 params.tautomerRemoveSp3Stereo = false;
1289 params.tautomerReassignStereo = false;
1290 TautomerEnumerator te(params);
1291 auto res = te.enumerate(*mol);
1292 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1293 TEST_ASSERT(res.size() == 4);
1294 ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1295 TEST_ASSERT(bestTaut.get());
1296 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1297 Atom::CHI_TETRAHEDRAL_CW);
1298 TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1299 common_properties::_CIPCode) == "S");
1300 TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1301 }
1302
1303 BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
1304 }
1305
testTautomerEnumeratorResult_const_iterator()1306 void testTautomerEnumeratorResult_const_iterator() {
1307 BOOST_LOG(rdInfoLog)
1308 << "-----------------------\n testTautomerEnumeratorResult_const_iterator"
1309 << std::endl;
1310 // CHEMBL3480964
1311 RWMOL_SPTR mol = "Cc1nnc(NC(=O)N2CCN(Cc3ccc(F)cc3)C(=O)C2)s1"_smiles;
1312 TautomerEnumerator te;
1313 auto res = te.enumerate(*mol);
1314 TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1315 TEST_ASSERT(res.size() == 12);
1316 auto it = res.begin();
1317 auto it2 = res.begin();
1318 // Test semantic requirements of bidirectional_iterator
1319 // https://en.cppreference.com/w/cpp/iterator/bidirectional_iterator
1320 TEST_ASSERT(it == it2);
1321 TEST_ASSERT(it++ == it2);
1322 TEST_ASSERT(it == ++it2);
1323 TEST_ASSERT(it == it2);
1324 TEST_ASSERT(it-- == it2);
1325 TEST_ASSERT(it == --it2);
1326 TEST_ASSERT(it == it2);
1327 ++it;
1328 ++it2;
1329 TEST_ASSERT(++(--it) == it2);
1330 TEST_ASSERT(--(++it) == it2);
1331 TEST_ASSERT(std::addressof(--it) == std::addressof(it));
1332 ++it;
1333 TEST_ASSERT(it == it2);
1334 it--;
1335 --it2;
1336 TEST_ASSERT(it == it2);
1337 TEST_ASSERT(*it == res[0]);
1338 TEST_ASSERT(*it++ == res.at(0));
1339 TEST_ASSERT(*it == res[1]);
1340 TEST_ASSERT(*++it == res.at(2));
1341 TEST_ASSERT(*it == res[2]);
1342 ++it;
1343 TEST_ASSERT(*it == res[3]);
1344 ++it;
1345 TEST_ASSERT(*it == res[4]);
1346 it++;
1347 TEST_ASSERT(*it == res[5]);
1348 TEST_ASSERT(*it-- == res.at(5));
1349 TEST_ASSERT(*it == res[4]);
1350 TEST_ASSERT(*--it == res.at(3));
1351 TEST_ASSERT(*it == res[3]);
1352 --it;
1353 TEST_ASSERT(*it == res[2]);
1354 --it;
1355 TEST_ASSERT(*it == res[1]);
1356 it--;
1357 TEST_ASSERT(*it == res[0]);
1358 std::ptrdiff_t i = 0;
1359 for (auto t : res) {
1360 TEST_ASSERT(t == res[i++]);
1361 }
1362 i = 0;
1363 for (auto it = res.begin(); it != res.end(); ++it) {
1364 TEST_ASSERT(std::distance(res.begin(), it) == i);
1365 TEST_ASSERT(*it == res[i]);
1366 TEST_ASSERT(it->getNumAtoms() == res[i++]->getNumAtoms());
1367 }
1368 i = res.size();
1369 for (auto it = res.end(); it != res.begin();) {
1370 TEST_ASSERT(std::distance(res.begin(), it) == i);
1371 TEST_ASSERT(*--it == res[--i]);
1372 TEST_ASSERT(it->getNumAtoms() == res[i]->getNumAtoms());
1373 }
1374 i = 0;
1375 for (const auto &pair : res.smilesTautomerMap()) {
1376 TEST_ASSERT(pair.first == MolToSmiles(*res[i]));
1377 TEST_ASSERT(pair.second.tautomer == res[i++]);
1378 }
1379 i = 0;
1380 for (auto it = res.smilesTautomerMap().begin();
1381 it != res.smilesTautomerMap().end(); ++it) {
1382 TEST_ASSERT(std::distance(res.smilesTautomerMap().begin(), it) == i);
1383 TEST_ASSERT(it->first == MolToSmiles(*res[i]));
1384 TEST_ASSERT(it->second.tautomer == res[i++]);
1385 }
1386 i = res.smilesTautomerMap().size();
1387 for (auto it = res.smilesTautomerMap().end();
1388 it != res.smilesTautomerMap().begin();) {
1389 TEST_ASSERT(std::distance(res.smilesTautomerMap().begin(), it) == i);
1390 TEST_ASSERT((--it)->first == MolToSmiles(*res[--i]));
1391 TEST_ASSERT(it->second.tautomer == res[i]);
1392 }
1393 }
1394
testGithub3430()1395 void testGithub3430() {
1396 BOOST_LOG(rdInfoLog) << "-----------------------\n testGithub3430"
1397 << std::endl;
1398 // The "guanidine terminal=N" rule should not apply to aromatic C
1399 // as this balances the "aromatic C = exocyclic N" rule with no net
1400 // effect on the score
1401 std::vector<ROMOL_SPTR> mols{"Cc1ccc(NC(=O)N=c2[nH]c(C)cn2C)nc1"_smiles,
1402 "CCCCC(=O)N=c1nc(C)c2ncn(C)c2[nH]1"_smiles,
1403 "c12ccccc1[nH]c(=N)[nH]2"_smiles};
1404 for (auto mol : mols) {
1405 TEST_ASSERT(mol);
1406 TautomerEnumerator te;
1407 auto res = te.enumerate(*mol);
1408 std::vector<int> scores;
1409 scores.reserve(res.size());
1410 std::transform(res.begin(), res.end(), std::back_inserter(scores),
1411 [](const ROMOL_SPTR &m) {
1412 return TautomerScoringFunctions::scoreTautomer(*m);
1413 });
1414 std::sort(scores.begin(), scores.end(), std::greater<int>());
1415 TEST_ASSERT(scores[1] < scores[0]);
1416 }
1417 }
1418
testGithub3755()1419 void testGithub3755() {
1420 BOOST_LOG(rdInfoLog) << "-----------------------\n testGithub3755"
1421 << std::endl;
1422 // hydrates, aminals and hemiaminals should be scored lower than
1423 // carboxylic acids, amides, amidines, and guanidines
1424 std::vector<std::pair<std::string, std::string>> orig_vs_expected{
1425 {"OC(=O)C(N)CO", "NC(CO)C(=O)O"}, {"C([C@@H](C(=O)O)N)O", "NC(CO)C(=O)O"},
1426 {"OC(=O)C(N)CN", "NCC(N)C(=O)O"}, {"NC(=O)C(N)CO", "NC(=O)C(N)CO"},
1427 {"NC(=N)C(N)CO", "N=C(N)C(N)CO"}, {"NC(=N)NC(N)CO", "N=C(N)NC(N)CO"}};
1428 TautomerEnumerator te;
1429 for (const auto &pair : orig_vs_expected) {
1430 ROMOL_SPTR orig(SmilesToMol(pair.first));
1431 TEST_ASSERT(orig);
1432 ROMOL_SPTR canonical(te.canonicalize(*orig));
1433 TEST_ASSERT(MolToSmiles(*canonical) == pair.second);
1434 }
1435 }
1436
main()1437 int main() {
1438 RDLog::InitLogs();
1439 #if 1
1440 testEnumerator();
1441 testEnumeratorParams();
1442 testEnumeratorCallback();
1443 testCanonicalize();
1444 testPickCanonical();
1445 testCustomScoreFunc();
1446 testEnumerationProblems();
1447 #endif
1448 testPickCanonical2();
1449 testEnumerateDetails();
1450 testGithub2990();
1451 testPickCanonicalCIPChangeOnChiralCenter();
1452 testTautomerEnumeratorResult_const_iterator();
1453 testGithub3430();
1454 testGithub3755();
1455 return 0;
1456 }
1457