1 //
2 //  Copyright (C) 2018 Susan H. Leung
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include "Tautomer.h"
11 #include <GraphMol/RDKitBase.h>
12 #include <GraphMol/SmilesParse/SmilesParse.h>
13 #include <GraphMol/SmilesParse/SmilesWrite.h>
14 #include <chrono>
15 #include <ctime>
16 
17 using namespace RDKit;
18 using namespace MolStandardize;
19 
testEnumerator()20 void testEnumerator() {
21   BOOST_LOG(rdInfoLog)
22       << "-----------------------\n Testing tautomer enumeration" << std::endl;
23 
24   std::string rdbase = getenv("RDBASE");
25   std::string tautomerFile =
26       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
27   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
28       new TautomerCatalogParams(tautomerFile));
29 
30   // DEPRECATED, remove from here in release 2021.01
31   {
32     unsigned int ntransforms = tautparams->getNumTautomers();
33     TEST_ASSERT(ntransforms == 36);
34   }
35   // DEPRECATED, remove until here in release 2021.01
36 
37   unsigned int ntransforms = tautparams->getTransforms().size();
38   TEST_ASSERT(ntransforms == 36);
39 
40   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
41 
42   std::function<void(const std::string &, const std::vector<std::string> &)>
43       checkAns([te](const std::string &smi,
44                     const std::vector<std::string> &ans) {
45         ROMOL_SPTR m(SmilesToMol(smi));
46         TautomerEnumeratorResult res = te.enumerate(*m);
47         TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
48         size_t sz = std::max(res.size(), ans.size());
49         bool exceedingTautomer = false;
50         bool missingTautomer = false;
51         bool wrongTautomer = false;
52         for (size_t i = 0; i < sz; ++i) {
53           if (i >= res.size()) {
54             missingTautomer = true;
55             std::cerr << "missingTautomer, ans " << ans[i] << std::endl;
56           } else if (i >= ans.size()) {
57             exceedingTautomer = true;
58             std::cerr << "exceedingTautomer, taut " << MolToSmiles(*res.at(i))
59                       << std::endl;
60           } else if (MolToSmiles(*res[i]) != ans[i]) {
61             wrongTautomer = true;
62             std::cerr << "wrongTautomer, taut " << MolToSmiles(*res[i])
63                       << ", ans " << ans[i] << std::endl;
64           }
65         }
66         TEST_ASSERT(!(missingTautomer || exceedingTautomer || wrongTautomer));
67       });
68 
69   // Enumerate 1,3 keto/enol tautomer.
70   checkAns("C1(=CCCCC1)O", {"O=C1CCCCC1", "OC1=CCCCC1"});
71 
72   // Enumerate 1,3 keto/enol tautomer.
73   checkAns("C1(=CCCCC1)O", {"O=C1CCCCC1", "OC1=CCCCC1"});
74 
75   // Enumerate 1,3 keto/enol tautomer.
76   checkAns("C1(CCCCC1)=O", {"O=C1CCCCC1", "OC1=CCCCC1"});
77 
78   // Enumerate acetophenone keto/enol tautomer.
79   checkAns("C(=C)(O)C1=CC=CC=C1", {"C=C(O)c1ccccc1", "CC(=O)c1ccccc1"});
80 
81   // Enumerate acetone keto/enol tautomer.
82   checkAns("CC(C)=O", {"C=C(C)O", "CC(C)=O"});
83 
84   // keto/enol tautomer
85   checkAns("OC(C)=C(C)C", {"C=C(O)C(C)C", "CC(=O)C(C)C", "CC(C)=C(C)O"});
86 
87   // 1-phenyl-2-propanone enol/keto
88   checkAns("c1(ccccc1)CC(=O)C",
89            {"C=C(O)Cc1ccccc1", "CC(=O)Cc1ccccc1", "CC(O)=Cc1ccccc1"});
90 
91   // 1,5 keto/enol tautomer
92   checkAns("Oc1nccc2cc[nH]c(=N)c12",
93            {"N=c1[nH]ccc2cc[nH]c(=O)c12", "N=c1[nH]ccc2ccnc(O)c12",
94             "N=c1nccc2cc[nH]c(O)c1-2", "Nc1[nH]ccc2ccnc(=O)c1-2",
95             "Nc1nccc2cc[nH]c(=O)c12", "Nc1nccc2ccnc(O)c12"});
96 
97   // 1,5 keto/enol tautomer
98   checkAns("C1(C=CCCC1)=O", {"O=C1C=CCCC1", "O=C1CC=CCC1", "OC1=CC=CCC1",
99                              "OC1=CCC=CC1", "OC1=CCCC=C1"});
100 
101   // 1,5 keto/enol tautomer
102   checkAns("C1(=CC=CCC1)O", {"O=C1C=CCCC1", "O=C1CC=CCC1", "OC1=CC=CCC1",
103                              "OC1=CCC=CC1", "OC1=CCCC=C1"});
104 
105   // aliphatic imine tautomer
106   checkAns("C1(CCCCC1)=N", {"N=C1CCCCC1", "NC1=CCCCC1"});
107 
108   // aliphatic imine tautomer
109   checkAns("C1(=CCCCC1)N", {"N=C1CCCCC1", "NC1=CCCCC1"});
110 
111   // special imine tautomer
112   checkAns("C1(C=CC=CN1)=CC", {"CC=C1C=CC=CN1", "CC=C1C=CCC=N1", "CCc1ccccn1"});
113 
114   // special imine tautomer
115   checkAns("C1(=NC=CC=C1)CC", {"CC=C1C=CC=CN1", "CC=C1C=CCC=N1", "CCc1ccccn1"});
116 
117   // 1,3 aromatic heteroatom H shift
118   checkAns("O=c1cccc[nH]1", {"O=c1cccc[nH]1", "Oc1ccccn1"});
119 
120   // 1,3 aromatic heteroatom H shift
121   checkAns("Oc1ccccn1", {"O=c1cccc[nH]1", "Oc1ccccn1"});
122 
123   // 1,3 aromatic heteroatom H shift
124   checkAns("Oc1ncc[nH]1", {"O=c1[nH]cc[nH]1", "Oc1ncc[nH]1"});
125 
126   // 1,3 heteroatom H shift
127   checkAns("OC(C)=NC", {"C=C(O)NC", "CN=C(C)O", "CNC(C)=O"});
128 
129   // 1,3 heteroatom H shift
130   checkAns("CNC(C)=O", {"C=C(O)NC", "CN=C(C)O", "CNC(C)=O"});
131 
132   // 1,3 heteroatom H shift
133   checkAns("S=C(N)N", {"N=C(N)S", "NC(N)=S"});
134 
135   // 1,3 heteroatom H shift
136   checkAns("SC(N)=N", {"N=C(N)S", "NC(N)=S"});
137 
138   // 1,3 heteroatom H shift
139   checkAns("N=c1[nH]ccn(C)1", {"Cn1cc[nH]c1=N", "Cn1ccnc1N"});
140 
141   // 1,3 heteroatom H shift
142   checkAns("CN=c1[nH]cncc1",
143            {"CN=c1cc[nH]cn1", "CN=c1ccnc[nH]1", "CNc1ccncn1"});
144 
145   // 1,5 aromatic heteroatom H shift
146   checkAns("Oc1cccc2ccncc12", {"O=c1cccc2cc[nH]cc1-2", "Oc1cccc2ccncc12"});
147 
148   // 1,5 aromatic heteroatom H shift
149   checkAns("O=c1cccc2cc[nH]cc1-2", {"O=c1cccc2cc[nH]cc1-2", "Oc1cccc2ccncc12"});
150 
151   // 1,5 aromatic heteroatom H shift
152   checkAns("Cc1n[nH]c2ncnn12",
153            {"C=C1NN=C2N=CNN12", "C=C1NN=C2NC=NN12", "C=C1NNc2ncnn21",
154             "Cc1n[nH]c2ncnn12", "Cc1nnc2[nH]cnn12", "Cc1nnc2nc[nH]n12"});
155 
156   // 1,5 aromatic heteroatom H shift
157   checkAns("Cc1nnc2nc[nH]n12",
158            {"C=C1NN=C2N=CNN12", "C=C1NN=C2NC=NN12", "C=C1NNc2ncnn21",
159             "Cc1n[nH]c2ncnn12", "Cc1nnc2[nH]cnn12", "Cc1nnc2nc[nH]n12"});
160 
161   // 1,5 aromatic heteroatom H shift
162   checkAns("Oc1ccncc1", {"O=c1cc[nH]cc1", "Oc1ccncc1"});
163 
164   // 1,5 aromatic heteroatom H shift
165   checkAns("Oc1c(cccc3)c3nc2ccncc12",
166            {"O=c1c2c[nH]ccc-2nc2ccccc12", "O=c1c2ccccc2[nH]c2ccncc12",
167             "Oc1c2ccccc2nc2ccncc12"});
168 
169   // 1,3 and 1,5 aromatic heteroatom H shift
170   checkAns("Oc1ncncc1", {"O=c1cc[nH]cn1", "O=c1ccnc[nH]1", "Oc1ccncn1"});
171 
172   // 1,5 aromatic heteroatom H shift
173   checkAns("C2(=C1C(=NC=N1)[NH]C(=N2)N)O",
174            {"N=c1[nH]c(=O)c2[nH]cnc2[nH]1", "N=c1[nH]c(=O)c2nc[nH]c2[nH]1",
175             "N=c1[nH]c2ncnc-2c(O)[nH]1", "N=c1nc(O)c2[nH]cnc2[nH]1",
176             "N=c1nc(O)c2nc[nH]c2[nH]1", "N=c1nc2[nH]cnc2c(O)[nH]1",
177             "N=c1nc2nc[nH]c2c(O)[nH]1", "Nc1nc(=O)c2[nH]cnc2[nH]1",
178             "Nc1nc(=O)c2nc[nH]c2[nH]1", "Nc1nc(O)c2[nH]cnc2n1",
179             "Nc1nc(O)c2nc[nH]c2n1", "Nc1nc(O)c2ncnc-2[nH]1",
180             "Nc1nc2[nH]cnc2c(=O)[nH]1", "Nc1nc2nc[nH]c2c(=O)[nH]1",
181             "Nc1nc2ncnc-2c(O)[nH]1"});
182 
183   // 1,5 aromatic heteroatom H shift
184   checkAns("C2(C1=C([NH]C=N1)[NH]C(=N2)N)=O",
185            {"N=c1[nH]c(=O)c2[nH]cnc2[nH]1", "N=c1[nH]c(=O)c2nc[nH]c2[nH]1",
186             "N=c1[nH]c2ncnc-2c(O)[nH]1", "N=c1nc(O)c2[nH]cnc2[nH]1",
187             "N=c1nc(O)c2nc[nH]c2[nH]1", "N=c1nc2[nH]cnc2c(O)[nH]1",
188             "N=c1nc2nc[nH]c2c(O)[nH]1", "Nc1nc(=O)c2[nH]cnc2[nH]1",
189             "Nc1nc(=O)c2nc[nH]c2[nH]1", "Nc1nc(O)c2[nH]cnc2n1",
190             "Nc1nc(O)c2nc[nH]c2n1", "Nc1nc(O)c2ncnc-2[nH]1",
191             "Nc1nc2[nH]cnc2c(=O)[nH]1", "Nc1nc2nc[nH]c2c(=O)[nH]1",
192             "Nc1nc2ncnc-2c(O)[nH]1"});
193 
194   // 1,5 aromatic heteroatom H shift
195   checkAns("Oc1n(C)ncc1", {"CN1N=CCC1=O", "Cn1[nH]ccc1=O", "Cn1nccc1O"});
196 
197   // 1,5 aromatic heteroatom H shift
198   checkAns("O=c1nc2[nH]ccn2cc1",
199            {"O=c1ccn2cc[nH]c2n1", "O=c1ccn2ccnc2[nH]1", "Oc1ccn2ccnc2n1"});
200 
201   // 1,5 aromatic heteroatom H shift
202   checkAns("N=c1nc[nH]cc1", {"N=c1cc[nH]cn1", "N=c1ccnc[nH]1", "Nc1ccncn1"});
203 
204   // 1,5 aromatic heteroatom H shift
205   checkAns("N=c(c1)ccn2cc[nH]c12", {"N=c1ccn2cc[nH]c2c1", "Nc1ccn2ccnc2c1"});
206 
207   // 1,5 aromatic heteroatom H shift
208   checkAns("CN=c1nc[nH]cc1",
209            {"CN=c1cc[nH]cn1", "CN=c1ccnc[nH]1", "CNc1ccncn1"});
210 
211   // 1,7 aromatic heteroatom H shift
212   checkAns("c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
213            {"c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
214             "c1ccc2c(c1)=NC(c1nc3ccccc3[nH]1)N=2",
215             "c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2"});
216 
217   // 1,7 aromatic heteroatom H shift
218   checkAns("c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2",
219            {"c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
220             "c1ccc2c(c1)=NC(c1nc3ccccc3[nH]1)N=2",
221             "c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2"});
222 
223   // 1,9 aromatic heteroatom H shift
224   checkAns("CNc1ccnc2ncnn21", {"CN=c1cc[nH]c2ncnn12", "CN=c1ccnc2[nH]cnn12",
225                                "CN=c1ccnc2nc[nH]n12", "CNc1ccnc2ncnn12"});
226 
227   // 1,9 aromatic heteroatom H shift
228   checkAns("CN=c1ccnc2nc[nH]n21", {"CN=c1cc[nH]c2ncnn12", "CN=c1ccnc2[nH]cnn12",
229                                    "CN=c1ccnc2nc[nH]n12", "CNc1ccnc2ncnn12"});
230 
231   // 1,11 aromatic heteroatom H shift
232   checkAns("Nc1ccc(C=C2C=CC(=O)C=C2)cc1",
233            {"N=C1C=CC(=CC2C=CC(=O)C=C2)C=C1", "N=C1C=CC(=Cc2ccc(O)cc2)C=C1",
234             "N=C1C=CC(C=C2C=CC(=O)C=C2)C=C1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"});
235 
236   // 1,11 aromatic heteroatom H shift
237   checkAns("N=C1C=CC(=Cc2ccc(O)cc2)C=C1",
238            {"N=C1C=CC(=CC2C=CC(=O)C=C2)C=C1", "N=C1C=CC(=Cc2ccc(O)cc2)C=C1",
239             "N=C1C=CC(C=C2C=CC(=O)C=C2)C=C1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"});
240 
241   // heterocyclic tautomer
242   checkAns("n1ccc2ccc[nH]c12", {"c1c[nH]c2nccc-2c1", "c1cnc2[nH]ccc2c1"});
243 
244   // heterocyclic tautomer
245   checkAns("c1cc(=O)[nH]c2nccn12",
246            {"O=c1ccn2cc[nH]c2n1", "O=c1ccn2ccnc2[nH]1", "Oc1ccn2ccnc2n1"});
247 
248   // heterocyclic tautomer
249   checkAns("c1cnc2c[nH]ccc12", {"c1cc2cc[nH]c2cn1", "c1cc2cc[nH]cc-2n1"});
250 
251   // heterocyclic tautomer
252   checkAns("n1ccc2c[nH]ccc12", {"c1cc2[nH]ccc2cn1", "c1cc2c[nH]ccc-2n1"});
253 
254   // heterocyclic tautomer
255   checkAns("c1cnc2ccc[nH]c12", {"c1c[nH]c2ccnc-2c1", "c1cnc2cc[nH]c2c1"});
256 
257   // furanone tautomer
258   checkAns("C1=CC=C(O1)O", {"O=C1CC=CO1", "Oc1ccco1"});
259 
260   // furanone tautomer
261   checkAns("O=C1CC=CO1", {"O=C1CC=CO1", "Oc1ccco1"});
262 
263   // keten/ynol tautomer
264   checkAns("CC=C=O", {"CC#CO", "CC=C=O"});
265 
266   // keten/ynol tautomer
267   checkAns("CC#CO", {"CC#CO", "CC=C=O"});
268 
269   // ionic nitro/aci-nitro tautomer
270   checkAns("C([N+](=O)[O-])C", {"CC=[N+]([O-])O", "CC[N+](=O)[O-]"});
271 
272   // ionic nitro/aci-nitro tautomer
273   checkAns("C(=[N+](O)[O-])C", {"CC=[N+]([O-])O", "CC[N+](=O)[O-]"});
274 
275   // oxim nitroso tautomer
276   checkAns("CC(C)=NO", {"C=C(C)NO", "CC(C)=NO", "CC(C)N=O"});
277 
278   // oxim nitroso tautomer
279   checkAns("CC(C)N=O", {"C=C(C)NO", "CC(C)=NO", "CC(C)N=O"});
280 
281   // oxim/nitroso tautomer via phenol
282   checkAns("O=Nc1ccc(O)cc1",
283            {"O=C1C=CC(=NO)C=C1", "O=NC1C=CC(=O)C=C1", "O=Nc1ccc(O)cc1"});
284 
285   // oxim/nitroso tautomer via phenol
286   checkAns("O=C1C=CC(=NO)C=C1",
287            {"O=C1C=CC(=NO)C=C1", "O=NC1C=CC(=O)C=C1", "O=Nc1ccc(O)cc1"});
288 
289   // cyano/iso-cyanic acid tautomer
290   checkAns("C(#N)O", {"N#CO", "N=C=O"});
291 
292   // cyano/iso-cyanic acid tautomer
293   checkAns("C(=N)=O", {"N#CO", "N=C=O"});
294 
295   // formamidinesulfinic acid tautomer
296   checkAns("NC(N)=S(=O)=O",
297            {"N=C(N)S(=O)O", "N=C(N)[SH](=O)=O", "NC(N)=S(=O)=O"});
298 
299   // formamidinesulfinic acid tautomer
300   checkAns("NC(=N)S(=O)O",
301            {"N=C(N)S(=O)O", "N=C(N)[SH](=O)=O", "NC(N)=S(=O)=O"});
302 
303   // formamidinesulfonic acid tautomer
304   checkAns("NC(=N)S(=O)(=O)O", {"N=C(N)S(=O)(=O)O"});
305 
306   // isocyanide tautomer
307   checkAns("C#N", {"C#N", "[C-]#[NH+]"});
308 
309   // isocyanide tautomer
310   checkAns("[C-]#[NH+]", {"C#N", "[C-]#[NH+]"});
311 
312   // phosphonic acid tautomer
313   checkAns("[PH](=O)(O)(O)", {"O=[PH](O)O", "OP(O)O"});
314 
315   // phosphonic acid tautomer
316   checkAns("P(O)(O)O", {"O=[PH](O)O", "OP(O)O"});
317 
318   // Remove stereochemistry from mobile double bonds
319   checkAns("c1(ccccc1)/C=C(/O)\\C",
320            {"C=C(O)Cc1ccccc1", "CC(=O)Cc1ccccc1", "CC(O)=Cc1ccccc1"});
321 
322   // Remove stereochemistry from mobile double bonds
323   checkAns("C/C=C/C(C)=O", {"C=C(O)C=CC", "C=CC=C(C)O", "C=CCC(=C)O",
324                             "C=CCC(C)=O", "CC=CC(C)=O"});
325 
326   // Remove stereochemistry from mobile double bonds
327   std::string smi66 = "C/C=C\\C(C)=O";
328   ROMOL_SPTR m66(SmilesToMol(smi66));
329   TautomerEnumeratorResult res66 = te.enumerate(*m66);
330   std::vector<std::string> ans66 = {"C=C(O)C=CC", "C=CC=C(C)O", "C=CCC(=C)O",
331                                     "C=CCC(C)=O", "CC=CC(C)=O"};
332   TEST_ASSERT(res66.size() == ans66.size());
333   TEST_ASSERT(res66.status() == TautomerEnumeratorStatus::Completed);
334 
335   std::vector<std::string> sm66;
336   for (const auto &r : res66) {
337     sm66.push_back(MolToSmiles(*r));
338   }
339   // sort both for alphabetical order
340   std::sort(sm66.begin(), sm66.end());
341   std::sort(ans66.begin(), ans66.end());
342   TEST_ASSERT(sm66 == ans66);
343 
344   // Gaunine tautomers
345   std::string smi67 = "N1C(N)=NC=2N=CNC2C1=O";
346   ROMOL_SPTR m67(SmilesToMol(smi67));
347   TautomerEnumeratorResult res67 = te.enumerate(*m67);
348   std::vector<std::string> ans67 = {
349       "N=c1[nH]c(=O)c2[nH]cnc2[nH]1", "N=c1[nH]c(=O)c2nc[nH]c2[nH]1",
350       "N=c1[nH]c2ncnc-2c(O)[nH]1",    "N=c1nc(O)c2[nH]cnc2[nH]1",
351       "N=c1nc(O)c2nc[nH]c2[nH]1",     "N=c1nc2[nH]cnc2c(O)[nH]1",
352       "N=c1nc2nc[nH]c2c(O)[nH]1",     "Nc1nc(=O)c2[nH]cnc2[nH]1",
353       "Nc1nc(=O)c2nc[nH]c2[nH]1",     "Nc1nc(O)c2[nH]cnc2n1",
354       "Nc1nc(O)c2nc[nH]c2n1",         "Nc1nc(O)c2ncnc-2[nH]1",
355       "Nc1nc2[nH]cnc2c(=O)[nH]1",     "Nc1nc2nc[nH]c2c(=O)[nH]1",
356       "Nc1nc2ncnc-2c(O)[nH]1"};
357   TEST_ASSERT(res67.size() == ans67.size());
358   TEST_ASSERT(res67.status() == TautomerEnumeratorStatus::Completed);
359   std::vector<std::string> sm67;
360   for (const auto &r : res67) {
361     sm67.push_back(MolToSmiles(*r));
362   }
363   // sort both by alphabetical order
364   std::sort(sm67.begin(), sm67.end());
365   std::sort(ans67.begin(), ans67.end());
366   TEST_ASSERT(sm67 == ans67);
367 
368   // Test a structure with hundreds of tautomers.
369   std::string smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O";
370   ROMOL_SPTR m68(SmilesToMol(smi68));
371   TautomerEnumeratorResult res68 = te.enumerate(*m68);
372   // the maxTransforms limit is hit before the maxTautomers one
373   TEST_ASSERT(res68.size() == 292);
374   TEST_ASSERT(res68.status() == TautomerEnumeratorStatus::MaxTransformsReached);
375   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
376 }
377 
testEnumeratorParams()378 void testEnumeratorParams() {
379   BOOST_LOG(rdInfoLog)
380       << "-----------------------\n Testing TautomerEnumerator params"
381       << std::endl;
382 
383   // Test a structure with hundreds of tautomers.
384   std::string smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O";
385   ROMOL_SPTR m68(SmilesToMol(smi68));
386 
387   {
388     TautomerEnumerator te;
389     TautomerEnumeratorResult res68 = te.enumerate(*m68);
390     TEST_ASSERT(res68.size() == 292);
391     TEST_ASSERT(res68.status() ==
392                 TautomerEnumeratorStatus::MaxTransformsReached);
393   }
394   {
395     CleanupParameters params;
396     params.maxTautomers = 50;
397     TautomerEnumerator te(params);
398     TautomerEnumeratorResult res68 = te.enumerate(*m68);
399     TEST_ASSERT(res68.size() == 50);
400     TEST_ASSERT(res68.status() ==
401                 TautomerEnumeratorStatus::MaxTautomersReached);
402   }
403   std::string sAlaSmi = "C[C@H](N)C(=O)O";
404   ROMOL_SPTR sAla(SmilesToMol(sAlaSmi));
405   {
406     // test remove (S)-Ala stereochemistry
407     TEST_ASSERT(sAla->getAtomWithIdx(1)->getChiralTag() ==
408                 Atom::CHI_TETRAHEDRAL_CCW);
409     TEST_ASSERT(sAla->getAtomWithIdx(1)->getProp<std::string>(
410                     common_properties::_CIPCode) == "S");
411     CleanupParameters params;
412     params.tautomerRemoveSp3Stereo = true;
413     TautomerEnumerator te(params);
414     TautomerEnumeratorResult res = te.enumerate(*sAla);
415     for (const auto &taut : res) {
416       TEST_ASSERT(taut->getAtomWithIdx(1)->getChiralTag() ==
417                   Atom::CHI_UNSPECIFIED);
418       TEST_ASSERT(
419           !taut->getAtomWithIdx(1)->hasProp(common_properties::_CIPCode));
420     }
421   }
422   {
423     // test retain (S)-Ala stereochemistry
424     TEST_ASSERT(sAla->getAtomWithIdx(1)->getChiralTag() ==
425                 Atom::CHI_TETRAHEDRAL_CCW);
426     TEST_ASSERT(sAla->getAtomWithIdx(1)->getProp<std::string>(
427                     common_properties::_CIPCode) == "S");
428     CleanupParameters params;
429     params.tautomerRemoveSp3Stereo = false;
430     TautomerEnumerator te(params);
431     TautomerEnumeratorResult res = te.enumerate(*sAla);
432     for (const auto &taut : res) {
433       const auto tautAtom = taut->getAtomWithIdx(1);
434       if (tautAtom->getHybridization() == Atom::SP3) {
435         TEST_ASSERT(tautAtom->hasProp(common_properties::_CIPCode));
436         TEST_ASSERT(
437             tautAtom->getProp<std::string>(common_properties::_CIPCode) == "S");
438         TEST_ASSERT(tautAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
439       } else {
440         TEST_ASSERT(!tautAtom->hasProp(common_properties::_CIPCode));
441         TEST_ASSERT(tautAtom->getChiralTag() == Atom::CHI_UNSPECIFIED);
442       }
443     }
444   }
445   std::string eEnolSmi = "C/C=C/O";
446   ROMOL_SPTR eEnol(SmilesToMol(eEnolSmi));
447   TEST_ASSERT(eEnol->getBondWithIdx(1)->getStereo() == Bond::STEREOE);
448   {
449     // test remove enol E stereochemistry
450     CleanupParameters params;
451     params.tautomerRemoveBondStereo = true;
452     TautomerEnumerator te(params);
453     TautomerEnumeratorResult res = te.enumerate(*eEnol);
454     for (const auto &taut : res) {
455       TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREONONE);
456     }
457   }
458   {
459     // test retain enol E stereochemistry
460     CleanupParameters params;
461     params.tautomerRemoveBondStereo = false;
462     TautomerEnumerator te(params);
463     TautomerEnumeratorResult res = te.enumerate(*eEnol);
464     for (const auto &taut : res) {
465       if (taut->getBondWithIdx(1)->getBondType() == Bond::DOUBLE) {
466         TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREOE);
467       }
468     }
469   }
470   ROMOL_SPTR zEnol = "C/C=C\\O"_smiles;
471   TEST_ASSERT(zEnol->getBondWithIdx(1)->getStereo() == Bond::STEREOZ);
472   {
473     // test remove enol Z stereochemistry
474     CleanupParameters params;
475     params.tautomerRemoveBondStereo = true;
476     TautomerEnumerator te(params);
477     TautomerEnumeratorResult res = te.enumerate(*zEnol);
478     for (const auto &taut : res) {
479       TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREONONE);
480     }
481   }
482   {
483     // test retain enol Z stereochemistry
484     CleanupParameters params;
485     params.tautomerRemoveBondStereo = false;
486     TautomerEnumerator te(params);
487     TautomerEnumeratorResult res = te.enumerate(*zEnol);
488     for (const auto &taut : res) {
489       if (taut->getBondWithIdx(1)->getBondType() == Bond::DOUBLE) {
490         TEST_ASSERT(taut->getBondWithIdx(1)->getStereo() == Bond::STEREOZ);
491       }
492     }
493   }
494   ROMOL_SPTR chembl2024142 =
495       "[2H]C1=C(C(=C2C(=C1[2H])C(=O)C(=C(C2=O)C([2H])([2H])[2H])C/C=C(\\C)/CC([2H])([2H])/C=C(/CC/C=C(\\C)/CCC=C(C)C)\\C([2H])([2H])[2H])[2H])[2H]"_smiles;
496   MolOps::RemoveHsParameters hparams;
497   hparams.removeAndTrackIsotopes = true;
498   chembl2024142.reset(MolOps::removeHs(*chembl2024142, hparams));
499   TEST_ASSERT(chembl2024142->getAtomWithIdx(12)->hasProp(
500       common_properties::_isotopicHs));
501   {
502     // test remove isotopic Hs involved in tautomerism
503     CleanupParameters params;
504     params.tautomerRemoveIsotopicHs = true;
505     TautomerEnumerator te(params);
506     TautomerEnumeratorResult res = te.enumerate(*chembl2024142);
507     for (const auto &taut : res) {
508       const auto tautAtom = taut->getAtomWithIdx(12);
509       TEST_ASSERT(!tautAtom->hasProp(common_properties::_isotopicHs));
510     }
511   }
512   {
513     // test retain isotopic Hs involved in tautomerism
514     CleanupParameters params;
515     params.tautomerRemoveIsotopicHs = false;
516     TautomerEnumerator te(params);
517     TautomerEnumeratorResult res = te.enumerate(*chembl2024142);
518     for (const auto &taut : res) {
519       const auto tautAtom = taut->getAtomWithIdx(12);
520       TEST_ASSERT(tautAtom->hasProp(common_properties::_isotopicHs));
521     }
522   }
523   ROMOL_SPTR enolexample = "[2H]OC=C"_smiles;
524   enolexample.reset(MolOps::removeHs(*enolexample, hparams));
525   TEST_ASSERT(
526       enolexample->getAtomWithIdx(0)->hasProp(common_properties::_isotopicHs));
527   {
528     CleanupParameters params;
529     params.tautomerRemoveIsotopicHs = true;
530     TautomerEnumerator te(params);
531     TautomerEnumeratorResult res = te.enumerate(*enolexample);
532     for (const auto &taut : res) {
533       const auto tautAtom = taut->getAtomWithIdx(0);
534       TEST_ASSERT(!(tautAtom->hasProp(common_properties::_isotopicHs) &&
535                     !tautAtom->getTotalNumHs()));
536     }
537   }
538   {
539     CleanupParameters params;
540     params.tautomerRemoveIsotopicHs = false;
541     TautomerEnumerator te(params);
542     TautomerEnumeratorResult res = te.enumerate(*enolexample);
543     for (const auto &taut : res) {
544       const auto tautAtom = taut->getAtomWithIdx(0);
545       TEST_ASSERT(!(tautAtom->hasProp(common_properties::_isotopicHs) &&
546                     !tautAtom->getTotalNumHs()));
547     }
548   }
549   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
550 }
551 
testEnumeratorCallback()552 void testEnumeratorCallback() {
553   class MyTautomerEnumeratorCallback : public TautomerEnumeratorCallback {
554    public:
555     MyTautomerEnumeratorCallback(double timeoutMs)
556         : d_timeoutMs(timeoutMs), d_start(std::chrono::system_clock::now()) {}
557     bool operator()(const ROMol &, const TautomerEnumeratorResult &) override {
558       double elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
559                              std::chrono::system_clock::now() - d_start)
560                              .count();
561       return (elapsedMs < d_timeoutMs);
562     }
563 
564    private:
565     double d_timeoutMs;
566     std::chrono::time_point<std::chrono::system_clock> d_start;
567   };
568 
569   BOOST_LOG(rdInfoLog)
570       << "-----------------------\n Testing TautomerEnumerator callback"
571       << std::endl;
572 
573   // Test a structure with hundreds of tautomers.
574   std::string smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O";
575   ROMOL_SPTR m68(SmilesToMol(smi68));
576 
577   CleanupParameters params;
578   params.maxTransforms = 10000;
579   params.maxTautomers = 10000;
580   {
581     TautomerEnumerator te(params);
582     te.setCallback(new MyTautomerEnumeratorCallback(50.0));
583     TautomerEnumeratorResult res68 = te.enumerate(*m68);
584     // either the enumeration was canceled due to timeout
585     // or it has completed very quickly
586     bool hasReachedTimeout =
587         (res68.size() < 375 &&
588          res68.status() == TautomerEnumeratorStatus::Canceled);
589     bool hasCompleted = (res68.size() == 375 &&
590                          res68.status() == TautomerEnumeratorStatus::Completed);
591     if (hasReachedTimeout) {
592       std::cerr << "Enumeration was canceled due to timeout (50 ms)"
593                 << std::endl;
594     }
595     if (hasCompleted) {
596       std::cerr << "Enumeration has completed" << std::endl;
597     }
598     TEST_ASSERT(hasReachedTimeout || hasCompleted);
599     TEST_ASSERT(hasReachedTimeout ^ hasCompleted);
600   }
601   {
602     TautomerEnumerator te(params);
603     te.setCallback(new MyTautomerEnumeratorCallback(10000.0));
604     TautomerEnumeratorResult res68 = te.enumerate(*m68);
605     // either the enumeration completed
606     // or it ran very slowly and was canceled due to timeout
607     bool hasReachedTimeout =
608         (res68.size() < 375 &&
609          res68.status() == TautomerEnumeratorStatus::Canceled);
610     bool hasCompleted = (res68.size() == 375 &&
611                          res68.status() == TautomerEnumeratorStatus::Completed);
612     if (hasReachedTimeout) {
613       std::cerr << "Enumeration was canceled due to timeout (10 s)"
614                 << std::endl;
615     }
616     if (hasCompleted) {
617       std::cerr << "Enumeration has completed" << std::endl;
618     }
619     TEST_ASSERT(hasReachedTimeout || hasCompleted);
620     TEST_ASSERT(hasReachedTimeout ^ hasCompleted);
621   }
622 
623   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
624 }
625 
626 // tests from the molvs repo:
627 // https://github.com/mcs07/MolVS/blob/456f2fe723acfedbf634a8bcfe943b83ea7d4c20/tests/test_tautomer.py
628 std::vector<std::pair<std::string, std::string>> canonTautomerData{
629     {"C1(=CCCCC1)O", "O=C1CCCCC1"},
630     {"C1(CCCCC1)=O", "O=C1CCCCC1"},
631     {"C(=C)(O)C1=CC=CC=C1", "CC(=O)c1ccccc1"},
632     {"CC(C)=O", "CC(C)=O"},
633     {"OC(C)=C(C)C", "CC(=O)C(C)C"},
634     {"c1(ccccc1)CC(=O)C", "CC(=O)Cc1ccccc1"},
635     {"Oc1nccc2cc[nH]c(=N)c12", "Nc1nccc2cc[nH]c(=O)c12"},
636     {"C1(C=CCCC1)=O", "O=C1C=CCCC1"},
637     {"C1(CCCCC1)=N", "N=C1CCCCC1"},
638     {"C1(=CCCCC1)N", "N=C1CCCCC1"},
639     {"C1(C=CC=CN1)=CC", "CCc1ccccn1"},
640     {"C1(=NC=CC=C1)CC", "CCc1ccccn1"},
641     {"O=c1cccc[nH]1", "O=c1cccc[nH]1"},
642     {"Oc1ccccn1", "O=c1cccc[nH]1"},
643     {"Oc1ncc[nH]1", "O=c1[nH]cc[nH]1"},
644     {"OC(C)=NC", "CNC(C)=O"},
645     {"CNC(C)=O", "CNC(C)=O"},
646     {"S=C(N)N", "NC(N)=S"},
647     {"SC(N)=N", "NC(N)=S"},
648     {"N=c1[nH]ccn(C)1", "Cn1ccnc1N"},
649     {"CN=c1[nH]cncc1", "CNc1ccncn1"},
650     {"Oc1cccc2ccncc12", "Oc1cccc2ccncc12"},
651     {"O=c1cccc2cc[nH]cc1-2", "Oc1cccc2ccncc12"},
652     {"Cc1n[nH]c2ncnn12", "Cc1n[nH]c2ncnn12"},
653     {"Cc1nnc2nc[nH]n12", "Cc1n[nH]c2ncnn12"},
654     {"Oc1ccncc1", "O=c1cc[nH]cc1"},
655     {"Oc1c(cccc3)c3nc2ccncc12", "O=c1c2ccccc2[nH]c2ccncc12"},
656     {"Oc1ncncc1", "O=c1cc[nH]cn1"},
657     {"C2(=C1C(=NC=N1)[NH]C(=N2)N)O", "Nc1nc(=O)c2[nH]cnc2[nH]1"},
658     {"C2(C1=C([NH]C=N1)[NH]C(=N2)N)=O", "Nc1nc(=O)c2[nH]cnc2[nH]1"},
659     {"Oc1n(C)ncc1", "Cn1[nH]ccc1=O"},
660     {"O=c1nc2[nH]ccn2cc1", "O=c1ccn2cc[nH]c2n1"},
661     {"N=c1nc[nH]cc1", "Nc1ccncn1"},
662     {"N=c(c1)ccn2cc[nH]c12", "Nc1ccn2ccnc2c1"},
663     {"CN=c1nc[nH]cc1", "CNc1ccncn1"},
664     {"c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1",
665      "c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1"},
666     {"c1ccc2c(c1)NC(=C1N=c3ccccc3=N1)N2",
667      "c1ccc2[nH]c(-c3nc4ccccc4[nH]3)nc2c1"},
668     {"CNc1ccnc2ncnn21", "CNc1ccnc2ncnn12"},
669     {"CN=c1ccnc2nc[nH]n21", "CNc1ccnc2ncnn12"},
670     {"Nc1ccc(C=C2C=CC(=O)C=C2)cc1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"},
671     {"N=C1C=CC(=Cc2ccc(O)cc2)C=C1", "Nc1ccc(C=C2C=CC(=O)C=C2)cc1"},
672     {"n1ccc2ccc[nH]c12", "c1cnc2[nH]ccc2c1"},
673     {"c1cc(=O)[nH]c2nccn12", "O=c1ccn2cc[nH]c2n1"},
674     {"c1cnc2c[nH]ccc12", "c1cc2cc[nH]c2cn1"},
675     {"n1ccc2c[nH]ccc12", "c1cc2[nH]ccc2cn1"},
676     {"c1cnc2ccc[nH]c12", "c1cnc2cc[nH]c2c1"},
677     {"C1=CC=C(O1)O", "Oc1ccco1"},
678     {"O=C1CC=CO1", "Oc1ccco1"},
679     {"CC=C=O", "CC=C=O"},
680     {"CC#CO", "CC=C=O"},
681     {"C([N+](=O)[O-])C", "CC[N+](=O)[O-]"},
682     {"C(=[N+](O)[O-])C", "CC[N+](=O)[O-]"},
683     {"CC(C)=NO", "CC(C)=NO"},
684     {"CC(C)N=O", "CC(C)=NO"},
685     {"O=Nc1ccc(O)cc1", "O=Nc1ccc(O)cc1"},
686     {"O=C1C=CC(=NO)C=C1", "O=Nc1ccc(O)cc1"},
687     {"C(#N)O", "N=C=O"},
688     {"C(=N)=O", "N=C=O"},
689     {"N=C(N)S(=O)O", "N=C(N)S(=O)O"},
690     {"C#N", "C#N"},
691     {"[C-]#[NH+]", "C#N"},
692     {"[PH](=O)(O)(O)", "O=[PH](O)O"},
693     {"P(O)(O)O", "O=[PH](O)O"}};
694 
testCanonicalize()695 void testCanonicalize() {
696   BOOST_LOG(rdInfoLog)
697       << "-----------------------\n Testing tautomer canonicalization"
698       << std::endl;
699 
700   std::string rdbase = getenv("RDBASE");
701   std::string tautomerFile =
702       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
703   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
704       new TautomerCatalogParams(tautomerFile));
705   // DEPRECATED, remove from here in release 2021.01
706   {
707     unsigned int ntransforms = tautparams->getNumTautomers();
708     TEST_ASSERT(ntransforms == 36);
709   }
710   // DEPRECATED, remove until here in release 2021.01
711 
712   unsigned int ntransforms = tautparams->getTransforms().size();
713   TEST_ASSERT(ntransforms == 36);
714 
715   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
716 
717   for (const auto &itm : canonTautomerData) {
718     std::unique_ptr<ROMol> mol{SmilesToMol(itm.first)};
719     TEST_ASSERT(mol);
720     std::unique_ptr<ROMol> res{te.canonicalize(*mol)};
721     TEST_ASSERT(res);
722     TEST_ASSERT(MolToSmiles(*res) == itm.second);
723   }
724   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
725 }
726 
testPickCanonical()727 void testPickCanonical() {
728   BOOST_LOG(rdInfoLog) << "-----------------------\n Testing pickCanonical"
729                        << std::endl;
730 
731   std::string rdbase = getenv("RDBASE");
732   std::string tautomerFile =
733       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
734   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
735       new TautomerCatalogParams(tautomerFile));
736   // DEPRECATED, remove from here in release 2021.01
737   {
738     unsigned int ntransforms = tautparams->getNumTautomers();
739     TEST_ASSERT(ntransforms == 36);
740   }
741   // DEPRECATED, remove until here in release 2021.01
742 
743   unsigned int ntransforms = tautparams->getTransforms().size();
744   TEST_ASSERT(ntransforms == 36);
745 
746   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
747 
748   for (const auto &itm : canonTautomerData) {
749     std::unique_ptr<ROMol> mol{SmilesToMol(itm.first)};
750     TEST_ASSERT(mol);
751     auto tautRes = te.enumerate(*mol);
752     std::unique_ptr<ROMol> res{te.pickCanonical(tautRes)};
753     TEST_ASSERT(res);
754     // std::cerr << itm.first<<" -> "<<MolToSmiles(*res)<<std::endl;
755     TEST_ASSERT(MolToSmiles(*res) == itm.second);
756   }
757   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
758 }
759 
testCustomScoreFunc()760 void testCustomScoreFunc() {
761   BOOST_LOG(rdInfoLog)
762       << "-----------------------\n Testing custom scoring functions"
763       << std::endl;
764 
765   std::string rdbase = getenv("RDBASE");
766   std::string tautomerFile =
767       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
768   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
769       new TautomerCatalogParams(tautomerFile));
770   // DEPRECATED, remove from here in release 2021.01
771   {
772     unsigned int ntransforms = tautparams->getNumTautomers();
773     TEST_ASSERT(ntransforms == 36);
774   }
775   // DEPRECATED, remove until here in release 2021.01
776 
777   unsigned int ntransforms = tautparams->getTransforms().size();
778   TEST_ASSERT(ntransforms == 36);
779 
780   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
781 
782   // silly examples just using the scoreRings() function
783   std::vector<std::pair<std::string, std::string>> subsetTautomerData{
784       {"C1(=CCCCC1)O", "O=C1CCCCC1"},
785       {"C1(CCCCC1)=O", "O=C1CCCCC1"},
786       {"C(=C)(O)C1=CC=CC=C1", "C=C(O)c1ccccc1"},
787       {"CC(C)=O", "C=C(C)O"},
788       {"OC(C)=C(C)C", "C=C(O)C(C)C"},
789   };
790   for (const auto &itm : subsetTautomerData) {
791     std::unique_ptr<ROMol> mol{SmilesToMol(itm.first)};
792     TEST_ASSERT(mol);
793     {
794       // this uses the non-templated pickCanonical() function
795       std::unique_ptr<ROMol> res{
796           te.canonicalize(*mol, [](const ROMol &m) -> int {
797             return MolStandardize::TautomerScoringFunctions::scoreRings(m);
798           })};
799       TEST_ASSERT(res);
800       TEST_ASSERT(MolToSmiles(*res) == itm.second);
801     }
802     {
803       // this uses the non-templated pickCanonical() overload
804       auto tautRes = te.enumerate(*mol);
805       std::unique_ptr<ROMol> res{
806           te.pickCanonical(tautRes, [](const ROMol &m) -> int {
807             return MolStandardize::TautomerScoringFunctions::scoreRings(m);
808           })};
809       TEST_ASSERT(res);
810       TEST_ASSERT(MolToSmiles(*res) == itm.second);
811     }
812     {
813       // this tests the templated pickCanonical() overload on a std::vector
814       auto tautRes = te.enumerate(*mol);
815       std::unique_ptr<ROMol> res{
816           te.pickCanonical(tautRes.tautomers(), [](const ROMol &m) -> int {
817             return MolStandardize::TautomerScoringFunctions::scoreRings(m);
818           })};
819       TEST_ASSERT(res);
820       TEST_ASSERT(MolToSmiles(*res) == itm.second);
821     }
822     {
823       // this tests the templated pickCanonical() overload
824       // with a different iterable container
825       auto tautRes = te.enumerate(*mol);
826       std::set<ROMOL_SPTR> tautomerSet(tautRes.begin(), tautRes.end());
827       std::unique_ptr<ROMol> res{
828           te.pickCanonical(tautomerSet, [](const ROMol &m) -> int {
829             return MolStandardize::TautomerScoringFunctions::scoreRings(m);
830           })};
831       TEST_ASSERT(res);
832       TEST_ASSERT(MolToSmiles(*res) == itm.second);
833     }
834   }
835   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
836 }
837 
testEnumerationProblems()838 void testEnumerationProblems() {
839   BOOST_LOG(rdInfoLog)
840       << "-----------------------\n Testing tautomer enumeration problems"
841       << std::endl;
842 
843   std::string rdbase = getenv("RDBASE");
844   std::string tautomerFile =
845       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
846   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
847       new TautomerCatalogParams(tautomerFile));
848   // DEPRECATED, remove from here in release 2021.01
849   {
850     unsigned int ntransforms = tautparams->getNumTautomers();
851     TEST_ASSERT(ntransforms == 36);
852   }
853   // DEPRECATED, remove until here in release 2021.01
854 
855   unsigned int ntransforms = tautparams->getTransforms().size();
856   TEST_ASSERT(ntransforms == 36);
857 
858   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
859 #if 1
860   {  // from the discussion of #2908
861     auto mol = "O=C(C1=C[NH+]=CC=C1)[O-]"_smiles;
862     auto tautRes = te.enumerate(*mol);
863     TEST_ASSERT(tautRes.size() == 1);
864   }
865 #endif
866   {  // one of the examples from the tautobase paper
867     auto m =
868         "[S:1]=[c:2]1[nH+:3][c:5]([NH2:9])[nH:8][c:7]2[c:4]1[n:6][nH:10][n:11]2"_smiles;
869     TEST_ASSERT(m);
870 
871     auto tautRes = te.enumerate(*m);
872     // for (auto taut : tauts) {
873     //   std::cerr << MolToSmiles(*taut) << std::endl;
874     // }
875     TEST_ASSERT(tautRes.size() == 12);
876   }
877 
878   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
879 }
880 
testPickCanonical2()881 void testPickCanonical2() {
882   BOOST_LOG(rdInfoLog) << "-----------------------\n Testing pickCanonical"
883                        << std::endl;
884 
885   std::string rdbase = getenv("RDBASE");
886   std::string tautomerFile =
887       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
888   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
889       new TautomerCatalogParams(tautomerFile));
890   // DEPRECATED, remove from here in release 2021.01
891   {
892     unsigned int ntransforms = tautparams->getNumTautomers();
893     TEST_ASSERT(ntransforms == 36);
894   }
895   // DEPRECATED, remove until here in release 2021.01
896 
897   unsigned int ntransforms = tautparams->getTransforms().size();
898   TEST_ASSERT(ntransforms == 36);
899 
900   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
901   {
902     auto mol = "CN=c1nc[nH]cc1"_smiles;
903     TEST_ASSERT(mol);
904     auto tautRes = te.enumerate(*mol);
905     for (const auto &taut : tautRes) {
906       std::cerr << MolToSmiles(*taut) << std::endl;
907     }
908     std::unique_ptr<ROMol> canon{te.pickCanonical(tautRes)};
909     std::cerr << "res: " << MolToSmiles(*canon) << std::endl;
910   }
911   {
912     auto mol = "CN=c1[nH]cccc1"_smiles;
913     TEST_ASSERT(mol);
914     auto tautRes = te.enumerate(*mol);
915     for (const auto &taut : tautRes) {
916       std::cerr << MolToSmiles(*taut) << std::endl;
917     }
918     std::unique_ptr<ROMol> canon{te.pickCanonical(tautRes)};
919     std::cerr << "res: " << MolToSmiles(*canon) << std::endl;
920   }
921   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
922 }
923 
testEnumerateDetails()924 void testEnumerateDetails() {
925   BOOST_LOG(rdInfoLog)
926       << "-----------------------\n Testing getting details back "
927          "from tautomer enumeration"
928       << std::endl;
929   std::string rdbase = getenv("RDBASE");
930   std::string tautomerFile =
931       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
932   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
933       new TautomerCatalogParams(tautomerFile));
934   // DEPRECATED, remove from here in release 2021.01
935   {
936     unsigned int ntransforms = tautparams->getNumTautomers();
937     TEST_ASSERT(ntransforms == 36);
938   }
939   // DEPRECATED, remove until here in release 2021.01
940 
941   unsigned int ntransforms = tautparams->getTransforms().size();
942   TEST_ASSERT(ntransforms == 36);
943   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
944   {
945     auto mol = "c1ccccc1CN=c1[nH]cccc1"_smiles;
946     TEST_ASSERT(mol);
947 
948     auto tautRes = te.enumerate(*mol);
949     TEST_ASSERT(tautRes.size() == 2);
950     TEST_ASSERT(tautRes.modifiedAtoms().count() == 2);
951     TEST_ASSERT(tautRes.modifiedBonds().count() == 7);
952     TEST_ASSERT(tautRes.modifiedAtoms().test(7));
953     TEST_ASSERT(tautRes.modifiedAtoms().test(9));
954     TEST_ASSERT(!tautRes.modifiedBonds().test(0));
955     TEST_ASSERT(tautRes.modifiedBonds().test(7));
956     TEST_ASSERT(tautRes.modifiedBonds().test(8));
957     TEST_ASSERT(tautRes.modifiedBonds().test(14));
958   }
959   {
960     // test the deprecated form
961     auto mol = "c1ccccc1CN=c1[nH]cccc1"_smiles;
962     TEST_ASSERT(mol);
963     boost::dynamic_bitset<> atomsModified(mol->getNumAtoms());
964     boost::dynamic_bitset<> bondsModified(mol->getNumBonds());
965 
966 #if defined(_MSC_VER)
967 #pragma warning(suppress : 4996)
968 #elif defined(__GNUC__)
969 #pragma GCC diagnostic push
970 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
971 #endif
972     auto tauts = te.enumerate(*mol, &atomsModified, &bondsModified);
973 #if defined(__GNUC__)
974 #pragma GCC diagnostic pop
975 #endif
976     TEST_ASSERT(tauts.size() == 2);
977     TEST_ASSERT(atomsModified.count() == 2);
978     TEST_ASSERT(bondsModified.count() == 7);
979     TEST_ASSERT(atomsModified[7]);
980     TEST_ASSERT(atomsModified[9]);
981     TEST_ASSERT(!bondsModified[0]);
982     TEST_ASSERT(bondsModified[7]);
983     TEST_ASSERT(bondsModified[8]);
984     TEST_ASSERT(bondsModified[14]);
985   }
986   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
987 }
988 
testGithub2990()989 void testGithub2990() {
990   BOOST_LOG(rdInfoLog) << "-----------------------\n Testing Github #2990: "
991                           "Tautomer enumeration "
992                           "should remove stereo in all tautomers"
993                        << std::endl;
994   std::string rdbase = getenv("RDBASE");
995   std::string tautomerFile =
996       rdbase + "/Data/MolStandardize/tautomerTransforms.in";
997   auto tautparams = std::unique_ptr<TautomerCatalogParams>(
998       new TautomerCatalogParams(tautomerFile));
999   // DEPRECATED, remove from here in release 2021.01
1000   {
1001     unsigned int ntransforms = tautparams->getNumTautomers();
1002     TEST_ASSERT(ntransforms == 36);
1003   }
1004   // DEPRECATED, remove until here in release 2021.01
1005 
1006   unsigned int ntransforms = tautparams->getTransforms().size();
1007   TEST_ASSERT(ntransforms == 36);
1008   TautomerEnumerator te(new TautomerCatalog(tautparams.get()));
1009   {
1010     // atom stereo
1011     auto mol = "COC(=O)[C@@H](N)CO"_smiles;
1012     TEST_ASSERT(mol);
1013     auto res = te.enumerate(*mol);
1014     for (const auto &taut : res) {
1015       auto smi = MolToSmiles(*taut);
1016       // std::cerr << smi << std::endl;
1017       TEST_ASSERT(smi.find("@H") == std::string::npos);
1018     }
1019   }
1020   {
1021     // atom stereo, atoms not in the tautomer zone are still ok
1022     auto mol = "C[C@](Cl)(F)COC(=O)[C@@H](N)CO"_smiles;
1023     TEST_ASSERT(mol);
1024     auto res = te.enumerate(*mol);
1025     for (const auto &taut : res) {
1026       auto smi = MolToSmiles(*taut);
1027       // std::cerr << smi << std::endl;
1028       TEST_ASSERT(smi.find("@H") == std::string::npos);
1029       TEST_ASSERT(smi.find("@]") != std::string::npos);
1030     }
1031   }
1032   {
1033     // bond stereo
1034     auto mol = "C/C=C/C/N=c1/[nH]cccc1"_smiles;
1035     TEST_ASSERT(mol);
1036     TEST_ASSERT(mol->getBondBetweenAtoms(0, 1)->getBondDir() !=
1037                 Bond::BondDir::NONE);
1038     TEST_ASSERT(mol->getBondBetweenAtoms(2, 3)->getBondDir() !=
1039                 Bond::BondDir::NONE);
1040     TEST_ASSERT(mol->getBondBetweenAtoms(3, 4)->getBondDir() !=
1041                 Bond::BondDir::NONE);
1042     TEST_ASSERT(mol->getBondBetweenAtoms(5, 6)->getBondDir() !=
1043                 Bond::BondDir::NONE);
1044     TEST_ASSERT(mol->getBondBetweenAtoms(1, 2)->getStereo() >
1045                 Bond::BondStereo::STEREOANY);
1046     TEST_ASSERT(mol->getBondBetweenAtoms(4, 5)->getStereo() >
1047                 Bond::BondStereo::STEREOANY);
1048 
1049     auto res = te.enumerate(*mol);
1050     for (const auto &taut : res) {
1051       TEST_ASSERT(taut->getBondBetweenAtoms(0, 1)->getBondDir() !=
1052                   Bond::BondDir::NONE);
1053       TEST_ASSERT(taut->getBondBetweenAtoms(2, 3)->getBondDir() !=
1054                   Bond::BondDir::NONE);
1055       TEST_ASSERT(taut->getBondBetweenAtoms(3, 4)->getBondDir() ==
1056                   Bond::BondDir::NONE);
1057       TEST_ASSERT(taut->getBondBetweenAtoms(5, 6)->getBondDir() ==
1058                   Bond::BondDir::NONE);
1059       TEST_ASSERT(taut->getBondBetweenAtoms(1, 2)->getStereo() >
1060                   Bond::BondStereo::STEREOANY);
1061       TEST_ASSERT(taut->getBondBetweenAtoms(4, 5)->getStereo() ==
1062                   Bond::BondStereo::STEREONONE);
1063     }
1064   }
1065 
1066   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
1067 }
1068 
testPickCanonicalCIPChangeOnChiralCenter()1069 void testPickCanonicalCIPChangeOnChiralCenter() {
1070   BOOST_LOG(rdInfoLog)
1071       << "-----------------------\n testPickCanonicalCIPChangeOnChiralCenter"
1072       << std::endl;
1073 
1074   struct CanonicalTaut {
1075     static ROMOL_SPTR get(const TautomerEnumeratorResult &res) {
1076       std::vector<int> scores;
1077       scores.reserve(res.size());
1078       std::transform(res.begin(), res.end(), std::back_inserter(scores),
1079                      [](const ROMOL_SPTR &m) {
1080                        return TautomerScoringFunctions::scoreTautomer(*m);
1081                      });
1082       std::vector<size_t> indices(res.size());
1083       std::iota(indices.begin(), indices.end(), 0);
1084       int bestIdx =
1085           *std::max_element(indices.begin(), indices.end(),
1086                             [scores](const size_t &a, const size_t &b) {
1087                               if (scores.at(a) != scores.at(b)) {
1088                                 return (scores.at(a) < scores.at(b));
1089                               }
1090                               return (a < b);
1091                             });
1092       TEST_ASSERT(*std::max_element(scores.begin(), scores.end()) ==
1093                   scores.at(bestIdx));
1094       return res.at(bestIdx);
1095     }
1096   };
1097 
1098   auto mol = "CC\\C=C(/O)[C@@H](C)C(C)=O"_smiles;
1099   TEST_ASSERT(mol.get());
1100   TEST_ASSERT(mol->getAtomWithIdx(5)->getChiralTag() ==
1101               Atom::CHI_TETRAHEDRAL_CW);
1102   TEST_ASSERT(mol->getAtomWithIdx(5)->getProp<std::string>(
1103                   common_properties::_CIPCode) == "R");
1104   {
1105     // here the chirality disappears as the chiral center is itself involved in
1106     // tautomerism
1107     TautomerEnumerator te;
1108     ROMOL_SPTR canTaut(te.canonicalize(*mol));
1109     TEST_ASSERT(canTaut.get());
1110     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1111                 Atom::CHI_UNSPECIFIED);
1112     TEST_ASSERT(
1113         !canTaut->getAtomWithIdx(5)->hasProp(common_properties::_CIPCode));
1114     TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)C(C)C(C)=O");
1115   }
1116   {
1117     // here the chirality stays even if the chiral center is itself involved in
1118     // tautomerism because of the tautomerRemoveSp3Stereo parameter being set to
1119     // false
1120     CleanupParameters params;
1121     params.tautomerRemoveSp3Stereo = false;
1122     TautomerEnumerator te(params);
1123     ROMOL_SPTR canTaut(te.canonicalize(*mol));
1124     TEST_ASSERT(canTaut.get());
1125     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1126                 Atom::CHI_TETRAHEDRAL_CW);
1127     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getProp<std::string>(
1128                     common_properties::_CIPCode) == "S");
1129     TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)[C@@H](C)C(C)=O");
1130   }
1131   {
1132     // here the chirality disappears as the chiral center is itself involved in
1133     // tautomerism; the reassignStereo setting has no influence
1134     TautomerEnumerator te;
1135     auto res = te.enumerate(*mol);
1136     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1137     TEST_ASSERT(res.size() == 8);
1138     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1139     TEST_ASSERT(bestTaut.get());
1140     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1141                 Atom::CHI_UNSPECIFIED);
1142     TEST_ASSERT(
1143         !bestTaut->getAtomWithIdx(5)->hasProp(common_properties::_CIPCode));
1144     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)C(C)C(C)=O");
1145   }
1146   {
1147     // here the chirality disappears as the chiral center is itself involved in
1148     // tautomerism; the reassignStereo setting has no influence
1149     CleanupParameters params;
1150     params.tautomerReassignStereo = false;
1151     TautomerEnumerator te(params);
1152     auto res = te.enumerate(*mol);
1153     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1154     TEST_ASSERT(res.size() == 8);
1155     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1156     TEST_ASSERT(bestTaut.get());
1157     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1158                 Atom::CHI_UNSPECIFIED);
1159     TEST_ASSERT(
1160         !bestTaut->getAtomWithIdx(5)->hasProp(common_properties::_CIPCode));
1161     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)C(C)C(C)=O");
1162   }
1163   {
1164     // here the chirality stays even if the chiral center is itself involved in
1165     // tautomerism because of the tautomerRemoveSp3Stereo parameter being set to
1166     // false. As reassignStereo by default is true, the CIP code has  been
1167     // recomputed and therefore it is now S (correct)
1168     CleanupParameters params;
1169     params.tautomerRemoveSp3Stereo = false;
1170     TautomerEnumerator te(params);
1171     auto res = te.enumerate(*mol);
1172     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1173     TEST_ASSERT(res.size() == 8);
1174     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1175     TEST_ASSERT(bestTaut.get());
1176     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1177                 Atom::CHI_TETRAHEDRAL_CW);
1178     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1179                     common_properties::_CIPCode) == "S");
1180     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@@H](C)C(C)=O");
1181   }
1182   {
1183     // here the chirality stays even if the chiral center is itself involved in
1184     // tautomerism because of the tautomerRemoveSp3Stereo parameter being set to
1185     // false. As reassignStereo is false, the CIP code has not been recomputed
1186     // and therefore it is still R (incorrect)
1187     CleanupParameters params;
1188     params.tautomerRemoveSp3Stereo = false;
1189     params.tautomerReassignStereo = false;
1190     TautomerEnumerator te(params);
1191     auto res = te.enumerate(*mol);
1192     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1193     TEST_ASSERT(res.size() == 8);
1194     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1195     TEST_ASSERT(bestTaut.get());
1196     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1197                 Atom::CHI_TETRAHEDRAL_CW);
1198     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1199                     common_properties::_CIPCode) == "R");
1200     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@@H](C)C(C)=O");
1201   }
1202 
1203   mol = "CC\\C=C(/O)[C@@](CC)(C)C(C)=O"_smiles;
1204   TEST_ASSERT(mol.get());
1205   TEST_ASSERT(mol->getAtomWithIdx(5)->getChiralTag() ==
1206               Atom::CHI_TETRAHEDRAL_CW);
1207   TEST_ASSERT(mol->getAtomWithIdx(5)->getProp<std::string>(
1208                   common_properties::_CIPCode) == "S");
1209   // here the chirality stays no matter how tautomerRemoveSp3Stereo
1210   // is set as the chiral center is not involved in tautomerism
1211   {
1212     TautomerEnumerator te;
1213     ROMOL_SPTR canTaut(te.canonicalize(*mol));
1214     TEST_ASSERT(canTaut.get());
1215     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1216                 Atom::CHI_TETRAHEDRAL_CW);
1217     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getProp<std::string>(
1218                     common_properties::_CIPCode) == "R");
1219     TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1220   }
1221   {
1222     CleanupParameters params;
1223     params.tautomerRemoveSp3Stereo = false;
1224     TautomerEnumerator te(params);
1225     ROMOL_SPTR canTaut(te.canonicalize(*mol));
1226     TEST_ASSERT(canTaut.get());
1227     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getChiralTag() ==
1228                 Atom::CHI_TETRAHEDRAL_CW);
1229     TEST_ASSERT(canTaut->getAtomWithIdx(5)->getProp<std::string>(
1230                     common_properties::_CIPCode) == "R");
1231     TEST_ASSERT(MolToSmiles(*canTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1232   }
1233   {
1234     // as reassignStereo by default is true, the CIP code has been recomputed
1235     // and therefore it is now R (correct)
1236     TautomerEnumerator te;
1237     auto res = te.enumerate(*mol);
1238     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1239     TEST_ASSERT(res.size() == 4);
1240     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1241     TEST_ASSERT(bestTaut.get());
1242     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1243                 Atom::CHI_TETRAHEDRAL_CW);
1244     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1245                     common_properties::_CIPCode) == "R");
1246     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1247   }
1248   {
1249     // as reassignStereo is false, the CIP code has not been recomputed
1250     // and therefore it is still S (incorrect)
1251     CleanupParameters params;
1252     params.tautomerReassignStereo = false;
1253     TautomerEnumerator te(params);
1254     auto res = te.enumerate(*mol);
1255     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1256     TEST_ASSERT(res.size() == 4);
1257     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1258     TEST_ASSERT(bestTaut.get());
1259     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1260                 Atom::CHI_TETRAHEDRAL_CW);
1261     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1262                     common_properties::_CIPCode) == "S");
1263     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1264   }
1265   {
1266     // as reassignStereo by default is true, the CIP code has  been recomputed
1267     // and therefore it is now R (correct)
1268     CleanupParameters params;
1269     params.tautomerRemoveSp3Stereo = false;
1270     TautomerEnumerator te(params);
1271     auto res = te.enumerate(*mol);
1272     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1273     TEST_ASSERT(res.size() == 4);
1274     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1275     TEST_ASSERT(bestTaut.get());
1276     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1277                 Atom::CHI_TETRAHEDRAL_CW);
1278     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1279                     common_properties::_CIPCode) == "R");
1280     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1281   }
1282   {
1283     // here the chirality stays even if the tautomerRemoveSp3Stereo parameter
1284     // is set to false as the chiral center is not involved in tautomerism.
1285     // As reassignStereo is false, the CIP code has not been recomputed
1286     // and therefore it is still S (incorrect)
1287     CleanupParameters params;
1288     params.tautomerRemoveSp3Stereo = false;
1289     params.tautomerReassignStereo = false;
1290     TautomerEnumerator te(params);
1291     auto res = te.enumerate(*mol);
1292     TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1293     TEST_ASSERT(res.size() == 4);
1294     ROMOL_SPTR bestTaut = CanonicalTaut::get(res);
1295     TEST_ASSERT(bestTaut.get());
1296     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getChiralTag() ==
1297                 Atom::CHI_TETRAHEDRAL_CW);
1298     TEST_ASSERT(bestTaut->getAtomWithIdx(5)->getProp<std::string>(
1299                     common_properties::_CIPCode) == "S");
1300     TEST_ASSERT(MolToSmiles(*bestTaut) == "CCCC(=O)[C@](C)(CC)C(C)=O");
1301   }
1302 
1303   BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
1304 }
1305 
testTautomerEnumeratorResult_const_iterator()1306 void testTautomerEnumeratorResult_const_iterator() {
1307   BOOST_LOG(rdInfoLog)
1308       << "-----------------------\n testTautomerEnumeratorResult_const_iterator"
1309       << std::endl;
1310   // CHEMBL3480964
1311   RWMOL_SPTR mol = "Cc1nnc(NC(=O)N2CCN(Cc3ccc(F)cc3)C(=O)C2)s1"_smiles;
1312   TautomerEnumerator te;
1313   auto res = te.enumerate(*mol);
1314   TEST_ASSERT(res.status() == TautomerEnumeratorStatus::Completed);
1315   TEST_ASSERT(res.size() == 12);
1316   auto it = res.begin();
1317   auto it2 = res.begin();
1318   // Test semantic requirements of bidirectional_iterator
1319   // https://en.cppreference.com/w/cpp/iterator/bidirectional_iterator
1320   TEST_ASSERT(it == it2);
1321   TEST_ASSERT(it++ == it2);
1322   TEST_ASSERT(it == ++it2);
1323   TEST_ASSERT(it == it2);
1324   TEST_ASSERT(it-- == it2);
1325   TEST_ASSERT(it == --it2);
1326   TEST_ASSERT(it == it2);
1327   ++it;
1328   ++it2;
1329   TEST_ASSERT(++(--it) == it2);
1330   TEST_ASSERT(--(++it) == it2);
1331   TEST_ASSERT(std::addressof(--it) == std::addressof(it));
1332   ++it;
1333   TEST_ASSERT(it == it2);
1334   it--;
1335   --it2;
1336   TEST_ASSERT(it == it2);
1337   TEST_ASSERT(*it == res[0]);
1338   TEST_ASSERT(*it++ == res.at(0));
1339   TEST_ASSERT(*it == res[1]);
1340   TEST_ASSERT(*++it == res.at(2));
1341   TEST_ASSERT(*it == res[2]);
1342   ++it;
1343   TEST_ASSERT(*it == res[3]);
1344   ++it;
1345   TEST_ASSERT(*it == res[4]);
1346   it++;
1347   TEST_ASSERT(*it == res[5]);
1348   TEST_ASSERT(*it-- == res.at(5));
1349   TEST_ASSERT(*it == res[4]);
1350   TEST_ASSERT(*--it == res.at(3));
1351   TEST_ASSERT(*it == res[3]);
1352   --it;
1353   TEST_ASSERT(*it == res[2]);
1354   --it;
1355   TEST_ASSERT(*it == res[1]);
1356   it--;
1357   TEST_ASSERT(*it == res[0]);
1358   std::ptrdiff_t i = 0;
1359   for (auto t : res) {
1360     TEST_ASSERT(t == res[i++]);
1361   }
1362   i = 0;
1363   for (auto it = res.begin(); it != res.end(); ++it) {
1364     TEST_ASSERT(std::distance(res.begin(), it) == i);
1365     TEST_ASSERT(*it == res[i]);
1366     TEST_ASSERT(it->getNumAtoms() == res[i++]->getNumAtoms());
1367   }
1368   i = res.size();
1369   for (auto it = res.end(); it != res.begin();) {
1370     TEST_ASSERT(std::distance(res.begin(), it) == i);
1371     TEST_ASSERT(*--it == res[--i]);
1372     TEST_ASSERT(it->getNumAtoms() == res[i]->getNumAtoms());
1373   }
1374   i = 0;
1375   for (const auto &pair : res.smilesTautomerMap()) {
1376     TEST_ASSERT(pair.first == MolToSmiles(*res[i]));
1377     TEST_ASSERT(pair.second.tautomer == res[i++]);
1378   }
1379   i = 0;
1380   for (auto it = res.smilesTautomerMap().begin();
1381        it != res.smilesTautomerMap().end(); ++it) {
1382     TEST_ASSERT(std::distance(res.smilesTautomerMap().begin(), it) == i);
1383     TEST_ASSERT(it->first == MolToSmiles(*res[i]));
1384     TEST_ASSERT(it->second.tautomer == res[i++]);
1385   }
1386   i = res.smilesTautomerMap().size();
1387   for (auto it = res.smilesTautomerMap().end();
1388        it != res.smilesTautomerMap().begin();) {
1389     TEST_ASSERT(std::distance(res.smilesTautomerMap().begin(), it) == i);
1390     TEST_ASSERT((--it)->first == MolToSmiles(*res[--i]));
1391     TEST_ASSERT(it->second.tautomer == res[i]);
1392   }
1393 }
1394 
testGithub3430()1395 void testGithub3430() {
1396   BOOST_LOG(rdInfoLog) << "-----------------------\n testGithub3430"
1397                        << std::endl;
1398   // The "guanidine terminal=N" rule should not apply to aromatic C
1399   // as this balances the "aromatic C = exocyclic N" rule with no net
1400   // effect on the score
1401   std::vector<ROMOL_SPTR> mols{"Cc1ccc(NC(=O)N=c2[nH]c(C)cn2C)nc1"_smiles,
1402                                "CCCCC(=O)N=c1nc(C)c2ncn(C)c2[nH]1"_smiles,
1403                                "c12ccccc1[nH]c(=N)[nH]2"_smiles};
1404   for (auto mol : mols) {
1405     TEST_ASSERT(mol);
1406     TautomerEnumerator te;
1407     auto res = te.enumerate(*mol);
1408     std::vector<int> scores;
1409     scores.reserve(res.size());
1410     std::transform(res.begin(), res.end(), std::back_inserter(scores),
1411                    [](const ROMOL_SPTR &m) {
1412                      return TautomerScoringFunctions::scoreTautomer(*m);
1413                    });
1414     std::sort(scores.begin(), scores.end(), std::greater<int>());
1415     TEST_ASSERT(scores[1] < scores[0]);
1416   }
1417 }
1418 
testGithub3755()1419 void testGithub3755() {
1420   BOOST_LOG(rdInfoLog) << "-----------------------\n testGithub3755"
1421                        << std::endl;
1422   // hydrates, aminals and hemiaminals should be scored lower than
1423   // carboxylic acids, amides, amidines, and guanidines
1424   std::vector<std::pair<std::string, std::string>> orig_vs_expected{
1425       {"OC(=O)C(N)CO", "NC(CO)C(=O)O"}, {"C([C@@H](C(=O)O)N)O", "NC(CO)C(=O)O"},
1426       {"OC(=O)C(N)CN", "NCC(N)C(=O)O"}, {"NC(=O)C(N)CO", "NC(=O)C(N)CO"},
1427       {"NC(=N)C(N)CO", "N=C(N)C(N)CO"}, {"NC(=N)NC(N)CO", "N=C(N)NC(N)CO"}};
1428   TautomerEnumerator te;
1429   for (const auto &pair : orig_vs_expected) {
1430     ROMOL_SPTR orig(SmilesToMol(pair.first));
1431     TEST_ASSERT(orig);
1432     ROMOL_SPTR canonical(te.canonicalize(*orig));
1433     TEST_ASSERT(MolToSmiles(*canonical) == pair.second);
1434   }
1435 }
1436 
main()1437 int main() {
1438   RDLog::InitLogs();
1439 #if 1
1440   testEnumerator();
1441   testEnumeratorParams();
1442   testEnumeratorCallback();
1443   testCanonicalize();
1444   testPickCanonical();
1445   testCustomScoreFunc();
1446   testEnumerationProblems();
1447 #endif
1448   testPickCanonical2();
1449   testEnumerateDetails();
1450   testGithub2990();
1451   testPickCanonicalCIPChangeOnChiralCenter();
1452   testTautomerEnumeratorResult_const_iterator();
1453   testGithub3430();
1454   testGithub3755();
1455   return 0;
1456 }
1457