1 //
2 //
3 //  Copyright (C) 2018-2021 Greg Landrum and T5 Informatics GmbH
4 //
5 //   @@ All Rights Reserved @@
6 //  This file is part of the RDKit.
7 //  The contents are covered by the terms of the BSD license
8 //  which is included in the file license.txt, found at the root
9 //  of the RDKit source tree.
10 //
11 
12 #include "catch.hpp"
13 
14 #include <GraphMol/RDKitBase.h>
15 #include <GraphMol/MolPickler.h>
16 #include <GraphMol/QueryAtom.h>
17 #include <GraphMol/QueryBond.h>
18 #include <GraphMol/SmilesParse/SmilesParse.h>
19 #include <GraphMol/SmilesParse/SmilesWrite.h>
20 #include <GraphMol/SmilesParse/SmartsWrite.h>
21 #include <GraphMol/Substruct/SubstructMatch.h>
22 
23 using namespace RDKit;
24 
25 TEST_CASE("Github #1972", "[SMILES][bug]") {
26   SECTION("basics") {
27     std::vector<std::vector<std::string>> smiles = {
28         {"[C@@]1(Cl)(F)(I).Br1", "[C@@](Br)(Cl)(F)(I)"},
29         {"[C@@](Cl)(F)(I)1.Br1", "[C@@](Cl)(F)(I)Br"},
30         {"[C@@](Cl)1(F)(I).Br1", "[C@@](Cl)(Br)(F)(I)"},
31         {"[C@@](Cl)(F)1(I).Br1", "[C@@](Cl)(F)(Br)(I)"}};
32     for (const auto &pr : smiles) {
33       std::unique_ptr<ROMol> m1(SmilesToMol(pr[0]));
34       std::unique_ptr<ROMol> m2(SmilesToMol(pr[1]));
35       REQUIRE(m1);
36       REQUIRE(m2);
37       auto csmi1 = MolToSmiles(*m1);
38       auto csmi2 = MolToSmiles(*m2);
39       CHECK(csmi1 == csmi2);
40     }
41   }
42   SECTION("further examples") {
43     std::vector<std::vector<std::string>> smiles = {
44         {"[C@@]1(Cl)2(I).Br1.F2", "[C@@](Br)(Cl)(F)(I)"},
45         {"[C@@](Cl)2(I)1.Br1.F2", "[C@@](Cl)(F)(I)Br"},
46         {"[C@@]12(Cl)(I).Br1.F2", "[C@@](Br)(F)(Cl)(I)"},
47         {"[C@@]21(Cl)(I).Br1.F2", "[C@@](F)(Br)(Cl)(I)"},
48         {"[C@@](Cl)12(I).Br1.F2", "[C@@](Cl)(Br)(F)(I)"},
49         {"[C@@](Cl)21(I).Br1.F2", "[C@@](Cl)(F)(Br)(I)"},
50         {"[C@@](Cl)(I)21.Br1.F2", "[C@@](Cl)(I)(F)(Br)"},
51         {"[C@@](Cl)(I)12.Br1.F2", "[C@@](Cl)(I)(Br)(F)"}};
52     for (const auto &pr : smiles) {
53       std::unique_ptr<ROMol> m1(SmilesToMol(pr[0]));
54       std::unique_ptr<ROMol> m2(SmilesToMol(pr[1]));
55       REQUIRE(m1);
56       REQUIRE(m2);
57       auto csmi1 = MolToSmiles(*m1);
58       auto csmi2 = MolToSmiles(*m2);
59       CHECK(csmi1 == csmi2);
60     }
61   }
62 }
63 
64 TEST_CASE("Github #2029", "[SMILES][bug]") {
65   SECTION("wedging") {
66     std::unique_ptr<ROMol> m1(SmilesToMol("CN[C@H](Cl)C(=O)O"));
67     REQUIRE(m1);
68     m1->getBondWithIdx(1)->setBondDir(Bond::BEGINWEDGE);
69     bool doKekule = false, allBondsExplicit = false;
70     CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule,
71                                            allBondsExplicit));
72     allBondsExplicit = true;
73     CHECK("-" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule,
74                                             allBondsExplicit));
75   }
76   SECTION("direction") {
77     std::unique_ptr<ROMol> m1(SmilesToMol("C/C=C/C"));
78     REQUIRE(m1);
79     bool doKekule = false, allBondsExplicit = false;
80     CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule,
81                                            allBondsExplicit));
82     CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(2), -1, doKekule,
83                                            allBondsExplicit));
84     allBondsExplicit = true;
85     CHECK("/" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule,
86                                             allBondsExplicit));
87     CHECK("/" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(2), -1, doKekule,
88                                             allBondsExplicit));
89   }
90   SECTION("aromatic double bonds") {
91     std::unique_ptr<RWMol> m1(SmilesToMol("c1ccccc1"));
92     REQUIRE(m1);
93     bool markAtomsBonds = false;
94     MolOps::Kekulize(*m1, markAtomsBonds);
95     bool doKekule = false, allBondsExplicit = false;
96     CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule,
97                                            allBondsExplicit));
98     CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule,
99                                            allBondsExplicit));
100     allBondsExplicit = true;
101     CHECK("=" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule,
102                                             allBondsExplicit));
103     CHECK("-" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule,
104                                             allBondsExplicit));
105   }
106 }
107 
108 TEST_CASE("Smiles literals", "[SMILES]") {
109   auto mol = "c1ccccc1"_smiles;
110   REQUIRE(mol);
111   CHECK(6 == mol->getNumAtoms());
112   auto fail1 = "c1ccccc"_smiles;
113   REQUIRE(!fail1);
114   auto fail2 = "c1cccn1"_smiles;
115   REQUIRE(!fail2);
116 }
117 
118 TEST_CASE("Smarts literals", "[Smarts]") {
119   auto mol = "c1ccc[c,n]c1"_smarts;
120   REQUIRE(mol);
121   CHECK(6 == mol->getNumAtoms());
122   auto fail1 = "c1ccccc"_smarts;
123   REQUIRE(!fail1);
124   auto mol2 = "c1cccn1"_smarts;
125   REQUIRE(mol2);
126 }
127 
128 TEST_CASE(
129     "github #2197 and #2237: handling of aromatic main group atoms in SMARTS",
130     "[Smarts]") {
131   std::vector<std::string> smarts = {
132       "[si]1ccccc1",
133       "[as]1ccccc1",
134       "[se]1ccccc1",
135       "[te]1ccccc1",
136 
137   };
138   SECTION("#2197") {
139     for (const auto &sma : smarts) {
140       std::unique_ptr<ROMol> mol(SmartsToMol(sma));
141       REQUIRE(mol);
142       CHECK(6 == mol->getNumAtoms());
143       REQUIRE(mol->getAtomWithIdx(0)->hasQuery());
144       REQUIRE(static_cast<QueryAtom *>(mol->getAtomWithIdx(0))
145                   ->getQuery()
146                   ->getDescription() == "AtomType");
147     }
148   }
149   SECTION("#2237") {
150     for (const auto &sma : smarts) {
151       std::unique_ptr<ROMol> mol(SmartsToMol(sma));
152       REQUIRE(mol);
153       REQUIRE(MolToSmarts(*mol) == sma);
154     }
155   }
156 }
157 
158 TEST_CASE("github #2257: writing cxsmiles", "[smiles][cxsmiles]") {
159   SECTION("basics") {
160     auto mol = "OCC"_smiles;
161     REQUIRE(mol);
162     auto smi = MolToCXSmiles(*mol);
163     CHECK(smi == "CCO");
164   }
165   SECTION("atom labels") {
166     auto mol = "CCC |$R1;;R2$|"_smiles;
167     REQUIRE(mol);
168     CHECK(mol->getAtomWithIdx(0)->getProp<std::string>(
169               common_properties::atomLabel) == "R1");
170     CHECK(mol->getAtomWithIdx(2)->getProp<std::string>(
171               common_properties::atomLabel) == "R2");
172     auto smi = MolToCXSmiles(*mol);
173     CHECK(smi == "CCC |$R1;;R2$|");
174   }
175   SECTION("atom ordering") {
176     auto mol = "OC(F)C |$R1;;R2;R3$|"_smiles;
177     REQUIRE(mol);
178     CHECK(mol->getAtomWithIdx(0)->getProp<std::string>(
179               common_properties::atomLabel) == "R1");
180     CHECK(mol->getAtomWithIdx(2)->getProp<std::string>(
181               common_properties::atomLabel) == "R2");
182     CHECK(mol->getAtomWithIdx(3)->getProp<std::string>(
183               common_properties::atomLabel) == "R3");
184     auto smi = MolToCXSmiles(*mol);
185     CHECK(smi == "CC(O)F |$R3;;R1;R2$|");
186   }
187   SECTION("atom values") {
188     auto mol = "COCC |$_AV:;bar;;foo$|"_smiles;
189     REQUIRE(mol);
190     CHECK(mol->getAtomWithIdx(3)->getProp<std::string>(
191               common_properties::molFileValue) == "foo");
192     CHECK(mol->getAtomWithIdx(1)->getProp<std::string>(
193               common_properties::molFileValue) == "bar");
194     auto smi = MolToCXSmiles(*mol);
195     CHECK(smi == "CCOC |$_AV:foo;;bar;$|");
196   }
197 
198   SECTION("radicals") {
199     auto mol = "[Fe]N([O])[O] |^1:2,3|"_smiles;
200     REQUIRE(mol);
201     CHECK(mol->getAtomWithIdx(1)->getNumRadicalElectrons() == 0);
202     CHECK(mol->getAtomWithIdx(2)->getNumRadicalElectrons() == 1);
203     CHECK(mol->getAtomWithIdx(3)->getNumRadicalElectrons() == 1);
204 
205     auto smi = MolToCXSmiles(*mol);
206     CHECK(smi == "[O]N([O])[Fe] |^1:0,2|");
207   }
208   SECTION("radicals2") {
209     auto mol = "[CH]C[CH2] |^1:2,^2:0|"_smiles;
210     REQUIRE(mol);
211     CHECK(mol->getAtomWithIdx(1)->getNumRadicalElectrons() == 0);
212     CHECK(mol->getAtomWithIdx(2)->getNumRadicalElectrons() == 1);
213     CHECK(mol->getAtomWithIdx(0)->getNumRadicalElectrons() == 2);
214 
215     auto smi = MolToCXSmiles(*mol);
216     CHECK(smi == "[CH]C[CH2] |^1:2,^2:0|");
217   }
218   SECTION("coordinates") {
219     auto mol = "OC |(0,.75,;0,-.75,)|"_smiles;
220     REQUIRE(mol);
221     CHECK(mol->getNumConformers() == 1);
222 
223     auto smi = MolToCXSmiles(*mol);
224     CHECK(smi == "CO |(0,-0.75,;0,0.75,)|");
225   }
226   SECTION("coordinates3d") {
227     auto mol = "OC |(0,.75,0.1;0,-.75,-0.1)|"_smiles;
228     REQUIRE(mol);
229     CHECK(mol->getNumConformers() == 1);
230 
231     auto smi = MolToCXSmiles(*mol);
232     CHECK(smi == "CO |(0,-0.75,-0.1;0,0.75,0.1)|");
233   }
234   SECTION("atom props") {
235     auto mol = "N1CC1C |atomProp:0.p2.v2:0.p1.v1:1.p2.v2:1.p1.v1;2;3|"_smiles;
236     REQUIRE(mol);
237     CHECK(mol->getNumAtoms() == 4);
238     CHECK(mol->getAtomWithIdx(0)->hasProp("p1"));
239     CHECK(mol->getAtomWithIdx(0)->getProp<std::string>("p1") == "v1");
240     CHECK(mol->getAtomWithIdx(0)->hasProp("p2"));
241     CHECK(mol->getAtomWithIdx(0)->getProp<std::string>("p2") == "v2");
242     CHECK(mol->getAtomWithIdx(1)->hasProp("p2"));
243     CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p2") == "v2");
244     CHECK(mol->getAtomWithIdx(1)->hasProp("p1"));
245     CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p1") == "v1;2;3");
246 
247     auto smi = MolToCXSmiles(*mol);
248     CHECK(smi == "CC1CN1 |atomProp:2.p2.v2:2.p1.v1;2;3:3.p2.v2:3.p1.v1|");
249   }
250   SECTION("atom props and values") {
251     //"CN |$_AV:atomv0;atomv1$,atomProp:0.p2.v2:1.p2.v1|";
252     auto mol = "CN |atomProp:0.p2.v2:1.p1.v1,$_AV:val1;val2$|"_smiles;
253     REQUIRE(mol);
254     auto smi = MolToCXSmiles(*mol);
255     CHECK(smi == "CN |$_AV:val1;val2$,atomProp:0.p2.v2:1.p1.v1|");
256   }
257   SECTION("enhanced stereo 1") {
258     auto mol = "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:4,5|"_smiles;
259     REQUIRE(mol);
260     auto smi = MolToCXSmiles(*mol);
261     CHECK(smi == "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:4,5|");
262   }
263 
264   SECTION("enhanced stereo 2") {
265     auto mol = "C[C@H](O)[C@H](CC)F |o1:1,3|"_smiles;
266     REQUIRE(mol);
267     auto smi = MolToCXSmiles(*mol);
268     CHECK(smi == "CC[C@H](F)[C@H](C)O |o1:2,4|");
269   }
270 
271   SECTION("enhanced stereo 3") {
272     auto mol =
273         "C[C@@H]1N[C@H](C)[C@@H]([C@H](C)[C@@H]1C)C1[C@@H](C)O[C@@H](C)[C@@H](C)[C@H]1C |a:5,o1:1,8,o2:14,16,&1:11,18,&2:3,6,r|"_smiles;
274     REQUIRE(mol);
275     auto smi = MolToCXSmiles(*mol);
276     CHECK(smi ==
277           "C[C@@H]1N[C@H](C)[C@H](C2[C@@H](C)O[C@@H](C)[C@@H](C)[C@H]2C)[C@H]("
278           "C)[C@@H]1C |a:5,o1:1,18,o2:10,12,&1:3,16,&2:7,14|");
279   }
280 
281   SECTION("enhanced stereo 4") {
282     auto mol = "C[C@@H]1CCO[C@H](C)C1 |a:1,5,r|"_smiles;
283     REQUIRE(mol);
284     auto smi = MolToCXSmiles(*mol);
285     CHECK(smi == "C[C@@H]1CCO[C@H](C)C1 |a:1,5|");
286   }
287 
288   SECTION("enhanced stereo with other properties") {
289     auto mol = "CC[C@H](C)O |atomProp:3.p2.v2,o1:2|"_smiles;
290     REQUIRE(mol);
291     auto smi = MolToCXSmiles(*mol);
292     CHECK(smi == "CC[C@H](C)O |atomProp:3.p2.v2,o1:2|");
293   }
294 
295   SECTION("mol fragments1") {
296     auto mol = "Cl.OC |(1,0,0;0,.75,0.1;0,-.75,-0.1)|"_smiles;
297     REQUIRE(mol);
298     CHECK(mol->getNumConformers() == 1);
299 
300     std::vector<int> atomsToUse = {1, 2};
301     auto smi = MolFragmentToCXSmiles(*mol, atomsToUse);
302     CHECK(smi == "CO |(0,-0.75,-0.1;0,0.75,0.1)|");
303   }
304   SECTION("mol fragments2") {
305     auto mol = "Cl.N1CC1C |atomProp:1.p2.v1:1.p1.v1:2.p2.v2:2.p1.v2|"_smiles;
306     REQUIRE(mol);
307     CHECK(mol->getNumAtoms() == 5);
308     CHECK(!mol->getAtomWithIdx(0)->hasProp("p1"));
309     CHECK(mol->getAtomWithIdx(1)->hasProp("p1"));
310     CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p1") == "v1");
311 
312     std::vector<int> atomsToUse = {1, 2, 3, 4};
313     auto smi = MolFragmentToCXSmiles(*mol, atomsToUse);
314     CHECK(smi == "CC1CN1 |atomProp:2.p2.v2:2.p1.v2:3.p2.v1:3.p1.v1|");
315   }
316 
317   SECTION("mol fragments3") {
318     auto mol = "Cl.[CH]C[CH2] |^1:3,^2:1|"_smiles;
319     REQUIRE(mol);
320     CHECK(mol->getAtomWithIdx(2)->getNumRadicalElectrons() == 0);
321     CHECK(mol->getAtomWithIdx(3)->getNumRadicalElectrons() == 1);
322     CHECK(mol->getAtomWithIdx(1)->getNumRadicalElectrons() == 2);
323 
324     std::vector<int> atomsToUse = {1, 2, 3};
325     auto smi = MolFragmentToCXSmiles(*mol, atomsToUse);
326     CHECK(smi == "[CH]C[CH2] |^1:2,^2:0|");
327   }
328 }
329 
330 TEST_CASE("Github #2148", "[bug][Smiles][Smarts]") {
331   SECTION("SMILES") {
332     auto mol = "C(=C\\F)\\4.O=C1C=4CCc2ccccc21"_smiles;
333     REQUIRE(mol);
334     REQUIRE(mol->getBondBetweenAtoms(0, 5));
335     CHECK(mol->getBondBetweenAtoms(0, 5)->getBondType() == Bond::DOUBLE);
336   }
337   SECTION("SMILES edges") {
338     auto m1 = "C/C=C/C"_smiles;
339     REQUIRE(m1);
340     CHECK(m1->getBondBetweenAtoms(2, 1)->getBondType() == Bond::DOUBLE);
341     CHECK(m1->getBondBetweenAtoms(2, 1)->getStereo() != Bond::STEREONONE);
342 
343     {
344       std::vector<std::string> smis = {"C1=C/C.C/1", "C/1=C/C.C1",
345                                        "C-1=C/C.C/1", "C/1=C/C.C-1"};
346       for (auto smi : smis) {
347         std::unique_ptr<RWMol> mol(SmilesToMol(smi));
348         REQUIRE(mol);
349         CHECK(mol->getBondBetweenAtoms(0, 3)->getBondType() == Bond::SINGLE);
350         CHECK(mol->getBondBetweenAtoms(0, 3)->getBondDir() != Bond::NONE);
351         CHECK(mol->getBondBetweenAtoms(0, 1)->getBondType() == Bond::DOUBLE);
352         CHECK(mol->getBondBetweenAtoms(0, 1)->getStereo() != Bond::STEREONONE);
353       }
354     }
355   }
356 
357   SECTION("Writing SMILES") {
358     auto mol = "C/C=c1/ncc(=C)cc1"_smiles;
359     REQUIRE(mol);
360     REQUIRE(mol->getBondBetweenAtoms(1, 2));
361     CHECK(mol->getBondBetweenAtoms(1, 2)->getBondType() == Bond::DOUBLE);
362     CHECK(mol->getBondBetweenAtoms(1, 2)->getStereo() == Bond::STEREOE);
363     auto smi = MolToSmiles(*mol);
364     CHECK(smi == "C=c1cc/c(=C\\C)nc1");
365   }
366 }
367 
368 TEST_CASE("Github #2298", "[bug][Smarts][substructure]") {
369   SubstructMatchParameters ps;
370   ps.useQueryQueryMatches = true;
371   SECTION("basics") {
372     auto m1 = "[#6]"_smarts;
373     REQUIRE(m1);
374     CHECK(SubstructMatch(*m1, *m1, ps).size() == 1);
375     auto m2 = "[C]"_smarts;
376     REQUIRE(m2);
377     CHECK(SubstructMatch(*m2, *m2, ps).size() == 1);
378     auto m3 = "[C]"_smarts;
379     REQUIRE(m3);
380     CHECK(SubstructMatch(*m3, *m3, ps).size() == 1);
381   }
382   SECTION("a bit more complex") {
383     auto m1 = "[CH0+2]"_smarts;
384     REQUIRE(m1);
385     CHECK(SubstructMatch(*m1, *m1, ps).size() == 1);
386   }
387 }
388 
389 TEST_CASE("dative ring closures", "[bug][smiles]") {
390   SECTION("first closure1") {
391     auto m1 = "N->1CCN->[Pt]1"_smiles;
392     REQUIRE(m1);
393     REQUIRE(m1->getBondBetweenAtoms(0, 4));
394     CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE);
395     CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 0);
396   }
397   SECTION("first closure2") {
398     auto m1 = "[Pt]<-1CCCN1"_smiles;
399     REQUIRE(m1);
400     REQUIRE(m1->getBondBetweenAtoms(0, 4));
401     CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE);
402     CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 4);
403   }
404   SECTION("second closure1") {
405     auto m1 = "N1CCN->[Pt]<-1"_smiles;
406     REQUIRE(m1);
407     REQUIRE(m1->getBondBetweenAtoms(0, 4));
408     CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE);
409     CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 0);
410   }
411   SECTION("second closure2") {
412     auto m1 = "[Pt]1CCCN->1"_smiles;
413     REQUIRE(m1);
414     REQUIRE(m1->getBondBetweenAtoms(0, 4));
415     CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE);
416     CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 4);
417   }
418   SECTION("branch1") {
419     auto m1 = "N(->[Pt])C"_smiles;
420     REQUIRE(m1);
421     REQUIRE(m1->getBondBetweenAtoms(0, 1));
422     CHECK(m1->getBondBetweenAtoms(0, 1)->getBondType() == Bond::DATIVE);
423     CHECK(m1->getBondBetweenAtoms(0, 1)->getBeginAtomIdx() == 0);
424   }
425   SECTION("branch2") {
426     auto m1 = "N(->[Pt])C"_smiles;
427     REQUIRE(m1);
428     REQUIRE(m1->getBondBetweenAtoms(0, 1));
429     CHECK(m1->getBondBetweenAtoms(0, 1)->getBondType() == Bond::DATIVE);
430     CHECK(m1->getBondBetweenAtoms(0, 1)->getBeginAtomIdx() == 0);
431   }
432 }
433 
434 TEST_CASE("github#2450: getAtomSmarts() fails for free atoms", "[bug]") {
435   SECTION("original report") {
436     std::unique_ptr<QueryAtom> qat(new QueryAtom());
437     qat->setQuery(makeAtomNumQuery(6));
438     auto smarts = SmartsWrite::GetAtomSmarts(qat.get());
439     CHECK(smarts == "[#6]");
440   }
441   SECTION("query bonds") {
442     std::unique_ptr<QueryBond> qbnd(new QueryBond(Bond::AROMATIC));
443     auto smarts = SmartsWrite::GetBondSmarts(qbnd.get());
444     CHECK(smarts == ":");
445   }
446   SECTION("SMILES works too") {
447     std::unique_ptr<Bond> bnd(new Bond(Bond::AROMATIC));
448     auto smiles = SmilesWrite::GetBondSmiles(bnd.get());
449     CHECK(smiles == ":");
450   }
451 }
452 
453 TEST_CASE("MolFragmentToSmarts", "[Smarts]") {
454   SECTION("BasicFragment") {
455     auto m = "CCCCCN"_smiles;
456     std::vector<int> indices = {3, 4, 5};
457     const auto smarts = MolFragmentToSmarts(*m, indices);
458     CHECK(smarts == "[#6]-[#6]-[#7]");
459   }
460   SECTION("FragmentWithParity1") {
461     auto m = "C[C@H](F)CCCN"_smiles;
462     std::vector<int> indices = {0, 1, 2, 3};
463     const auto smarts = MolFragmentToSmarts(*m, indices);
464     CHECK(smarts == "[#6]-[#6@H](-[#9])-[#6]");
465   }
466   SECTION("FragmentWithParity2") {
467     auto m = "C[C@](F)(Cl)CCCN"_smiles;
468     std::vector<int> indices = {0, 1, 2, 4};
469     const auto smarts = MolFragmentToSmarts(*m, indices);
470     CHECK(smarts == "[#6]-[#6@@](-[#9])-[#6]");
471   }
472   SECTION("FragmentLosingParity") {
473     auto m = "C[C@H](F)CCCN"_smiles;
474     std::vector<int> indices = {0, 1, 2};
475     const auto smarts = MolFragmentToSmarts(*m, indices);
476     CHECK(smarts == "[#6]-[#6@H]-[#9]");
477   }
478   SECTION("FragmentWithSpecifiedBonds") {
479     auto m = "C1CC1O"_smiles;
480     std::vector<int> atomIndices = {0, 1, 2};
481     std::vector<int> bondIndices = {0};
482     const auto smarts = MolFragmentToSmarts(*m, atomIndices, &bondIndices);
483     CHECK(smarts == "[#6]-[#6].[#6]");
484   }
485   SECTION("SmartsFragmentFromQueryMol") {
486     auto m = "CCCC[C,N]N"_smarts;
487     std::vector<int> indices = {3, 4, 5};
488     const auto smarts = MolFragmentToSmarts(*m, indices);
489     CHECK(smarts == "C[C,N]N");
490   }
491 }
492 
493 TEST_CASE("github #2667: MolToCXSmiles generates error for empty molecule",
494           "[bug][cxsmiles]") {
495   SECTION("basics") {
496     auto mol = ""_smiles;
497     REQUIRE(mol);
498     auto smi = MolToCXSmiles(*mol);
499     CHECK(smi == "");
500   }
501 }
502 
503 TEST_CASE("github #2604: support range-based charge queries from SMARTS",
504           "[ranges][smarts]") {
505   SECTION("positive") {
506     auto query = "[N+{0-1}]"_smarts;
507     REQUIRE(query);
508     {
509       auto m1 = "CN"_smiles;
510       REQUIRE(m1);
511       CHECK(SubstructMatch(*m1, *query).size() == 1);
512     }
513     {
514       auto m1 = "C[NH3+]"_smiles;
515       REQUIRE(m1);
516       CHECK(SubstructMatch(*m1, *query).size() == 1);
517     }
518     {
519       auto m1 = "C[NH4+2]"_smiles;
520       REQUIRE(m1);
521       CHECK(SubstructMatch(*m1, *query).empty());
522     }
523     {
524       auto m1 = "C[NH-]"_smiles;
525       REQUIRE(m1);
526       CHECK(SubstructMatch(*m1, *query).empty());
527     }
528   }
529   SECTION("negative") {
530     auto query = "[N-{0-1}]"_smarts;
531     REQUIRE(query);
532     {
533       auto m1 = "CN"_smiles;
534       REQUIRE(m1);
535       CHECK(SubstructMatch(*m1, *query).size() == 1);
536     }
537     {
538       auto m1 = "C[NH-]"_smiles;
539       REQUIRE(m1);
540       CHECK(SubstructMatch(*m1, *query).size() == 1);
541     }
542     {
543       auto m1 = "C[N-2]"_smiles;
544       REQUIRE(m1);
545       CHECK(SubstructMatch(*m1, *query).empty());
546     }
547     {
548       auto m1 = "C[NH3+]"_smiles;
549       REQUIRE(m1);
550       CHECK(SubstructMatch(*m1, *query).empty());
551     }
552   }
553 }
554 
555 TEST_CASE("_smarts fails gracefully", "[smarts]") {
556   SECTION("empty") {
557     auto mol = ""_smarts;
558     REQUIRE(mol);
559   }
560   SECTION("syntax error") {
561     auto mol = "C1C"_smarts;
562     REQUIRE(!mol);
563   }
564 }
565 
566 TEST_CASE(
567     "github #2801: MolToSmarts may generate invalid SMARTS for bond queries",
568     "[bug][smarts]") {
569   SECTION("original_report") {
570     auto q1 = "*~CCC"_smarts;
571     REQUIRE(q1);
572     Bond *qb = q1->getBondBetweenAtoms(0, 1);
573     BOND_EQUALS_QUERY *bq1 = makeBondOrderEqualsQuery(qb->getBondType());
574     qb->setQuery(bq1);
575     BOND_EQUALS_QUERY *bq2 = makeBondIsInRingQuery();
576     bq2->setNegation(true);
577     qb->expandQuery(bq2, Queries::COMPOSITE_AND, true);
578     std::string smarts = MolToSmarts(*q1);
579     CHECK(smarts == "*!@CCC");
580     std::unique_ptr<RWMol> q2(SmartsToMol(smarts));
581     REQUIRE(q2);
582   }
583   SECTION("composite_or") {
584     auto q1 = "*~CCC"_smarts;
585     REQUIRE(q1);
586     Bond *qb = q1->getBondBetweenAtoms(0, 1);
587     BOND_EQUALS_QUERY *bq1 = makeBondOrderEqualsQuery(qb->getBondType());
588     qb->setQuery(bq1);
589     BOND_EQUALS_QUERY *bq2 = makeBondIsInRingQuery();
590     bq2->setNegation(true);
591     qb->expandQuery(bq2, Queries::COMPOSITE_OR, true);
592     // this used to yield *,!@CCC
593     std::string smarts = MolToSmarts(*q1);
594     CHECK(smarts == "*!@CCC");
595     std::unique_ptr<RWMol> q2(SmartsToMol(smarts));
596     REQUIRE(q2);
597   }
598   SECTION("composite_lowand") {
599     auto q1 = "*~CCC"_smarts;
600     REQUIRE(q1);
601     Bond *qb = q1->getBondBetweenAtoms(0, 1);
602     BOND_EQUALS_QUERY *bq1 = makeBondOrderEqualsQuery(qb->getBondType());
603     qb->setQuery(bq1);
604     BOND_EQUALS_QUERY *bq2 = makeBondOrderEqualsQuery(qb->getBondType());
605     qb->expandQuery(bq2, Queries::COMPOSITE_OR, true);
606     BOND_EQUALS_QUERY *bq3 = makeBondIsInRingQuery();
607     bq3->setNegation(true);
608     qb->expandQuery(bq3, Queries::COMPOSITE_AND, true);
609     std::string smarts = MolToSmarts(*q1);
610     CHECK(smarts == "*!@CCC");
611     std::unique_ptr<RWMol> q2(SmartsToMol(smarts));
612     REQUIRE(q2);
613   }
614 }
615 
616 TEST_CASE("large rings", "[smarts]") {
617   auto query = "[r24]"_smarts;
618   auto m_r24 = "C1CCCCCCCCCCCCCCCCCCCCCCC1"_smiles;
619   auto m_r23 = "C1CCCCCCCCCCCCCCCCCCCCCC1"_smiles;
620 
621   CHECK(SubstructMatch(*m_r23, *query).empty());
622   CHECK(SubstructMatch(*m_r24, *query).size() == 24);
623 }
624 
625 TEST_CASE("random smiles vectors", "[smiles]") {
626   auto m = "C1OCC1N(CO)(Cc1ccccc1NCCl)"_smiles;
627   REQUIRE(m);
628   SECTION("basics") {
629     std::vector<std::string> tgt = {
630         "c1cc(CN(C2COC2)CO)c(cc1)NCCl", "N(CCl)c1c(CN(C2COC2)CO)cccc1",
631         "N(CCl)c1ccccc1CN(C1COC1)CO", "OCN(Cc1ccccc1NCCl)C1COC1",
632         "C(N(C1COC1)Cc1c(cccc1)NCCl)O"};
633     unsigned int randomSeed = 0xf00d;
634     auto smiV = MolToRandomSmilesVect(*m, 5, randomSeed);
635     CHECK(smiV == tgt);
636   }
637   SECTION("options1") {
638     std::vector<std::string> tgt = {
639         "C1-C=C(-C-N(-C2-C-O-C-2)-C-O)-C(=C-C=1)-N-C-Cl",
640         "N(-C-Cl)-C1-C(-C-N(-C2-C-O-C-2)-C-O)=C-C=C-C=1",
641         "N(-C-Cl)-C1=C-C=C-C=C-1-C-N(-C1-C-O-C-1)-C-O",
642         "O-C-N(-C-C1=C-C=C-C=C-1-N-C-Cl)-C1-C-O-C-1",
643         "C(-N(-C1-C-O-C-1)-C-C1-C(=C-C=C-C=1)-N-C-Cl)-O"};
644     RWMol nm(*m);
645     MolOps::Kekulize(nm, true);
646     unsigned int randomSeed = 0xf00d;
647     bool isomericSmiles = true;
648     bool kekuleSmiles = true;
649     bool allBondsExplicit = true;
650     bool allHsExplicit = false;
651     auto smiV =
652         MolToRandomSmilesVect(nm, 5, randomSeed, isomericSmiles, kekuleSmiles,
653                               allBondsExplicit, allHsExplicit);
654     CHECK(smiV == tgt);
655   }
656   SECTION("options2") {
657     std::vector<std::string> tgt = {
658         "[cH]1[cH][c]([CH2][N]([CH]2[CH2][O][CH2]2)[CH2][OH])[c]([cH][cH]1)[NH]"
659         "[CH2][Cl]",
660         "[NH]([CH2][Cl])[c]1[c]([CH2][N]([CH]2[CH2][O][CH2]2)[CH2][OH])[cH][cH]"
661         "[cH][cH]1",
662         "[NH]([CH2][Cl])[c]1[cH][cH][cH][cH][c]1[CH2][N]([CH]1[CH2][O][CH2]1)["
663         "CH2][OH]",
664         "[OH][CH2][N]([CH2][c]1[cH][cH][cH][cH][c]1[NH][CH2][Cl])[CH]1[CH2][O]["
665         "CH2]1",
666         "[CH2]([N]([CH]1[CH2][O][CH2]1)[CH2][c]1[c]([cH][cH][cH][cH]1)[NH][CH2]"
667         "[Cl])[OH]"};
668     RWMol nm(*m);
669     MolOps::Kekulize(nm, false);
670     unsigned int randomSeed = 0xf00d;
671     bool isomericSmiles = true;
672     bool kekuleSmiles = false;
673     bool allBondsExplicit = false;
674     bool allHsExplicit = true;
675     auto smiV =
676         MolToRandomSmilesVect(nm, 5, randomSeed, isomericSmiles, kekuleSmiles,
677                               allBondsExplicit, allHsExplicit);
678     CHECK(smiV == tgt);
679   }
680 }
681 
682 TEST_CASE(
683     "github #3197: Molecule constructed from CXSMILES cannot be translated to "
684     "SMARTS",
685     "[smarts][bug]") {
686   auto m = "C* |$;M_p$|"_smiles;
687   REQUIRE(m);
688   SECTION("smarts writing") {
689     auto smarts = MolToSmarts(*m);
690     // this will change if/when the definition of the query changes, just have
691     // to update then
692     CHECK(smarts ==
693           "[#6]-[!#2&!#5&!#6&!#7&!#8&!#9&!#10&!#14&!#15&!#16&!#17&!#18&!#33&!#"
694           "34&!#35&!#36&!#52&!#53&!#54&!#85&!#86&!#1]");
695   }
696   SECTION("serialization") {
697     std::string pkl;
698     MolPickler::pickleMol(*m, pkl, PicklerOps::PropertyPickleOptions::AllProps);
699     ROMol cpy(pkl);
700     auto osmi = MolToCXSmiles(*m);
701     CHECK(osmi == "*C |$M_p;$|");
702     auto smi = MolToCXSmiles(cpy);
703     CHECK(smi == osmi);
704     QueryAtom *oa1 = static_cast<QueryAtom *>(m->getAtomWithIdx(1));
705     QueryAtom *a1 = static_cast<QueryAtom *>(m->getAtomWithIdx(1));
706     REQUIRE(oa1->hasQuery());
707     REQUIRE(a1->hasQuery());
708     size_t osz =
709         oa1->getQuery()->endChildren() - oa1->getQuery()->beginChildren();
710     size_t sz = a1->getQuery()->endChildren() - a1->getQuery()->beginChildren();
711     // we don't need to test the exact size (since that may change), but let's
712     // at least be sure it's not unreasonable:
713     CHECK(osz > 0);
714     CHECK(osz < 200);
715     CHECK(osz == sz);
716   }
717 }
718 
719 TEST_CASE("d primitive in SMARTS", "[smarts][extension]") {
720   SmilesParserParams ps;
721   ps.removeHs = false;
722   std::unique_ptr<ROMol> m(SmilesToMol("[H]OCO[2H]", ps));
723   REQUIRE(m);
724   CHECK(m->getNumAtoms() == 5);
725   SECTION("basics") {
726     auto q = "[d2]"_smarts;
727     REQUIRE(q);
728     CHECK(SubstructMatch(*m, *q).size() == 2);
729   }
730   SECTION("comparison to D") {
731     auto q = "[D2]"_smarts;
732     REQUIRE(q);
733     CHECK(SubstructMatch(*m, *q).size() == 3);
734   }
735 }
736 
737 TEST_CASE(
738     "github #3342: unspecified branch bonds in SMARTS don't have aromaticity "
739     "set",
740     "[smarts][bug]") {
741   SECTION("as reported") {
742     auto m = "c1(ccccc1)"_smarts;
743     REQUIRE(m);
744     REQUIRE(m->getBondBetweenAtoms(0, 1));
745     CHECK(m->getBondBetweenAtoms(0, 1)->getBondType() ==
746           Bond::BondType::AROMATIC);
747     CHECK(m->getBondBetweenAtoms(0, 1)->getIsAromatic());
748   }
749 }
750 
751 TEST_CASE("github #3320: incorrect bond properties from CXSMILES",
752           "[cxsmiles][bug]") {
753   SECTION("as reported") {
754     auto m = "[Cl-][Pt++]1([Cl-])NCCN1C1CCCCC1 |C:6.6,3.2,0.0,2.1|"_smiles;
755     REQUIRE(m);
756     std::vector<std::pair<unsigned, unsigned>> bonds = {
757         {0, 1}, {3, 1}, {2, 1}, {6, 1}};
758     for (const auto &pr : bonds) {
759       auto bnd = m->getBondBetweenAtoms(pr.first, pr.second);
760       REQUIRE(bnd);
761       CHECK(bnd->getBondType() == Bond::BondType::DATIVE);
762       CHECK(bnd->getBeginAtomIdx() == pr.first);
763     }
764   }
765   SECTION("as reported") {
766     auto m = "[Cl-][Pt++]1([Cl-])NCC3C2CCCCC2.N13 |C:12.12,3.2,0.0,2.1|"_smiles;
767     REQUIRE(m);
768     std::vector<std::pair<unsigned, unsigned>> bonds = {
769         {0, 1}, {3, 1}, {2, 1}, {12, 1}};
770     for (const auto &pr : bonds) {
771       auto bnd = m->getBondBetweenAtoms(pr.first, pr.second);
772       REQUIRE(bnd);
773       CHECK(bnd->getBondType() == Bond::BondType::DATIVE);
774       CHECK(bnd->getBeginAtomIdx() == pr.first);
775     }
776   }
777 }
778 
779 TEST_CASE("github #3774: MolToSmarts inverts direction of dative bond",
780           "[smarts][bug]") {
781   SECTION("as reported") {
782     {
783       auto m = "N->[Cu+]"_smiles;
784       REQUIRE(m);
785       CHECK(MolToSmarts(*m) == "[#7]->[Cu+]");
786       CHECK(MolToSmiles(*m) == "N->[Cu+]");
787     }
788     {
789       auto m = "N<-[Cu+]"_smiles;
790       REQUIRE(m);
791       CHECK(MolToSmarts(*m) == "[#7]<-[Cu+]");
792       CHECK(MolToSmiles(*m) == "N<-[Cu+]");
793     }
794   }
795   SECTION("from smarts") {
796     {
797       auto m = "N->[Cu+]"_smarts;
798       REQUIRE(m);
799       CHECK(MolToSmarts(*m) == "N->[#29&+]");
800     }
801     {
802       auto m = "N<-[Cu+]"_smarts;
803       REQUIRE(m);
804       CHECK(MolToSmarts(*m) == "N<-[#29&+]");
805     }
806   }
807 }
808 
809 TEST_CASE("Hydrogen bonds", "[smiles]") {
810   SECTION("basics") {
811     auto m = "CC1O[H]O=C(C)C1 |H:4.3|"_smiles;
812     REQUIRE(m);
813     REQUIRE(m->getBondBetweenAtoms(3, 4));
814     CHECK(m->getBondBetweenAtoms(3, 4)->getBondType() ==
815           Bond::BondType::HYDROGEN);
816   }
817 }
818 
819 TEST_CASE("Github #2788: doKekule=true should kekulize the molecule",
820           "[smiles]") {
821   SECTION("basics1") {
822     auto m = "c1ccccc1"_smiles;
823     REQUIRE(m);
824     bool doIsomeric = true;
825     bool doKekule = true;
826     CHECK(MolToSmiles(*m, doIsomeric, doKekule) == "C1=CC=CC=C1");
827   }
828   SECTION("basics2") {
829     auto m = "c1cc[nH]c1"_smiles;
830     REQUIRE(m);
831     bool doIsomeric = true;
832     bool doKekule = true;
833     CHECK(MolToSmiles(*m, doIsomeric, doKekule) == "C1=CNC=C1");
834   }
835 
836   SECTION("can thrown exceptions") {
837     int debugParse = 0;
838     bool sanitize = false;
839     std::unique_ptr<RWMol> m{SmilesToMol("c1ccnc1", debugParse, sanitize)};
840     REQUIRE(m);
841     bool doIsomeric = true;
842     bool doKekule = false;
843     {
844       RWMol tm(*m);
845       CHECK(MolToSmiles(tm, doIsomeric, doKekule) == "c1ccnc1");
846     }
847     doKekule = true;
848     {
849       RWMol tm(*m);
850       CHECK_THROWS_AS(MolToSmiles(tm, doIsomeric, doKekule), KekulizeException);
851     }
852   }
853 }
854 
855 TEST_CASE("bogus recursive SMARTS", "[smarts]") {
856   std::string sma = "C)foo";
857   CHECK(SmartsToMol(sma) == nullptr);
858 }
859 
860 TEST_CASE(
861     "Github #3998 MolFragmentToSmiles failing in Kekulization with "
862     "kekuleSmiles=true") {
863   auto mol = "Cc1ccccc1"_smiles;
864   REQUIRE(mol);
865   SECTION("normal") {
866     std::vector<int> ats{0};
867     std::string smi = MolFragmentToSmiles(*mol, ats);
868     CHECK(smi == "C");
869   }
870   SECTION("kekulized") {
871     std::vector<int> ats{0};
872     bool doIsomericSmiles = true;
873     bool doKekule = true;
874     std::string smi = MolFragmentToSmiles(*mol, ats, nullptr, nullptr, nullptr,
875                                           doIsomericSmiles, doKekule);
876     CHECK(smi == "C");
877   }
878   SECTION("including ring parts") {
879     std::vector<int> ats{0, 1, 2};
880     bool doIsomericSmiles = true;
881     bool doKekule = true;
882     std::string smi = MolFragmentToSmiles(*mol, ats, nullptr, nullptr, nullptr,
883                                           doIsomericSmiles, doKekule);
884     CHECK(smi == "C:CC");
885   }
886 }
887