1 // 2 // 3 // Copyright (C) 2018-2021 Greg Landrum and T5 Informatics GmbH 4 // 5 // @@ All Rights Reserved @@ 6 // This file is part of the RDKit. 7 // The contents are covered by the terms of the BSD license 8 // which is included in the file license.txt, found at the root 9 // of the RDKit source tree. 10 // 11 12 #include "catch.hpp" 13 14 #include <GraphMol/RDKitBase.h> 15 #include <GraphMol/MolPickler.h> 16 #include <GraphMol/QueryAtom.h> 17 #include <GraphMol/QueryBond.h> 18 #include <GraphMol/SmilesParse/SmilesParse.h> 19 #include <GraphMol/SmilesParse/SmilesWrite.h> 20 #include <GraphMol/SmilesParse/SmartsWrite.h> 21 #include <GraphMol/Substruct/SubstructMatch.h> 22 23 using namespace RDKit; 24 25 TEST_CASE("Github #1972", "[SMILES][bug]") { 26 SECTION("basics") { 27 std::vector<std::vector<std::string>> smiles = { 28 {"[C@@]1(Cl)(F)(I).Br1", "[C@@](Br)(Cl)(F)(I)"}, 29 {"[C@@](Cl)(F)(I)1.Br1", "[C@@](Cl)(F)(I)Br"}, 30 {"[C@@](Cl)1(F)(I).Br1", "[C@@](Cl)(Br)(F)(I)"}, 31 {"[C@@](Cl)(F)1(I).Br1", "[C@@](Cl)(F)(Br)(I)"}}; 32 for (const auto &pr : smiles) { 33 std::unique_ptr<ROMol> m1(SmilesToMol(pr[0])); 34 std::unique_ptr<ROMol> m2(SmilesToMol(pr[1])); 35 REQUIRE(m1); 36 REQUIRE(m2); 37 auto csmi1 = MolToSmiles(*m1); 38 auto csmi2 = MolToSmiles(*m2); 39 CHECK(csmi1 == csmi2); 40 } 41 } 42 SECTION("further examples") { 43 std::vector<std::vector<std::string>> smiles = { 44 {"[C@@]1(Cl)2(I).Br1.F2", "[C@@](Br)(Cl)(F)(I)"}, 45 {"[C@@](Cl)2(I)1.Br1.F2", "[C@@](Cl)(F)(I)Br"}, 46 {"[C@@]12(Cl)(I).Br1.F2", "[C@@](Br)(F)(Cl)(I)"}, 47 {"[C@@]21(Cl)(I).Br1.F2", "[C@@](F)(Br)(Cl)(I)"}, 48 {"[C@@](Cl)12(I).Br1.F2", "[C@@](Cl)(Br)(F)(I)"}, 49 {"[C@@](Cl)21(I).Br1.F2", "[C@@](Cl)(F)(Br)(I)"}, 50 {"[C@@](Cl)(I)21.Br1.F2", "[C@@](Cl)(I)(F)(Br)"}, 51 {"[C@@](Cl)(I)12.Br1.F2", "[C@@](Cl)(I)(Br)(F)"}}; 52 for (const auto &pr : smiles) { 53 std::unique_ptr<ROMol> m1(SmilesToMol(pr[0])); 54 std::unique_ptr<ROMol> m2(SmilesToMol(pr[1])); 55 REQUIRE(m1); 56 REQUIRE(m2); 57 auto csmi1 = MolToSmiles(*m1); 58 auto csmi2 = MolToSmiles(*m2); 59 CHECK(csmi1 == csmi2); 60 } 61 } 62 } 63 64 TEST_CASE("Github #2029", "[SMILES][bug]") { 65 SECTION("wedging") { 66 std::unique_ptr<ROMol> m1(SmilesToMol("CN[C@H](Cl)C(=O)O")); 67 REQUIRE(m1); 68 m1->getBondWithIdx(1)->setBondDir(Bond::BEGINWEDGE); 69 bool doKekule = false, allBondsExplicit = false; 70 CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule, 71 allBondsExplicit)); 72 allBondsExplicit = true; 73 CHECK("-" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule, 74 allBondsExplicit)); 75 } 76 SECTION("direction") { 77 std::unique_ptr<ROMol> m1(SmilesToMol("C/C=C/C")); 78 REQUIRE(m1); 79 bool doKekule = false, allBondsExplicit = false; 80 CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule, 81 allBondsExplicit)); 82 CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(2), -1, doKekule, 83 allBondsExplicit)); 84 allBondsExplicit = true; 85 CHECK("/" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule, 86 allBondsExplicit)); 87 CHECK("/" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(2), -1, doKekule, 88 allBondsExplicit)); 89 } 90 SECTION("aromatic double bonds") { 91 std::unique_ptr<RWMol> m1(SmilesToMol("c1ccccc1")); 92 REQUIRE(m1); 93 bool markAtomsBonds = false; 94 MolOps::Kekulize(*m1, markAtomsBonds); 95 bool doKekule = false, allBondsExplicit = false; 96 CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule, 97 allBondsExplicit)); 98 CHECK("" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule, 99 allBondsExplicit)); 100 allBondsExplicit = true; 101 CHECK("=" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(0), -1, doKekule, 102 allBondsExplicit)); 103 CHECK("-" == SmilesWrite::GetBondSmiles(m1->getBondWithIdx(1), -1, doKekule, 104 allBondsExplicit)); 105 } 106 } 107 108 TEST_CASE("Smiles literals", "[SMILES]") { 109 auto mol = "c1ccccc1"_smiles; 110 REQUIRE(mol); 111 CHECK(6 == mol->getNumAtoms()); 112 auto fail1 = "c1ccccc"_smiles; 113 REQUIRE(!fail1); 114 auto fail2 = "c1cccn1"_smiles; 115 REQUIRE(!fail2); 116 } 117 118 TEST_CASE("Smarts literals", "[Smarts]") { 119 auto mol = "c1ccc[c,n]c1"_smarts; 120 REQUIRE(mol); 121 CHECK(6 == mol->getNumAtoms()); 122 auto fail1 = "c1ccccc"_smarts; 123 REQUIRE(!fail1); 124 auto mol2 = "c1cccn1"_smarts; 125 REQUIRE(mol2); 126 } 127 128 TEST_CASE( 129 "github #2197 and #2237: handling of aromatic main group atoms in SMARTS", 130 "[Smarts]") { 131 std::vector<std::string> smarts = { 132 "[si]1ccccc1", 133 "[as]1ccccc1", 134 "[se]1ccccc1", 135 "[te]1ccccc1", 136 137 }; 138 SECTION("#2197") { 139 for (const auto &sma : smarts) { 140 std::unique_ptr<ROMol> mol(SmartsToMol(sma)); 141 REQUIRE(mol); 142 CHECK(6 == mol->getNumAtoms()); 143 REQUIRE(mol->getAtomWithIdx(0)->hasQuery()); 144 REQUIRE(static_cast<QueryAtom *>(mol->getAtomWithIdx(0)) 145 ->getQuery() 146 ->getDescription() == "AtomType"); 147 } 148 } 149 SECTION("#2237") { 150 for (const auto &sma : smarts) { 151 std::unique_ptr<ROMol> mol(SmartsToMol(sma)); 152 REQUIRE(mol); 153 REQUIRE(MolToSmarts(*mol) == sma); 154 } 155 } 156 } 157 158 TEST_CASE("github #2257: writing cxsmiles", "[smiles][cxsmiles]") { 159 SECTION("basics") { 160 auto mol = "OCC"_smiles; 161 REQUIRE(mol); 162 auto smi = MolToCXSmiles(*mol); 163 CHECK(smi == "CCO"); 164 } 165 SECTION("atom labels") { 166 auto mol = "CCC |$R1;;R2$|"_smiles; 167 REQUIRE(mol); 168 CHECK(mol->getAtomWithIdx(0)->getProp<std::string>( 169 common_properties::atomLabel) == "R1"); 170 CHECK(mol->getAtomWithIdx(2)->getProp<std::string>( 171 common_properties::atomLabel) == "R2"); 172 auto smi = MolToCXSmiles(*mol); 173 CHECK(smi == "CCC |$R1;;R2$|"); 174 } 175 SECTION("atom ordering") { 176 auto mol = "OC(F)C |$R1;;R2;R3$|"_smiles; 177 REQUIRE(mol); 178 CHECK(mol->getAtomWithIdx(0)->getProp<std::string>( 179 common_properties::atomLabel) == "R1"); 180 CHECK(mol->getAtomWithIdx(2)->getProp<std::string>( 181 common_properties::atomLabel) == "R2"); 182 CHECK(mol->getAtomWithIdx(3)->getProp<std::string>( 183 common_properties::atomLabel) == "R3"); 184 auto smi = MolToCXSmiles(*mol); 185 CHECK(smi == "CC(O)F |$R3;;R1;R2$|"); 186 } 187 SECTION("atom values") { 188 auto mol = "COCC |$_AV:;bar;;foo$|"_smiles; 189 REQUIRE(mol); 190 CHECK(mol->getAtomWithIdx(3)->getProp<std::string>( 191 common_properties::molFileValue) == "foo"); 192 CHECK(mol->getAtomWithIdx(1)->getProp<std::string>( 193 common_properties::molFileValue) == "bar"); 194 auto smi = MolToCXSmiles(*mol); 195 CHECK(smi == "CCOC |$_AV:foo;;bar;$|"); 196 } 197 198 SECTION("radicals") { 199 auto mol = "[Fe]N([O])[O] |^1:2,3|"_smiles; 200 REQUIRE(mol); 201 CHECK(mol->getAtomWithIdx(1)->getNumRadicalElectrons() == 0); 202 CHECK(mol->getAtomWithIdx(2)->getNumRadicalElectrons() == 1); 203 CHECK(mol->getAtomWithIdx(3)->getNumRadicalElectrons() == 1); 204 205 auto smi = MolToCXSmiles(*mol); 206 CHECK(smi == "[O]N([O])[Fe] |^1:0,2|"); 207 } 208 SECTION("radicals2") { 209 auto mol = "[CH]C[CH2] |^1:2,^2:0|"_smiles; 210 REQUIRE(mol); 211 CHECK(mol->getAtomWithIdx(1)->getNumRadicalElectrons() == 0); 212 CHECK(mol->getAtomWithIdx(2)->getNumRadicalElectrons() == 1); 213 CHECK(mol->getAtomWithIdx(0)->getNumRadicalElectrons() == 2); 214 215 auto smi = MolToCXSmiles(*mol); 216 CHECK(smi == "[CH]C[CH2] |^1:2,^2:0|"); 217 } 218 SECTION("coordinates") { 219 auto mol = "OC |(0,.75,;0,-.75,)|"_smiles; 220 REQUIRE(mol); 221 CHECK(mol->getNumConformers() == 1); 222 223 auto smi = MolToCXSmiles(*mol); 224 CHECK(smi == "CO |(0,-0.75,;0,0.75,)|"); 225 } 226 SECTION("coordinates3d") { 227 auto mol = "OC |(0,.75,0.1;0,-.75,-0.1)|"_smiles; 228 REQUIRE(mol); 229 CHECK(mol->getNumConformers() == 1); 230 231 auto smi = MolToCXSmiles(*mol); 232 CHECK(smi == "CO |(0,-0.75,-0.1;0,0.75,0.1)|"); 233 } 234 SECTION("atom props") { 235 auto mol = "N1CC1C |atomProp:0.p2.v2:0.p1.v1:1.p2.v2:1.p1.v1;2;3|"_smiles; 236 REQUIRE(mol); 237 CHECK(mol->getNumAtoms() == 4); 238 CHECK(mol->getAtomWithIdx(0)->hasProp("p1")); 239 CHECK(mol->getAtomWithIdx(0)->getProp<std::string>("p1") == "v1"); 240 CHECK(mol->getAtomWithIdx(0)->hasProp("p2")); 241 CHECK(mol->getAtomWithIdx(0)->getProp<std::string>("p2") == "v2"); 242 CHECK(mol->getAtomWithIdx(1)->hasProp("p2")); 243 CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p2") == "v2"); 244 CHECK(mol->getAtomWithIdx(1)->hasProp("p1")); 245 CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p1") == "v1;2;3"); 246 247 auto smi = MolToCXSmiles(*mol); 248 CHECK(smi == "CC1CN1 |atomProp:2.p2.v2:2.p1.v1;2;3:3.p2.v2:3.p1.v1|"); 249 } 250 SECTION("atom props and values") { 251 //"CN |$_AV:atomv0;atomv1$,atomProp:0.p2.v2:1.p2.v1|"; 252 auto mol = "CN |atomProp:0.p2.v2:1.p1.v1,$_AV:val1;val2$|"_smiles; 253 REQUIRE(mol); 254 auto smi = MolToCXSmiles(*mol); 255 CHECK(smi == "CN |$_AV:val1;val2$,atomProp:0.p2.v2:1.p1.v1|"); 256 } 257 SECTION("enhanced stereo 1") { 258 auto mol = "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:4,5|"_smiles; 259 REQUIRE(mol); 260 auto smi = MolToCXSmiles(*mol); 261 CHECK(smi == "C[C@H](F)[C@H](C)[C@@H](C)Br |a:1,o1:4,5|"); 262 } 263 264 SECTION("enhanced stereo 2") { 265 auto mol = "C[C@H](O)[C@H](CC)F |o1:1,3|"_smiles; 266 REQUIRE(mol); 267 auto smi = MolToCXSmiles(*mol); 268 CHECK(smi == "CC[C@H](F)[C@H](C)O |o1:2,4|"); 269 } 270 271 SECTION("enhanced stereo 3") { 272 auto mol = 273 "C[C@@H]1N[C@H](C)[C@@H]([C@H](C)[C@@H]1C)C1[C@@H](C)O[C@@H](C)[C@@H](C)[C@H]1C |a:5,o1:1,8,o2:14,16,&1:11,18,&2:3,6,r|"_smiles; 274 REQUIRE(mol); 275 auto smi = MolToCXSmiles(*mol); 276 CHECK(smi == 277 "C[C@@H]1N[C@H](C)[C@H](C2[C@@H](C)O[C@@H](C)[C@@H](C)[C@H]2C)[C@H](" 278 "C)[C@@H]1C |a:5,o1:1,18,o2:10,12,&1:3,16,&2:7,14|"); 279 } 280 281 SECTION("enhanced stereo 4") { 282 auto mol = "C[C@@H]1CCO[C@H](C)C1 |a:1,5,r|"_smiles; 283 REQUIRE(mol); 284 auto smi = MolToCXSmiles(*mol); 285 CHECK(smi == "C[C@@H]1CCO[C@H](C)C1 |a:1,5|"); 286 } 287 288 SECTION("enhanced stereo with other properties") { 289 auto mol = "CC[C@H](C)O |atomProp:3.p2.v2,o1:2|"_smiles; 290 REQUIRE(mol); 291 auto smi = MolToCXSmiles(*mol); 292 CHECK(smi == "CC[C@H](C)O |atomProp:3.p2.v2,o1:2|"); 293 } 294 295 SECTION("mol fragments1") { 296 auto mol = "Cl.OC |(1,0,0;0,.75,0.1;0,-.75,-0.1)|"_smiles; 297 REQUIRE(mol); 298 CHECK(mol->getNumConformers() == 1); 299 300 std::vector<int> atomsToUse = {1, 2}; 301 auto smi = MolFragmentToCXSmiles(*mol, atomsToUse); 302 CHECK(smi == "CO |(0,-0.75,-0.1;0,0.75,0.1)|"); 303 } 304 SECTION("mol fragments2") { 305 auto mol = "Cl.N1CC1C |atomProp:1.p2.v1:1.p1.v1:2.p2.v2:2.p1.v2|"_smiles; 306 REQUIRE(mol); 307 CHECK(mol->getNumAtoms() == 5); 308 CHECK(!mol->getAtomWithIdx(0)->hasProp("p1")); 309 CHECK(mol->getAtomWithIdx(1)->hasProp("p1")); 310 CHECK(mol->getAtomWithIdx(1)->getProp<std::string>("p1") == "v1"); 311 312 std::vector<int> atomsToUse = {1, 2, 3, 4}; 313 auto smi = MolFragmentToCXSmiles(*mol, atomsToUse); 314 CHECK(smi == "CC1CN1 |atomProp:2.p2.v2:2.p1.v2:3.p2.v1:3.p1.v1|"); 315 } 316 317 SECTION("mol fragments3") { 318 auto mol = "Cl.[CH]C[CH2] |^1:3,^2:1|"_smiles; 319 REQUIRE(mol); 320 CHECK(mol->getAtomWithIdx(2)->getNumRadicalElectrons() == 0); 321 CHECK(mol->getAtomWithIdx(3)->getNumRadicalElectrons() == 1); 322 CHECK(mol->getAtomWithIdx(1)->getNumRadicalElectrons() == 2); 323 324 std::vector<int> atomsToUse = {1, 2, 3}; 325 auto smi = MolFragmentToCXSmiles(*mol, atomsToUse); 326 CHECK(smi == "[CH]C[CH2] |^1:2,^2:0|"); 327 } 328 } 329 330 TEST_CASE("Github #2148", "[bug][Smiles][Smarts]") { 331 SECTION("SMILES") { 332 auto mol = "C(=C\\F)\\4.O=C1C=4CCc2ccccc21"_smiles; 333 REQUIRE(mol); 334 REQUIRE(mol->getBondBetweenAtoms(0, 5)); 335 CHECK(mol->getBondBetweenAtoms(0, 5)->getBondType() == Bond::DOUBLE); 336 } 337 SECTION("SMILES edges") { 338 auto m1 = "C/C=C/C"_smiles; 339 REQUIRE(m1); 340 CHECK(m1->getBondBetweenAtoms(2, 1)->getBondType() == Bond::DOUBLE); 341 CHECK(m1->getBondBetweenAtoms(2, 1)->getStereo() != Bond::STEREONONE); 342 343 { 344 std::vector<std::string> smis = {"C1=C/C.C/1", "C/1=C/C.C1", 345 "C-1=C/C.C/1", "C/1=C/C.C-1"}; 346 for (auto smi : smis) { 347 std::unique_ptr<RWMol> mol(SmilesToMol(smi)); 348 REQUIRE(mol); 349 CHECK(mol->getBondBetweenAtoms(0, 3)->getBondType() == Bond::SINGLE); 350 CHECK(mol->getBondBetweenAtoms(0, 3)->getBondDir() != Bond::NONE); 351 CHECK(mol->getBondBetweenAtoms(0, 1)->getBondType() == Bond::DOUBLE); 352 CHECK(mol->getBondBetweenAtoms(0, 1)->getStereo() != Bond::STEREONONE); 353 } 354 } 355 } 356 357 SECTION("Writing SMILES") { 358 auto mol = "C/C=c1/ncc(=C)cc1"_smiles; 359 REQUIRE(mol); 360 REQUIRE(mol->getBondBetweenAtoms(1, 2)); 361 CHECK(mol->getBondBetweenAtoms(1, 2)->getBondType() == Bond::DOUBLE); 362 CHECK(mol->getBondBetweenAtoms(1, 2)->getStereo() == Bond::STEREOE); 363 auto smi = MolToSmiles(*mol); 364 CHECK(smi == "C=c1cc/c(=C\\C)nc1"); 365 } 366 } 367 368 TEST_CASE("Github #2298", "[bug][Smarts][substructure]") { 369 SubstructMatchParameters ps; 370 ps.useQueryQueryMatches = true; 371 SECTION("basics") { 372 auto m1 = "[#6]"_smarts; 373 REQUIRE(m1); 374 CHECK(SubstructMatch(*m1, *m1, ps).size() == 1); 375 auto m2 = "[C]"_smarts; 376 REQUIRE(m2); 377 CHECK(SubstructMatch(*m2, *m2, ps).size() == 1); 378 auto m3 = "[C]"_smarts; 379 REQUIRE(m3); 380 CHECK(SubstructMatch(*m3, *m3, ps).size() == 1); 381 } 382 SECTION("a bit more complex") { 383 auto m1 = "[CH0+2]"_smarts; 384 REQUIRE(m1); 385 CHECK(SubstructMatch(*m1, *m1, ps).size() == 1); 386 } 387 } 388 389 TEST_CASE("dative ring closures", "[bug][smiles]") { 390 SECTION("first closure1") { 391 auto m1 = "N->1CCN->[Pt]1"_smiles; 392 REQUIRE(m1); 393 REQUIRE(m1->getBondBetweenAtoms(0, 4)); 394 CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE); 395 CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 0); 396 } 397 SECTION("first closure2") { 398 auto m1 = "[Pt]<-1CCCN1"_smiles; 399 REQUIRE(m1); 400 REQUIRE(m1->getBondBetweenAtoms(0, 4)); 401 CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE); 402 CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 4); 403 } 404 SECTION("second closure1") { 405 auto m1 = "N1CCN->[Pt]<-1"_smiles; 406 REQUIRE(m1); 407 REQUIRE(m1->getBondBetweenAtoms(0, 4)); 408 CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE); 409 CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 0); 410 } 411 SECTION("second closure2") { 412 auto m1 = "[Pt]1CCCN->1"_smiles; 413 REQUIRE(m1); 414 REQUIRE(m1->getBondBetweenAtoms(0, 4)); 415 CHECK(m1->getBondBetweenAtoms(0, 4)->getBondType() == Bond::DATIVE); 416 CHECK(m1->getBondBetweenAtoms(0, 4)->getBeginAtomIdx() == 4); 417 } 418 SECTION("branch1") { 419 auto m1 = "N(->[Pt])C"_smiles; 420 REQUIRE(m1); 421 REQUIRE(m1->getBondBetweenAtoms(0, 1)); 422 CHECK(m1->getBondBetweenAtoms(0, 1)->getBondType() == Bond::DATIVE); 423 CHECK(m1->getBondBetweenAtoms(0, 1)->getBeginAtomIdx() == 0); 424 } 425 SECTION("branch2") { 426 auto m1 = "N(->[Pt])C"_smiles; 427 REQUIRE(m1); 428 REQUIRE(m1->getBondBetweenAtoms(0, 1)); 429 CHECK(m1->getBondBetweenAtoms(0, 1)->getBondType() == Bond::DATIVE); 430 CHECK(m1->getBondBetweenAtoms(0, 1)->getBeginAtomIdx() == 0); 431 } 432 } 433 434 TEST_CASE("github#2450: getAtomSmarts() fails for free atoms", "[bug]") { 435 SECTION("original report") { 436 std::unique_ptr<QueryAtom> qat(new QueryAtom()); 437 qat->setQuery(makeAtomNumQuery(6)); 438 auto smarts = SmartsWrite::GetAtomSmarts(qat.get()); 439 CHECK(smarts == "[#6]"); 440 } 441 SECTION("query bonds") { 442 std::unique_ptr<QueryBond> qbnd(new QueryBond(Bond::AROMATIC)); 443 auto smarts = SmartsWrite::GetBondSmarts(qbnd.get()); 444 CHECK(smarts == ":"); 445 } 446 SECTION("SMILES works too") { 447 std::unique_ptr<Bond> bnd(new Bond(Bond::AROMATIC)); 448 auto smiles = SmilesWrite::GetBondSmiles(bnd.get()); 449 CHECK(smiles == ":"); 450 } 451 } 452 453 TEST_CASE("MolFragmentToSmarts", "[Smarts]") { 454 SECTION("BasicFragment") { 455 auto m = "CCCCCN"_smiles; 456 std::vector<int> indices = {3, 4, 5}; 457 const auto smarts = MolFragmentToSmarts(*m, indices); 458 CHECK(smarts == "[#6]-[#6]-[#7]"); 459 } 460 SECTION("FragmentWithParity1") { 461 auto m = "C[C@H](F)CCCN"_smiles; 462 std::vector<int> indices = {0, 1, 2, 3}; 463 const auto smarts = MolFragmentToSmarts(*m, indices); 464 CHECK(smarts == "[#6]-[#6@H](-[#9])-[#6]"); 465 } 466 SECTION("FragmentWithParity2") { 467 auto m = "C[C@](F)(Cl)CCCN"_smiles; 468 std::vector<int> indices = {0, 1, 2, 4}; 469 const auto smarts = MolFragmentToSmarts(*m, indices); 470 CHECK(smarts == "[#6]-[#6@@](-[#9])-[#6]"); 471 } 472 SECTION("FragmentLosingParity") { 473 auto m = "C[C@H](F)CCCN"_smiles; 474 std::vector<int> indices = {0, 1, 2}; 475 const auto smarts = MolFragmentToSmarts(*m, indices); 476 CHECK(smarts == "[#6]-[#6@H]-[#9]"); 477 } 478 SECTION("FragmentWithSpecifiedBonds") { 479 auto m = "C1CC1O"_smiles; 480 std::vector<int> atomIndices = {0, 1, 2}; 481 std::vector<int> bondIndices = {0}; 482 const auto smarts = MolFragmentToSmarts(*m, atomIndices, &bondIndices); 483 CHECK(smarts == "[#6]-[#6].[#6]"); 484 } 485 SECTION("SmartsFragmentFromQueryMol") { 486 auto m = "CCCC[C,N]N"_smarts; 487 std::vector<int> indices = {3, 4, 5}; 488 const auto smarts = MolFragmentToSmarts(*m, indices); 489 CHECK(smarts == "C[C,N]N"); 490 } 491 } 492 493 TEST_CASE("github #2667: MolToCXSmiles generates error for empty molecule", 494 "[bug][cxsmiles]") { 495 SECTION("basics") { 496 auto mol = ""_smiles; 497 REQUIRE(mol); 498 auto smi = MolToCXSmiles(*mol); 499 CHECK(smi == ""); 500 } 501 } 502 503 TEST_CASE("github #2604: support range-based charge queries from SMARTS", 504 "[ranges][smarts]") { 505 SECTION("positive") { 506 auto query = "[N+{0-1}]"_smarts; 507 REQUIRE(query); 508 { 509 auto m1 = "CN"_smiles; 510 REQUIRE(m1); 511 CHECK(SubstructMatch(*m1, *query).size() == 1); 512 } 513 { 514 auto m1 = "C[NH3+]"_smiles; 515 REQUIRE(m1); 516 CHECK(SubstructMatch(*m1, *query).size() == 1); 517 } 518 { 519 auto m1 = "C[NH4+2]"_smiles; 520 REQUIRE(m1); 521 CHECK(SubstructMatch(*m1, *query).empty()); 522 } 523 { 524 auto m1 = "C[NH-]"_smiles; 525 REQUIRE(m1); 526 CHECK(SubstructMatch(*m1, *query).empty()); 527 } 528 } 529 SECTION("negative") { 530 auto query = "[N-{0-1}]"_smarts; 531 REQUIRE(query); 532 { 533 auto m1 = "CN"_smiles; 534 REQUIRE(m1); 535 CHECK(SubstructMatch(*m1, *query).size() == 1); 536 } 537 { 538 auto m1 = "C[NH-]"_smiles; 539 REQUIRE(m1); 540 CHECK(SubstructMatch(*m1, *query).size() == 1); 541 } 542 { 543 auto m1 = "C[N-2]"_smiles; 544 REQUIRE(m1); 545 CHECK(SubstructMatch(*m1, *query).empty()); 546 } 547 { 548 auto m1 = "C[NH3+]"_smiles; 549 REQUIRE(m1); 550 CHECK(SubstructMatch(*m1, *query).empty()); 551 } 552 } 553 } 554 555 TEST_CASE("_smarts fails gracefully", "[smarts]") { 556 SECTION("empty") { 557 auto mol = ""_smarts; 558 REQUIRE(mol); 559 } 560 SECTION("syntax error") { 561 auto mol = "C1C"_smarts; 562 REQUIRE(!mol); 563 } 564 } 565 566 TEST_CASE( 567 "github #2801: MolToSmarts may generate invalid SMARTS for bond queries", 568 "[bug][smarts]") { 569 SECTION("original_report") { 570 auto q1 = "*~CCC"_smarts; 571 REQUIRE(q1); 572 Bond *qb = q1->getBondBetweenAtoms(0, 1); 573 BOND_EQUALS_QUERY *bq1 = makeBondOrderEqualsQuery(qb->getBondType()); 574 qb->setQuery(bq1); 575 BOND_EQUALS_QUERY *bq2 = makeBondIsInRingQuery(); 576 bq2->setNegation(true); 577 qb->expandQuery(bq2, Queries::COMPOSITE_AND, true); 578 std::string smarts = MolToSmarts(*q1); 579 CHECK(smarts == "*!@CCC"); 580 std::unique_ptr<RWMol> q2(SmartsToMol(smarts)); 581 REQUIRE(q2); 582 } 583 SECTION("composite_or") { 584 auto q1 = "*~CCC"_smarts; 585 REQUIRE(q1); 586 Bond *qb = q1->getBondBetweenAtoms(0, 1); 587 BOND_EQUALS_QUERY *bq1 = makeBondOrderEqualsQuery(qb->getBondType()); 588 qb->setQuery(bq1); 589 BOND_EQUALS_QUERY *bq2 = makeBondIsInRingQuery(); 590 bq2->setNegation(true); 591 qb->expandQuery(bq2, Queries::COMPOSITE_OR, true); 592 // this used to yield *,!@CCC 593 std::string smarts = MolToSmarts(*q1); 594 CHECK(smarts == "*!@CCC"); 595 std::unique_ptr<RWMol> q2(SmartsToMol(smarts)); 596 REQUIRE(q2); 597 } 598 SECTION("composite_lowand") { 599 auto q1 = "*~CCC"_smarts; 600 REQUIRE(q1); 601 Bond *qb = q1->getBondBetweenAtoms(0, 1); 602 BOND_EQUALS_QUERY *bq1 = makeBondOrderEqualsQuery(qb->getBondType()); 603 qb->setQuery(bq1); 604 BOND_EQUALS_QUERY *bq2 = makeBondOrderEqualsQuery(qb->getBondType()); 605 qb->expandQuery(bq2, Queries::COMPOSITE_OR, true); 606 BOND_EQUALS_QUERY *bq3 = makeBondIsInRingQuery(); 607 bq3->setNegation(true); 608 qb->expandQuery(bq3, Queries::COMPOSITE_AND, true); 609 std::string smarts = MolToSmarts(*q1); 610 CHECK(smarts == "*!@CCC"); 611 std::unique_ptr<RWMol> q2(SmartsToMol(smarts)); 612 REQUIRE(q2); 613 } 614 } 615 616 TEST_CASE("large rings", "[smarts]") { 617 auto query = "[r24]"_smarts; 618 auto m_r24 = "C1CCCCCCCCCCCCCCCCCCCCCCC1"_smiles; 619 auto m_r23 = "C1CCCCCCCCCCCCCCCCCCCCCC1"_smiles; 620 621 CHECK(SubstructMatch(*m_r23, *query).empty()); 622 CHECK(SubstructMatch(*m_r24, *query).size() == 24); 623 } 624 625 TEST_CASE("random smiles vectors", "[smiles]") { 626 auto m = "C1OCC1N(CO)(Cc1ccccc1NCCl)"_smiles; 627 REQUIRE(m); 628 SECTION("basics") { 629 std::vector<std::string> tgt = { 630 "c1cc(CN(C2COC2)CO)c(cc1)NCCl", "N(CCl)c1c(CN(C2COC2)CO)cccc1", 631 "N(CCl)c1ccccc1CN(C1COC1)CO", "OCN(Cc1ccccc1NCCl)C1COC1", 632 "C(N(C1COC1)Cc1c(cccc1)NCCl)O"}; 633 unsigned int randomSeed = 0xf00d; 634 auto smiV = MolToRandomSmilesVect(*m, 5, randomSeed); 635 CHECK(smiV == tgt); 636 } 637 SECTION("options1") { 638 std::vector<std::string> tgt = { 639 "C1-C=C(-C-N(-C2-C-O-C-2)-C-O)-C(=C-C=1)-N-C-Cl", 640 "N(-C-Cl)-C1-C(-C-N(-C2-C-O-C-2)-C-O)=C-C=C-C=1", 641 "N(-C-Cl)-C1=C-C=C-C=C-1-C-N(-C1-C-O-C-1)-C-O", 642 "O-C-N(-C-C1=C-C=C-C=C-1-N-C-Cl)-C1-C-O-C-1", 643 "C(-N(-C1-C-O-C-1)-C-C1-C(=C-C=C-C=1)-N-C-Cl)-O"}; 644 RWMol nm(*m); 645 MolOps::Kekulize(nm, true); 646 unsigned int randomSeed = 0xf00d; 647 bool isomericSmiles = true; 648 bool kekuleSmiles = true; 649 bool allBondsExplicit = true; 650 bool allHsExplicit = false; 651 auto smiV = 652 MolToRandomSmilesVect(nm, 5, randomSeed, isomericSmiles, kekuleSmiles, 653 allBondsExplicit, allHsExplicit); 654 CHECK(smiV == tgt); 655 } 656 SECTION("options2") { 657 std::vector<std::string> tgt = { 658 "[cH]1[cH][c]([CH2][N]([CH]2[CH2][O][CH2]2)[CH2][OH])[c]([cH][cH]1)[NH]" 659 "[CH2][Cl]", 660 "[NH]([CH2][Cl])[c]1[c]([CH2][N]([CH]2[CH2][O][CH2]2)[CH2][OH])[cH][cH]" 661 "[cH][cH]1", 662 "[NH]([CH2][Cl])[c]1[cH][cH][cH][cH][c]1[CH2][N]([CH]1[CH2][O][CH2]1)[" 663 "CH2][OH]", 664 "[OH][CH2][N]([CH2][c]1[cH][cH][cH][cH][c]1[NH][CH2][Cl])[CH]1[CH2][O][" 665 "CH2]1", 666 "[CH2]([N]([CH]1[CH2][O][CH2]1)[CH2][c]1[c]([cH][cH][cH][cH]1)[NH][CH2]" 667 "[Cl])[OH]"}; 668 RWMol nm(*m); 669 MolOps::Kekulize(nm, false); 670 unsigned int randomSeed = 0xf00d; 671 bool isomericSmiles = true; 672 bool kekuleSmiles = false; 673 bool allBondsExplicit = false; 674 bool allHsExplicit = true; 675 auto smiV = 676 MolToRandomSmilesVect(nm, 5, randomSeed, isomericSmiles, kekuleSmiles, 677 allBondsExplicit, allHsExplicit); 678 CHECK(smiV == tgt); 679 } 680 } 681 682 TEST_CASE( 683 "github #3197: Molecule constructed from CXSMILES cannot be translated to " 684 "SMARTS", 685 "[smarts][bug]") { 686 auto m = "C* |$;M_p$|"_smiles; 687 REQUIRE(m); 688 SECTION("smarts writing") { 689 auto smarts = MolToSmarts(*m); 690 // this will change if/when the definition of the query changes, just have 691 // to update then 692 CHECK(smarts == 693 "[#6]-[!#2&!#5&!#6&!#7&!#8&!#9&!#10&!#14&!#15&!#16&!#17&!#18&!#33&!#" 694 "34&!#35&!#36&!#52&!#53&!#54&!#85&!#86&!#1]"); 695 } 696 SECTION("serialization") { 697 std::string pkl; 698 MolPickler::pickleMol(*m, pkl, PicklerOps::PropertyPickleOptions::AllProps); 699 ROMol cpy(pkl); 700 auto osmi = MolToCXSmiles(*m); 701 CHECK(osmi == "*C |$M_p;$|"); 702 auto smi = MolToCXSmiles(cpy); 703 CHECK(smi == osmi); 704 QueryAtom *oa1 = static_cast<QueryAtom *>(m->getAtomWithIdx(1)); 705 QueryAtom *a1 = static_cast<QueryAtom *>(m->getAtomWithIdx(1)); 706 REQUIRE(oa1->hasQuery()); 707 REQUIRE(a1->hasQuery()); 708 size_t osz = 709 oa1->getQuery()->endChildren() - oa1->getQuery()->beginChildren(); 710 size_t sz = a1->getQuery()->endChildren() - a1->getQuery()->beginChildren(); 711 // we don't need to test the exact size (since that may change), but let's 712 // at least be sure it's not unreasonable: 713 CHECK(osz > 0); 714 CHECK(osz < 200); 715 CHECK(osz == sz); 716 } 717 } 718 719 TEST_CASE("d primitive in SMARTS", "[smarts][extension]") { 720 SmilesParserParams ps; 721 ps.removeHs = false; 722 std::unique_ptr<ROMol> m(SmilesToMol("[H]OCO[2H]", ps)); 723 REQUIRE(m); 724 CHECK(m->getNumAtoms() == 5); 725 SECTION("basics") { 726 auto q = "[d2]"_smarts; 727 REQUIRE(q); 728 CHECK(SubstructMatch(*m, *q).size() == 2); 729 } 730 SECTION("comparison to D") { 731 auto q = "[D2]"_smarts; 732 REQUIRE(q); 733 CHECK(SubstructMatch(*m, *q).size() == 3); 734 } 735 } 736 737 TEST_CASE( 738 "github #3342: unspecified branch bonds in SMARTS don't have aromaticity " 739 "set", 740 "[smarts][bug]") { 741 SECTION("as reported") { 742 auto m = "c1(ccccc1)"_smarts; 743 REQUIRE(m); 744 REQUIRE(m->getBondBetweenAtoms(0, 1)); 745 CHECK(m->getBondBetweenAtoms(0, 1)->getBondType() == 746 Bond::BondType::AROMATIC); 747 CHECK(m->getBondBetweenAtoms(0, 1)->getIsAromatic()); 748 } 749 } 750 751 TEST_CASE("github #3320: incorrect bond properties from CXSMILES", 752 "[cxsmiles][bug]") { 753 SECTION("as reported") { 754 auto m = "[Cl-][Pt++]1([Cl-])NCCN1C1CCCCC1 |C:6.6,3.2,0.0,2.1|"_smiles; 755 REQUIRE(m); 756 std::vector<std::pair<unsigned, unsigned>> bonds = { 757 {0, 1}, {3, 1}, {2, 1}, {6, 1}}; 758 for (const auto &pr : bonds) { 759 auto bnd = m->getBondBetweenAtoms(pr.first, pr.second); 760 REQUIRE(bnd); 761 CHECK(bnd->getBondType() == Bond::BondType::DATIVE); 762 CHECK(bnd->getBeginAtomIdx() == pr.first); 763 } 764 } 765 SECTION("as reported") { 766 auto m = "[Cl-][Pt++]1([Cl-])NCC3C2CCCCC2.N13 |C:12.12,3.2,0.0,2.1|"_smiles; 767 REQUIRE(m); 768 std::vector<std::pair<unsigned, unsigned>> bonds = { 769 {0, 1}, {3, 1}, {2, 1}, {12, 1}}; 770 for (const auto &pr : bonds) { 771 auto bnd = m->getBondBetweenAtoms(pr.first, pr.second); 772 REQUIRE(bnd); 773 CHECK(bnd->getBondType() == Bond::BondType::DATIVE); 774 CHECK(bnd->getBeginAtomIdx() == pr.first); 775 } 776 } 777 } 778 779 TEST_CASE("github #3774: MolToSmarts inverts direction of dative bond", 780 "[smarts][bug]") { 781 SECTION("as reported") { 782 { 783 auto m = "N->[Cu+]"_smiles; 784 REQUIRE(m); 785 CHECK(MolToSmarts(*m) == "[#7]->[Cu+]"); 786 CHECK(MolToSmiles(*m) == "N->[Cu+]"); 787 } 788 { 789 auto m = "N<-[Cu+]"_smiles; 790 REQUIRE(m); 791 CHECK(MolToSmarts(*m) == "[#7]<-[Cu+]"); 792 CHECK(MolToSmiles(*m) == "N<-[Cu+]"); 793 } 794 } 795 SECTION("from smarts") { 796 { 797 auto m = "N->[Cu+]"_smarts; 798 REQUIRE(m); 799 CHECK(MolToSmarts(*m) == "N->[#29&+]"); 800 } 801 { 802 auto m = "N<-[Cu+]"_smarts; 803 REQUIRE(m); 804 CHECK(MolToSmarts(*m) == "N<-[#29&+]"); 805 } 806 } 807 } 808 809 TEST_CASE("Hydrogen bonds", "[smiles]") { 810 SECTION("basics") { 811 auto m = "CC1O[H]O=C(C)C1 |H:4.3|"_smiles; 812 REQUIRE(m); 813 REQUIRE(m->getBondBetweenAtoms(3, 4)); 814 CHECK(m->getBondBetweenAtoms(3, 4)->getBondType() == 815 Bond::BondType::HYDROGEN); 816 } 817 } 818 819 TEST_CASE("Github #2788: doKekule=true should kekulize the molecule", 820 "[smiles]") { 821 SECTION("basics1") { 822 auto m = "c1ccccc1"_smiles; 823 REQUIRE(m); 824 bool doIsomeric = true; 825 bool doKekule = true; 826 CHECK(MolToSmiles(*m, doIsomeric, doKekule) == "C1=CC=CC=C1"); 827 } 828 SECTION("basics2") { 829 auto m = "c1cc[nH]c1"_smiles; 830 REQUIRE(m); 831 bool doIsomeric = true; 832 bool doKekule = true; 833 CHECK(MolToSmiles(*m, doIsomeric, doKekule) == "C1=CNC=C1"); 834 } 835 836 SECTION("can thrown exceptions") { 837 int debugParse = 0; 838 bool sanitize = false; 839 std::unique_ptr<RWMol> m{SmilesToMol("c1ccnc1", debugParse, sanitize)}; 840 REQUIRE(m); 841 bool doIsomeric = true; 842 bool doKekule = false; 843 { 844 RWMol tm(*m); 845 CHECK(MolToSmiles(tm, doIsomeric, doKekule) == "c1ccnc1"); 846 } 847 doKekule = true; 848 { 849 RWMol tm(*m); 850 CHECK_THROWS_AS(MolToSmiles(tm, doIsomeric, doKekule), KekulizeException); 851 } 852 } 853 } 854 855 TEST_CASE("bogus recursive SMARTS", "[smarts]") { 856 std::string sma = "C)foo"; 857 CHECK(SmartsToMol(sma) == nullptr); 858 } 859 860 TEST_CASE( 861 "Github #3998 MolFragmentToSmiles failing in Kekulization with " 862 "kekuleSmiles=true") { 863 auto mol = "Cc1ccccc1"_smiles; 864 REQUIRE(mol); 865 SECTION("normal") { 866 std::vector<int> ats{0}; 867 std::string smi = MolFragmentToSmiles(*mol, ats); 868 CHECK(smi == "C"); 869 } 870 SECTION("kekulized") { 871 std::vector<int> ats{0}; 872 bool doIsomericSmiles = true; 873 bool doKekule = true; 874 std::string smi = MolFragmentToSmiles(*mol, ats, nullptr, nullptr, nullptr, 875 doIsomericSmiles, doKekule); 876 CHECK(smi == "C"); 877 } 878 SECTION("including ring parts") { 879 std::vector<int> ats{0, 1, 2}; 880 bool doIsomericSmiles = true; 881 bool doKekule = true; 882 std::string smi = MolFragmentToSmiles(*mol, ats, nullptr, nullptr, nullptr, 883 doIsomericSmiles, doKekule); 884 CHECK(smi == "C:CC"); 885 } 886 } 887