1 //
2 //
3 // Copyright (C) 2018 Greg Landrum and T5 Informatics GmbH
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11
12 #include "catch.hpp"
13
14 #include <GraphMol/RDKitBase.h>
15 #include <GraphMol/SubstanceGroup.h>
16 #include <GraphMol/Chirality.h>
17 #include <GraphMol/SmilesParse/SmilesParse.h>
18 #include "GraphMol/FileParsers/FileParsers.h"
19
20 using namespace RDKit;
21
22 /* Auxiliary functions */
testIdxVector(const std::vector<unsigned int> & groupVector,const std::vector<unsigned int> & reference)23 void testIdxVector(const std::vector<unsigned int> &groupVector,
24 const std::vector<unsigned int> &reference) {
25 size_t vecSize = reference.size();
26 CHECK(groupVector.size() == vecSize);
27
28 auto sgItr = groupVector.begin();
29 for (auto refItr = reference.begin(); refItr != reference.end();
30 ++sgItr, ++refItr) {
31 CHECK(1 + *sgItr == *refItr);
32 }
33 }
34
testBrackets(const std::vector<SubstanceGroup::Bracket> & brackets,const std::vector<std::array<std::array<double,3>,3>> & reference)35 void testBrackets(
36 const std::vector<SubstanceGroup::Bracket> &brackets,
37 const std::vector<std::array<std::array<double, 3>, 3>> &reference) {
38 CHECK(brackets.size() == 2);
39 for (int i = 0; i < 2; ++i) {
40 for (int j = 0; j < 3; ++j) {
41 for (int k = 0; k < 3; ++k) {
42 CHECK(std::abs(brackets[i][j][k] - reference[i][j][k]) < 1.e-6);
43 }
44 }
45 }
46 }
47
buildSampleMolecule()48 RWMol buildSampleMolecule() {
49 // This builds a RDKit::RWMol with all implemented SubstanceGroup features in
50 // order to test them. SubstanceGroups and features probably do not make any
51 // sense.
52
53 //// Initialize Molecule ////
54 RWMol mol;
55
56 // Add some atoms and bonds
57 for (unsigned i = 0; i < 6; ++i) {
58 mol.addAtom(new Atom(6), false, true);
59
60 if (i > 0) {
61 mol.addBond(i - 1, i, Bond::SINGLE);
62 }
63 }
64
65 //// First SubstanceGroup ////
66 {
67 SubstanceGroup sg(&mol, "MUL");
68
69 sg.setProp("SUBTYPE", "BLO");
70 sg.setProp("MULT", "n");
71 sg.setProp("CONNECT", "HH");
72
73 // Add some atoms and bonds
74 for (unsigned i = 0; i < 3; ++i) {
75 sg.addAtomWithIdx(i);
76 sg.addParentAtomWithIdx(i);
77 sg.addBondWithIdx(i); // add 2 CBONDs + 1 XBOND
78 }
79
80 sg.setProp("COMPNO", 7u);
81 sg.setProp("ESTATE", "E");
82
83 SubstanceGroup::Bracket bracket1 = {{RDGeom::Point3D(1., 3., 0.),
84 RDGeom::Point3D(5., 7., 0.),
85 RDGeom::Point3D(0., 0., 0.)}};
86 sg.addBracket(bracket1);
87
88 SubstanceGroup::Bracket bracket2 = {{RDGeom::Point3D(2., 4., 0.),
89 RDGeom::Point3D(6., 8., 0.),
90 RDGeom::Point3D(0., 0., 0.)}};
91 sg.addBracket(bracket2);
92
93 // Vector should not be parsed (not a SUP group)
94 sg.addCState(2, RDGeom::Point3D());
95
96 sg.setProp("CLASS", "TEST CLASS");
97
98 sg.addAttachPoint(0, 0, "XX");
99
100 sg.setProp("BRKTYP", "PAREN");
101
102 addSubstanceGroup(mol, sg);
103 }
104 //// Second SubstanceGroup ////
105 {
106 SubstanceGroup sg(&mol, "SUP");
107
108 // Add some atoms and bonds
109 for (unsigned i = 3; i < 6; ++i) {
110 sg.addAtomWithIdx(i);
111 sg.addParentAtomWithIdx(i);
112 sg.addBondWithIdx(i - 1); // add 1 XBOND + 2 CBONDs
113 }
114
115 sg.setProp("LABEL", "TEST LABEL");
116
117 // V2000 has only x and y coords; z value restricted to 0.
118 RDGeom::Point3D vector(3., 4., 0.);
119 sg.addCState(2, vector); // Vector should be parsed now!
120
121 sg.addAttachPoint(3, -1, "YY");
122
123 addSubstanceGroup(mol, sg);
124 }
125 //// Third SubstanceGroup ////
126 {
127 SubstanceGroup sg(&mol, "DAT");
128
129 sg.setProp("FIELDNAME", "SAMPLE FIELD NAME"); // 30 char max
130 // Field Type is ignored in V3000
131 sg.setProp("FIELDINFO", "SAMPLE FIELD INFO"); // 20 char max
132 sg.setProp("QUERYTYPE", "PQ"); // 2 char max
133 sg.setProp("QUERYOP", "SAMPLE QUERY OP"); // 15 char max (rest of line)
134
135 // This should be properly formatted, but format is not checked
136 sg.setProp("FIELDDISP", "SAMPLE FIELD DISP");
137
138 STR_VECT dataFields = {"SAMPLE DATA FIELD 1", "SAMPLE DATA FIELD 2",
139 "SAMPLE DATA FIELD 3"};
140 sg.setProp("DATAFIELDS", dataFields);
141
142 addSubstanceGroup(mol, sg);
143 }
144
145 // Set a parent with higher index
146 const auto &sgroups = getSubstanceGroups(mol);
147 sgroups.at(0).setProp<unsigned int>("PARENT", 3);
148
149 return mol;
150 }
151
152 /* End Auxiliary functions */
153
154 TEST_CASE("Basic Sgroup creation", "[Sgroups]") {
155 // Create two SubstanceGroups and add them to a molecule
156 RWMol mol;
157
158 {
159 SubstanceGroup sg0(&mol, "DAT");
160 SubstanceGroup sg1(&mol, "SUP");
161 addSubstanceGroup(mol, sg0);
162 addSubstanceGroup(mol, sg1);
163 }
164
165 const auto &sgroups = getSubstanceGroups(mol);
166 REQUIRE(sgroups.size() == 2);
167 CHECK(sgroups.at(0).getProp<std::string>("TYPE") == "DAT");
168 CHECK(sgroups.at(1).getProp<std::string>("TYPE") == "SUP");
169 }
170
171 TEST_CASE("Build and test sample molecule", "[Sgroups]") {
172 auto mol = buildSampleMolecule();
173
174 const auto &sgroups = getSubstanceGroups(mol);
175 CHECK(sgroups.size() == 3);
176
177 SECTION("first sgroup") {
178 const auto &sg = sgroups.at(0);
179 CHECK(sg.getProp<std::string>("TYPE") == "MUL");
180
181 CHECK(sg.getProp<std::string>("SUBTYPE") == "BLO");
182 CHECK(sg.getProp<std::string>("MULT") == "n");
183 CHECK(sg.getProp<std::string>("CONNECT") == "HH");
184
185 std::vector<unsigned int> atoms_reference = {1, 2, 3};
186 auto atoms = sg.getAtoms();
187 testIdxVector(atoms, atoms_reference);
188
189 std::vector<unsigned int> patoms_reference = {1, 2, 3};
190 testIdxVector(sg.getParentAtoms(), patoms_reference);
191
192 std::vector<unsigned int> bonds_reference = {1, 2, 3};
193 auto bonds = sg.getBonds();
194
195 // bonds are not sorted in V3000; sort them here
196 std::sort(bonds.begin(), bonds.end());
197
198 testIdxVector(bonds, bonds_reference);
199
200 CHECK(sg.getBondType(bonds[0]) == SubstanceGroup::BondType::CBOND);
201 CHECK(sg.getBondType(bonds[1]) == SubstanceGroup::BondType::CBOND);
202 CHECK(sg.getBondType(bonds[2]) == SubstanceGroup::BondType::XBOND);
203
204 CHECK(sg.getProp<unsigned int>("COMPNO") == 7u);
205 CHECK(sg.getProp<std::string>("ESTATE") == "E");
206
207 std::vector<std::array<std::array<double, 3>, 3>> brackets_reference = {
208 {{{{1., 3., 0.}}, {{5., 7., 0.}}, {{0., 0., 0.}}}},
209 {{{{2., 4., 0.}}, {{6., 8., 0.}}, {{0., 0., 0.}}}},
210 };
211 testBrackets(sg.getBrackets(), brackets_reference);
212
213 auto cstates = sg.getCStates();
214 CHECK(cstates.size() == 1);
215 CHECK(cstates[0].bondIdx == bonds[2]);
216 CHECK(cstates[0].vector.x == 0.);
217 CHECK(cstates[0].vector.y == 0.);
218 CHECK(cstates[0].vector.z == 0.);
219
220 CHECK(sg.getProp<std::string>("CLASS") == "TEST CLASS");
221
222 auto ap = sg.getAttachPoints();
223 CHECK(ap.size() == 1);
224 CHECK(ap[0].aIdx == atoms[0]);
225 CHECK(ap[0].lvIdx == static_cast<int>(atoms[0]));
226 CHECK(ap[0].id == "XX");
227
228 CHECK(sg.getProp<std::string>("BRKTYP") == "PAREN");
229
230 CHECK(sg.getProp<unsigned int>("PARENT") == 3u);
231 }
232
233 SECTION("second sgroup") {
234 const auto &sg = sgroups.at(1);
235 CHECK(sg.getProp<std::string>("TYPE") == "SUP");
236
237 std::vector<unsigned int> atoms_reference = {4, 5, 6};
238 auto atoms = sg.getAtoms();
239 testIdxVector(atoms, atoms_reference);
240
241 std::vector<unsigned int> patoms_reference = {4, 5, 6};
242 testIdxVector(sg.getParentAtoms(), patoms_reference);
243
244 std::vector<unsigned int> bonds_reference = {3, 4, 5};
245 auto bonds = sg.getBonds();
246
247 // bonds are not sorted in V3000; sort them here
248 std::sort(bonds.begin(), bonds.end());
249
250 testIdxVector(bonds, bonds_reference);
251 CHECK(sg.getBondType(bonds[0]) == SubstanceGroup::BondType::XBOND);
252 CHECK(sg.getBondType(bonds[1]) == SubstanceGroup::BondType::CBOND);
253 CHECK(sg.getBondType(bonds[2]) == SubstanceGroup::BondType::CBOND);
254
255 CHECK(sg.getProp<std::string>("LABEL") == "TEST LABEL");
256
257 auto cstates = sg.getCStates();
258 CHECK(cstates.size() == 1);
259 CHECK(cstates[0].bondIdx == bonds[0]);
260 CHECK(cstates[0].vector.x == 3.);
261 CHECK(cstates[0].vector.y == 4.);
262 CHECK(cstates[0].vector.z == 0.);
263
264 auto ap = sg.getAttachPoints();
265 CHECK(ap.size() == 1);
266 CHECK(ap[0].aIdx == atoms[0]);
267 CHECK(ap[0].lvIdx == -1);
268 CHECK(ap[0].id == "YY");
269 }
270
271 SECTION("third sgroup") {
272 const auto &sg = sgroups.at(2);
273 CHECK(sg.getProp<std::string>("TYPE") == "DAT");
274
275 CHECK(sg.getProp<std::string>("FIELDNAME") == "SAMPLE FIELD NAME");
276 CHECK(sg.getProp<std::string>("FIELDINFO") == "SAMPLE FIELD INFO");
277 CHECK(sg.getProp<std::string>("QUERYTYPE") == "PQ");
278 CHECK(sg.getProp<std::string>("QUERYOP") == "SAMPLE QUERY OP");
279
280 CHECK(sg.getProp<std::string>("FIELDDISP") == "SAMPLE FIELD DISP");
281
282 auto dataFields = sg.getProp<STR_VECT>("DATAFIELDS");
283 CHECK(dataFields.size() == 3);
284 CHECK(dataFields[0] == "SAMPLE DATA FIELD 1");
285 CHECK(dataFields[1] == "SAMPLE DATA FIELD 2");
286 CHECK(dataFields[2] == "SAMPLE DATA FIELD 3");
287 }
288 }
289
290 TEST_CASE("Removing sgroups", "[Sgroups]") {
291 SECTION("basics") {
292 auto m1 = R"CTAB(
293 Mrv2014 07312005252D
294
295 0 0 0 0 0 999 V3000
296 M V30 BEGIN CTAB
297 M V30 COUNTS 7 6 3 0 0
298 M V30 BEGIN ATOM
299 M V30 1 * -12.75 11.5 0 0
300 M V30 2 O -11.4163 12.27 0 0
301 M V30 3 C -10.0826 11.5 0 0
302 M V30 4 C -8.749 12.27 0 0
303 M V30 5 O -10.0826 9.96 0 0
304 M V30 6 N -7.4153 11.5 0 0
305 M V30 7 C -6.0816 12.27 0 0
306 M V30 END ATOM
307 M V30 BEGIN BOND
308 M V30 1 1 1 2
309 M V30 2 1 2 3
310 M V30 3 1 3 4
311 M V30 4 2 3 5
312 M V30 5 1 4 6
313 M V30 6 1 6 7
314 M V30 END BOND
315 M V30 BEGIN SGROUP
316 M V30 1 SRU 0 ATOMS=(3 2 3 5) XBONDS=(2 1 3) BRKXYZ=(9 -9.9955 12.6173 0 -
317 M V30 -9.0715 11.0169 0 0 0 0) BRKXYZ=(9 -11.5035 11.1527 0 -12.4275 12.7531 -
318 M V30 0 0 0 0) CONNECT=HT LABEL=n
319 M V30 2 DAT 0 ATOMS=(1 6) FIELDNAME=foo_data -
320 M V30 FIELDDISP=" -7.4153 11.5000 DAU ALL 0 0" -
321 M V30 MRV_FIELDDISP=0 FIELDDATA=bar
322 M V30 3 DAT 0 ATOMS=(1 7) FIELDNAME=bar_data -
323 M V30 FIELDDISP=" -6.0816 12.2700 DAU ALL 0 0" -
324 M V30 MRV_FIELDDISP=0 FIELDDATA=baz
325 M V30 END SGROUP
326 M V30 END CTAB
327 M END
328 )CTAB"_ctab;
329 REQUIRE(m1);
330 auto &sgs = getSubstanceGroups(*m1);
331 CHECK(sgs.size() == 3);
332 sgs.erase(++sgs.begin());
333 CHECK(sgs.size() == 2);
334 CHECK(getSubstanceGroups(*m1).size() == 2);
335 auto molb = MolToV3KMolBlock(*m1);
336 CHECK(molb.find("foo_data") == std::string::npos);
337 CHECK(molb.find("M V30 2 DAT 0 ATOMS=(1 7) FIELDNAME=bar_data") !=
338 std::string::npos);
339 }
340 }
341
342 TEST_CASE(
343 "Github #3315: SubstanceGroups should not be written with quotes around "
344 "missing fields",
345 "[Sgroups][bug]") {
346 SECTION("basics") {
347 auto m1 = R"CTAB(
348 Mrv2014 07312012022D
349
350 2 1 0 0 0 0 999 V2000
351 1.4295 0.1449 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
352 1.4266 -0.6801 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
353 2 1 1 0 0 0 0
354 M STY 1 1 DAT
355 M SAL 1 1 1
356 M SDT 1 test sgroup
357 M SDD 1 0.5348 -0.3403 DRU ALL 0 0
358 M SED 1 sgroupval
359 M END
360 )CTAB"_ctab;
361 REQUIRE(m1);
362 auto molb = MolToV3KMolBlock(*m1);
363 CHECK(molb.find("FIELDINFO") == std::string::npos);
364 CHECK(molb.find("QUERYTYPE") == std::string::npos);
365 CHECK(molb.find("QUERYOP") == std::string::npos);
366 CHECK(molb.find("FIELDNAME") != std::string::npos);
367 }
368 }
369
370 TEST_CASE("Allow brackets, cstates, and attachment points to be removed",
371 "[Sgroups]") {
372 auto m1 = R"CTAB(example
373 -ISIS- 10171405052D
374
375 0 0 0 0 0 999 V3000
376 M V30 BEGIN CTAB
377 M V30 COUNTS 14 15 1 0 0
378 M V30 BEGIN ATOM
379 M V30 1 C 6.4292 -1.1916 0 0 CFG=3
380 M V30 2 C 7.0125 -0.6042 0 0
381 M V30 3 N 6.4292 -0.0250999 0 0
382 M V30 4 C 5.8416 -0.6042 0 0
383 M V30 5 C 5.8416 -1.7708 0 0
384 M V30 6 N 6.4292 -2.3584 0 0 CFG=3
385 M V30 7 C 7.0125 -1.7708 0 0
386 M V30 8 O 5.7166 -3.5875 0 0
387 M V30 9 C 5.7166 -4.4125 0 0 CFG=3
388 M V30 10 C 4.8875 -4.4125 0 0
389 M V30 11 C 6.5376 -4.4166 0 0
390 M V30 12 C 5.7166 -5.2376 0 0
391 M V30 13 C 6.4292 -3.175 0 0
392 M V30 14 O 7.1375 -3.5875 0 0
393 M V30 END ATOM
394 M V30 BEGIN BOND
395 M V30 1 1 1 2
396 M V30 2 1 2 3
397 M V30 3 1 3 4
398 M V30 4 1 4 1
399 M V30 5 1 1 5
400 M V30 6 1 5 6
401 M V30 7 1 6 7
402 M V30 8 1 7 1
403 M V30 9 1 6 13
404 M V30 10 1 8 9
405 M V30 11 1 9 10
406 M V30 12 1 9 11
407 M V30 13 1 9 12
408 M V30 14 2 13 14
409 M V30 15 1 8 13
410 M V30 END BOND
411 M V30 BEGIN SGROUP
412 M V30 1 SUP 0 ATOMS=(7 8 9 10 11 12 13 14) XBONDS=(1 9) BRKXYZ=(9 6.24 -2.9 0 -
413 M V30 6.24 -2.9 0 0 0 0) CSTATE=(4 9 0 0.82 0) LABEL=Boc SAP=(3 13 6 1)
414 M V30 END SGROUP
415 M V30 END CTAB
416 M END)CTAB"_ctab;
417
418 REQUIRE(m1);
419
420 SECTION("brackets") {
421 REQUIRE(m1);
422 auto &sgs = getSubstanceGroups(*m1);
423 REQUIRE(sgs.size() == 1);
424 CHECK(sgs[0].getBrackets().size() == 1);
425 sgs[0].clearBrackets();
426 CHECK(sgs[0].getBrackets().size() == 0);
427 }
428 SECTION("cstates") {
429 auto &sgs = getSubstanceGroups(*m1);
430 REQUIRE(sgs.size() == 1);
431 CHECK(sgs[0].getCStates().size() == 1);
432 sgs[0].clearCStates();
433 CHECK(sgs[0].getCStates().size() == 0);
434 }
435 SECTION("attachpts") {
436 auto &sgs = getSubstanceGroups(*m1);
437 REQUIRE(sgs.size() == 1);
438 CHECK(sgs[0].getAttachPoints().size() == 1);
439 sgs[0].clearAttachPoints();
440 CHECK(sgs[0].getAttachPoints().size() == 0);
441 }
442 }
443
444 TEST_CASE(
445 "GitHub Issue #4434: v2000 SGroups do not generate an \"index\" property",
446 "[Sgroups][bug]") {
447 SECTION("basics") {
448 auto v2000_mol = R"CTAB(
449 MJ211300
450
451 2 1 0 0 0 0 0 0 0 0999 V2000
452 1.4295 0.1449 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
453 1.4266 -0.6801 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
454 2 1 1 0 0 0 0
455 M STY 2 1 DAT 2 DAT
456 M SAL 1 1 1
457 M SDT 1 test sgroup 1
458 M SDD 1 0.0000 0.0000 DRU ALL 0 0
459 M SED 1 sgroupval1
460 M SAL 2 1 2
461 M SDT 2 test sgroup 2
462 M SDD 2 0.0000 0.0000 DRU ALL 0 0
463 M SED 2 sgroupval2
464 M END
465 )CTAB"_ctab;
466 auto v3000_mol = R"CTAB(
467 Mrv2113 08202115042D
468
469 0 0 0 0 0 999 V3000
470 M V30 BEGIN CTAB
471 M V30 COUNTS 2 1 2 0 0
472 M V30 BEGIN ATOM
473 M V30 1 C 2.6684 0.2705 0 0
474 M V30 2 C 2.663 -1.2695 0 0
475 M V30 END ATOM
476 M V30 BEGIN BOND
477 M V30 1 1 2 1
478 M V30 END BOND
479 M V30 BEGIN SGROUP
480 M V30 1 DAT 0 ATOMS=(1 1) FIELDNAME="test sgroup 1" -
481 M V30 FIELDDISP=" 0.0000 0.0000 DRU ALL 0 0" -
482 M V30 MRV_FIELDDISP=0 FIELDDATA=sgroupval1
483 M V30 2 DAT 0 ATOMS=(1 2) FIELDNAME="test sgroup 2" -
484 M V30 FIELDDISP=" 0.0000 0.0000 DRU ALL 0 0" -
485 M V30 MRV_FIELDDISP=0 FIELDDATA=sgroupval2
486 M V30 END SGROUP
487 M V30 END CTAB
488 M END
489 )CTAB"_ctab;
490
491 REQUIRE(v2000_mol);
492 REQUIRE(v3000_mol);
493
494 unsigned int index;
495 unsigned int count = 0;
496
497 for (const auto &sg : getSubstanceGroups(*v2000_mol)) {
498 ++count;
499 TEST_ASSERT(sg.getPropIfPresent("index", index));
500 TEST_ASSERT(index == count);
501 }
502
503 count = 0;
504 for (const auto &sg : getSubstanceGroups(*v3000_mol)) {
505 ++count;
506 TEST_ASSERT(sg.getPropIfPresent("index", index));
507 TEST_ASSERT(index == count);
508 }
509 }
510 }