1 //
2 // Copyright (C) 2002-2019 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/test.h>
11 #include <GraphMol/RDKitBase.h>
12 #include <string>
13 #include <iostream>
14 #include <fstream>
15 #include <map>
16 #include <memory>
17
18 #include "MolSupplier.h"
19 #include "MolWriters.h"
20 #include "FileParsers.h"
21 #include "FileParserUtils.h"
22 #include <RDGeneral/FileParseException.h>
23 #include <RDGeneral/BadFileException.h>
24 #include <RDGeneral/RDLog.h>
25 #include <RDStreams/streams.h>
26 #include <GraphMol/MonomerInfo.h>
27 #include <GraphMol/SmilesParse/SmilesWrite.h>
28 #include <GraphMol/SmilesParse/SmilesParse.h>
29 #include <GraphMol/Depictor/RDDepictor.h>
30
31 #include <boost/iostreams/device/file.hpp>
32 #include <boost/iostreams/filtering_stream.hpp>
33 namespace io = boost::iostreams;
34
35 using namespace RDKit;
36
testMolSup()37 int testMolSup() {
38 std::string rdbase = getenv("RDBASE");
39 std::string fname =
40 rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
41
42 {
43 SDMolSupplier sdsup(fname);
44 unsigned int i = 0;
45 while (!sdsup.atEnd()) {
46 ROMol *nmol = sdsup.next();
47 if (nmol) {
48 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
49 TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
50 delete nmol;
51 }
52 i++;
53 }
54 TEST_ASSERT(i == 16);
55 }
56 {
57 SDMolSupplier sdsup(fname);
58 for (unsigned int i = 0; i < 16; ++i) {
59 ROMol *nmol = sdsup.next();
60 if (nmol) {
61 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
62 TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
63 delete nmol;
64 }
65 }
66 // test issue 3524949:
67 TEST_ASSERT(sdsup.atEnd());
68 bool ok = false;
69 try {
70 sdsup.next();
71 } catch (FileParseException &) {
72 ok = true;
73 }
74 TEST_ASSERT(ok);
75 }
76 {
77 std::ifstream strm(fname.c_str());
78 SDMolSupplier sdsup(&strm, false);
79 unsigned int i = 0;
80 while (!sdsup.atEnd()) {
81 ROMol *nmol = sdsup.next();
82 if (nmol) {
83 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
84 TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
85 delete nmol;
86 }
87 i++;
88 }
89 TEST_ASSERT(i == 16);
90 }
91 {
92 auto *strm = new std::ifstream(fname.c_str());
93 SDMolSupplier sdsup(strm, true);
94 unsigned int i = 0;
95 while (!sdsup.atEnd()) {
96 ROMol *nmol = sdsup.next();
97 if (nmol) {
98 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
99 TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
100 delete nmol;
101 }
102 i++;
103 }
104 TEST_ASSERT(i == 16);
105 }
106 #ifdef RDK_BUILD_MAEPARSER_SUPPORT
107 { // Test reading properties
108 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/props_test.mae";
109
110 MaeMolSupplier maesup(fname);
111 std::unique_ptr<ROMol> nmol(maesup.next());
112 TEST_ASSERT(nmol);
113
114 // Test mol properties
115 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
116 TEST_ASSERT(nmol->hasProp("b_sd_chiral_flag"));
117 TEST_ASSERT(nmol->getProp<bool>("b_sd_chiral_flag") == false);
118 TEST_ASSERT(nmol->hasProp("i_sd_NSC"));
119 TEST_ASSERT(nmol->getProp<int>("i_sd_NSC") == 48);
120 TEST_ASSERT(nmol->hasProp("s_m_entry_name"));
121 TEST_ASSERT(nmol->getProp<std::string>("s_m_entry_name") ==
122 "NCI_aids_few.1");
123 TEST_ASSERT(nmol->hasProp("r_f3d_dummy"));
124 TEST_ASSERT(std::abs(nmol->getProp<double>("r_f3d_dummy") - 42.123) <
125 0.0001);
126
127 // Test atom properties
128 TEST_ASSERT(nmol->getNumAtoms() == 19);
129 for (int i = 0; i < 19; ++i) {
130 const auto *atom = nmol->getAtomWithIdx(i);
131
132 // The integer property is present for all atoms
133 TEST_ASSERT(atom->hasProp("i_m_minimize_atom_index"));
134 TEST_ASSERT(atom->getProp<int>("i_m_minimize_atom_index") == 1 + i);
135
136 // The bool property is only defined for i < 10
137 if (i < 10) {
138 TEST_ASSERT(atom->hasProp("b_m_dummy"));
139 TEST_ASSERT(atom->getProp<bool>("b_m_dummy") ==
140 static_cast<bool>(i % 2));
141 } else {
142 TEST_ASSERT(!atom->hasProp("b_m_dummy"));
143 }
144
145 // The real property is only defined for i >= 10
146 if (i >= 10) {
147 TEST_ASSERT(atom->hasProp("r_f3d_dummy"));
148 TEST_ASSERT(std::abs(atom->getProp<double>("r_f3d_dummy") -
149 (19.1 - i)) < 0.0001);
150 } else {
151 TEST_ASSERT(!atom->hasProp("r_f3d_dummy"));
152 }
153
154 // All atoms have the string prop
155 TEST_ASSERT(atom->hasProp("s_m_dummy"));
156 TEST_ASSERT(atom->getProp<std::string>("s_m_dummy") ==
157 std::to_string(19 - i));
158 }
159
160 TEST_ASSERT(maesup.atEnd());
161 }
162 { // Test parsing stereo properties. Mol is 2D and has stereo labels.
163 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/stereochem.mae";
164 MaeMolSupplier maesup(fname);
165
166 { // Stereo bonds. These get overwritten by the double bond detection.
167 std::unique_ptr<ROMol> nmol(maesup.next());
168 TEST_ASSERT(nmol);
169 {
170 Bond *bnd = nmol->getBondWithIdx(1);
171 TEST_ASSERT(bnd);
172 TEST_ASSERT(bnd->getStereoAtoms() == INT_VECT({0, 3}));
173 TEST_ASSERT(bnd->getStereo() == Bond::STEREOTRANS);
174 }
175 {
176 Bond *bnd = nmol->getBondWithIdx(3);
177 TEST_ASSERT(bnd);
178 TEST_ASSERT(bnd->getStereoAtoms() == INT_VECT({2, 5}));
179 TEST_ASSERT(bnd->getStereo() == Bond::STEREOCIS);
180 }
181 }
182 { // Chiralities (these get CIP codes)
183 std::unique_ptr<ROMol> nmol(maesup.next());
184 TEST_ASSERT(nmol);
185 {
186 Atom *at = nmol->getAtomWithIdx(1);
187 TEST_ASSERT(at);
188 TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
189 TEST_ASSERT(at->getProp<std::string>(common_properties::_CIPCode) ==
190 "R");
191 }
192 {
193 Atom *at = nmol->getAtomWithIdx(3);
194 TEST_ASSERT(at);
195 TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
196 TEST_ASSERT(at->getProp<std::string>(common_properties::_CIPCode) ==
197 "S");
198 }
199 }
200 { // Pseudochiralities (no CIP codes)
201 std::unique_ptr<ROMol> nmol(maesup.next());
202 TEST_ASSERT(nmol);
203 {
204 Atom *at = nmol->getAtomWithIdx(2);
205 TEST_ASSERT(at);
206 TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW);
207 TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
208 }
209 {
210 Atom *at = nmol->getAtomWithIdx(5);
211 TEST_ASSERT(at);
212 TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW);
213 TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
214 }
215 }
216 { // intentionally bad chirality label, intended to
217 // make sure we can step over parse errors
218 std::unique_ptr<ROMol> nmol;
219 try {
220 nmol.reset(maesup.next());
221 } catch (const Invar::Invariant &) {
222 // just ignore this failure
223 }
224 TEST_ASSERT(!nmol);
225 }
226 { // "Undefined" chirality label
227 std::unique_ptr<ROMol> nmol(maesup.next());
228 TEST_ASSERT(nmol);
229 {
230 Atom *at = nmol->getAtomWithIdx(2);
231 TEST_ASSERT(at);
232 TEST_ASSERT(at->getChiralTag() == Atom::CHI_UNSPECIFIED);
233 TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
234 }
235 {
236 Atom *at = nmol->getAtomWithIdx(5);
237 TEST_ASSERT(at);
238 TEST_ASSERT(at->getChiralTag() == Atom::CHI_UNSPECIFIED);
239 TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
240 }
241 }
242 TEST_ASSERT(maesup.atEnd());
243 }
244 { // Test loop reading
245 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.mae";
246 MaeMolSupplier maesup(fname);
247 std::shared_ptr<ROMol> nmol;
248 for (unsigned int i = 0; i < 16; ++i) {
249 nmol.reset(maesup.next());
250 if (nmol) {
251 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
252 TEST_ASSERT(nmol->getNumAtoms() > 0);
253 if (i == 0) {
254 auto smiles = MolToSmiles(*nmol);
255 TEST_ASSERT(smiles ==
256 "CCC1=[O+][Cu@]2([O+]=C(CC)CC(CC)=[O+]2)[O+]=C(CC)C1");
257 }
258 }
259 }
260 TEST_ASSERT(maesup.atEnd());
261 bool ok = false;
262 try {
263 maesup.next();
264 } catch (FileParseException &) {
265 ok = true;
266 }
267 TEST_ASSERT(ok);
268 }
269
270 {
271 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/bad_ppty.mae";
272 const std::string err_msg_substr = "Bad format for property";
273
274 bool ok = false;
275 std::unique_ptr<ROMol> mol;
276 MaeMolSupplier maesup(fname);
277
278 // This is in excess: there are only 3 mols in the file, and the second one
279 // has an invalid property name, so it won't be read
280 for (unsigned int i = 0; i < 5; ++i) {
281 try {
282 mol.reset(maesup.next());
283 } catch (const FileParseException &e) {
284 const std::string err_msg(e.what());
285 TEST_ASSERT(i == 1);
286 TEST_ASSERT(err_msg.find(err_msg_substr) != std::string::npos);
287 ok = true;
288 break;
289 }
290 TEST_ASSERT(mol);
291 TEST_ASSERT(mol->hasProp(common_properties::_Name));
292 TEST_ASSERT(mol->getNumAtoms() == 1);
293 TEST_ASSERT(!maesup.atEnd());
294 }
295 TEST_ASSERT(!maesup.atEnd());
296 TEST_ASSERT(ok);
297 }
298
299 { // Test Maestro PDB property reading
300 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/1kv1.maegz";
301 auto *strm = new gzstream(fname);
302 MaeMolSupplier maesup(strm);
303
304 std::shared_ptr<ROMol> nmol;
305 nmol.reset(maesup.next());
306 const Atom *atom = nmol->getAtomWithIdx(0);
307 auto *info = (AtomPDBResidueInfo *)(atom->getMonomerInfo());
308 TEST_ASSERT(info->getResidueName() == "ARG ");
309 TEST_ASSERT(info->getChainId() == "A");
310 TEST_ASSERT(info->getResidueNumber() == 5);
311 }
312 #endif // RDK_BUILD_MAEPARSER_SUPPORT
313 return 1;
314 }
315
testRandMolSup()316 void testRandMolSup() {
317 std::string rdbase = getenv("RDBASE");
318 std::string fname =
319 rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
320 // std::string fname("../test_data/NCI_aids_few.sdf");
321 SDMolSupplier sdsup(fname);
322
323 ROMol *tmol = sdsup[7];
324 delete tmol;
325
326 CHECK_INVARIANT(sdsup.length() == 16, "");
327
328 STR_VECT names;
329 names.push_back(std::string("48"));
330 names.push_back(std::string("128"));
331 names.push_back(std::string("164"));
332 names.push_back(std::string("180"));
333 names.push_back(std::string("192"));
334 names.push_back(std::string("210"));
335 names.push_back(std::string("213"));
336 names.push_back(std::string("229"));
337
338 int i;
339 for (i = 0; i < 8; i++) {
340 ROMol *mol = sdsup[2 * i];
341 std::string mname;
342 mol->getProp(common_properties::_Name, mname);
343 CHECK_INVARIANT(mname == names[i], "");
344 delete mol;
345 }
346
347 // get a random molecule
348 ROMol *mol = sdsup[5];
349 TEST_ASSERT(mol);
350 std::string mname;
351 mol->getProp(common_properties::_Name, mname);
352 delete mol;
353 CHECK_INVARIANT(mname == "170", "");
354
355 // get the last molecule:
356 mol = sdsup[15];
357 TEST_ASSERT(mol);
358 delete mol;
359
360 // and make sure we're at the end:
361 TEST_ASSERT(sdsup.atEnd());
362 // now make sure we can grab earlier mols (was sf.net issue 1904170):
363 mol = sdsup[0];
364 TEST_ASSERT(mol);
365 delete mol;
366
367 // Issue 113: calling length before grabbing a molecule results in crashes:
368 SDMolSupplier sdsup2(fname);
369 CHECK_INVARIANT(sdsup2.length() == 16, "");
370 }
371
testSmilesSup()372 void testSmilesSup() {
373 std::string mname;
374 std::string fname;
375 ROMol *mol;
376
377 std::string rdbase = getenv("RDBASE");
378 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.2.csv";
379 {
380 SmilesMolSupplier nSup2(fname, ",", 1, 0, true);
381 TEST_ASSERT(nSup2.length() == 10);
382 }
383 {
384 SmilesMolSupplier nSup2(fname, ",", 1, 0, true);
385
386 mol = nSup2[3];
387 TEST_ASSERT(!nSup2.atEnd())
388 TEST_ASSERT(nSup2.length() == 10);
389
390 mol->getProp(common_properties::_Name, mname);
391 CHECK_INVARIANT(mname == "4", "");
392 mol->getProp("TPSA", mname);
393 CHECK_INVARIANT(mname == "82.78", "");
394 delete mol;
395
396 mol = nSup2[9];
397 TEST_ASSERT(mol);
398 delete mol;
399 // now make sure we can grab earlier mols (was sf.net issue 1904170):
400 mol = nSup2[0];
401 TEST_ASSERT(mol);
402 delete mol;
403 }
404 {
405 std::ifstream strm(fname.c_str(), std::ios_base::binary);
406 SmilesMolSupplier nSup2(&strm, false, ",", 1, 0, true);
407
408 mol = nSup2[3];
409 CHECK_INVARIANT(nSup2.length() == 10, "");
410
411 mol->getProp(common_properties::_Name, mname);
412 CHECK_INVARIANT(mname == "4", "");
413 mol->getProp("TPSA", mname);
414 CHECK_INVARIANT(mname == "82.78", "");
415 delete mol;
416
417 mol = nSup2[9];
418 TEST_ASSERT(mol);
419 delete mol;
420 // now make sure we can grab earlier mols (was sf.net issue 1904170):
421 mol = nSup2[0];
422 TEST_ASSERT(mol);
423 delete mol;
424 }
425
426 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/first_200.tpsa.csv";
427 SmilesMolSupplier smiSup(fname, ",", 0, -1);
428
429 mol = smiSup[16];
430
431 mol->getProp("TPSA", mname);
432 CHECK_INVARIANT(mname == "46.25", "");
433 delete mol;
434
435 mol = smiSup[8];
436 mol->getProp("TPSA", mname);
437 CHECK_INVARIANT(mname == "65.18", "");
438 delete mol;
439
440 int len = smiSup.length();
441 CHECK_INVARIANT(len == 200, "");
442
443 smiSup.reset();
444 int i = 0;
445 mol = smiSup.next();
446 while (1) {
447 std::string mname;
448 mol->getProp(common_properties::_Name, mname);
449 i++;
450 delete mol;
451 try {
452 mol = smiSup.next();
453 } catch (FileParseException &) {
454 break;
455 }
456 }
457
458 CHECK_INVARIANT(i == 200, "");
459
460 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
461 SmilesMolSupplier *nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
462
463 // check the length before we read anything out...
464 // this was a problem at one point (Issue 113)
465 CHECK_INVARIANT(nSup->length() == 10, "");
466 mol = (*nSup)[3];
467
468 mol->getProp(common_properties::_Name, mname);
469 CHECK_INVARIANT(mname == "4", "");
470 mol->getProp("Column_2", mname);
471 CHECK_INVARIANT(mname == "82.78", "");
472
473 delete nSup;
474 nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
475 unsigned int nRead = 0;
476 while (!nSup->atEnd()) {
477 delete mol;
478 mol = nSup->next();
479 TEST_ASSERT(mol);
480 nRead++;
481 }
482 TEST_ASSERT(nSup->length() == 10);
483 TEST_ASSERT(nRead == 10);
484
485 delete nSup;
486 delete mol;
487 }
488
testSmilesSupFromText()489 void testSmilesSupFromText() {
490 std::string mname;
491 std::string fname;
492 ROMol *mol;
493
494 SmilesMolSupplier nSup2;
495 std::string text;
496 bool failed;
497 int nAts;
498
499 // this was a delightful boundary condition:
500 BOOST_LOG(rdErrorLog)
501 << "------------------------------------------------------" << std::endl;
502 text =
503 "CC\n"
504 "CCC\n"
505 "CCOC\n"
506 "CCCCOC";
507 {
508 nSup2.setData(text, " ", 0, -1, false, true);
509 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
510 mol = nSup2.next();
511 nAts = mol->getNumAtoms();
512 delete mol;
513 TEST_ASSERT(nAts == 2);
514
515 mol = nSup2[3];
516 nAts = mol->getNumAtoms();
517 delete mol;
518 TEST_ASSERT(nAts == 6);
519 TEST_ASSERT(nSup2.length() == 4);
520
521 failed = false;
522 try {
523 mol = nSup2[4];
524 delete mol;
525 } catch (FileParseException &) {
526 failed = true;
527 }
528 TEST_ASSERT(failed);
529 mol = nSup2[2];
530 nAts = mol->getNumAtoms();
531 TEST_ASSERT(nAts == 4);
532 TEST_ASSERT(mol->hasProp(common_properties::_Name));
533 mol->getProp(common_properties::_Name, mname);
534 TEST_ASSERT(mname == "2");
535 delete mol;
536 }
537 {
538 nSup2.setData(text, " ", 0, -1, false, true);
539 mol = nSup2[2];
540 TEST_ASSERT(mol);
541 nAts = mol->getNumAtoms();
542 TEST_ASSERT(nAts == 4);
543 TEST_ASSERT(mol->hasProp(common_properties::_Name));
544 mol->getProp(common_properties::_Name, mname);
545 TEST_ASSERT(mname == "2");
546 delete mol;
547
548 mol = nSup2[3];
549 TEST_ASSERT(mol);
550 nAts = mol->getNumAtoms();
551 TEST_ASSERT(nAts == 6);
552 TEST_ASSERT(mol->hasProp(common_properties::_Name));
553 mol->getProp(common_properties::_Name, mname);
554 TEST_ASSERT(mname == "3");
555 delete mol;
556 }
557 {
558 nSup2.setData(text, " ", 0, -1, false, true);
559 mol = nSup2[3];
560 TEST_ASSERT(mol);
561 nAts = mol->getNumAtoms();
562 TEST_ASSERT(nAts == 6);
563 TEST_ASSERT(mol->hasProp(common_properties::_Name));
564 mol->getProp(common_properties::_Name, mname);
565 TEST_ASSERT(mname == "3");
566
567 delete mol;
568 mol = nSup2[2];
569 TEST_ASSERT(mol);
570 nAts = mol->getNumAtoms();
571 TEST_ASSERT(nAts == 4);
572 TEST_ASSERT(mol->hasProp(common_properties::_Name));
573 mol->getProp(common_properties::_Name, mname);
574 TEST_ASSERT(mname == "2");
575 delete mol;
576 }
577 // --------------
578 // basics:
579 text =
580 "Id SMILES Column_2\n"
581 "mol-1 C 1.0\n"
582 "mol-2 CC 4.0\n"
583 "mol-3 CCC 9.0\n"
584 "mol-4 CCCC 16.0\n";
585 #if 1
586 nSup2.setData(text, " ", 1, 0, true, true);
587 mol = nSup2[3];
588 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
589 CHECK_INVARIANT(nSup2.length() == 4, "");
590 mol->getProp(common_properties::_Name, mname);
591 TEST_ASSERT(mname == "mol-4");
592 mol->getProp("Column_2", mname);
593 TEST_ASSERT(mname == "16.0");
594 delete mol;
595
596 // ensure that we can call setData a second time:
597 text =
598 "Id SMILES Column_2\n"
599 "mol-1 C 1.0\n"
600 "mol-2 CC 4.0\n"
601 "mol-3 CCC 9.0\n";
602 nSup2.setData(text, " ", 1, 0, true, true);
603 CHECK_INVARIANT(nSup2.length() == 3, "");
604 mol = nSup2[2];
605 mol->getProp(common_properties::_Name, mname);
606 TEST_ASSERT(mname == "mol-3");
607 mol->getProp("Column_2", mname);
608 TEST_ASSERT(mname == "9.0");
609 delete mol;
610
611 // now test for failure handling:
612 text =
613 "Id SMILES Column_2\n"
614 "mol-1 C 1.0\n"
615 "mol-2 CC 4.0\n"
616 "mol-3 fail 9.0\n"
617 "mol-4 CCCC 16.0\n";
618 nSup2.setData(text, " ", 1, 0, true, true);
619 mol = nSup2[3];
620 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
621 TEST_ASSERT(nSup2.length() == 4);
622 mol->getProp(common_properties::_Name, mname);
623 TEST_ASSERT(mname == "mol-4");
624 mol->getProp("Column_2", mname);
625 TEST_ASSERT(mname == "16.0");
626 delete mol;
627
628 // failures should give null molecules:
629 mol = nSup2[2];
630 TEST_ASSERT(!mol);
631 delete mol;
632 #endif
633
634 // issue 114, no \n at EOF:
635 text =
636 "Id SMILES Column_2\n"
637 "mol-1 C 1.0\n"
638 "mol-2 CC 4.0\n"
639 "mol-4 CCCC 16.0\n";
640 nSup2.setData(text, " ", 1, 0, true, true);
641 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
642 TEST_ASSERT(nSup2.length() == 3);
643 mol = nSup2[2];
644 TEST_ASSERT(mol);
645 mol->getProp(common_properties::_Name, mname);
646 TEST_ASSERT(mname == "mol-4");
647 mol->getProp("Column_2", mname);
648 TEST_ASSERT(mname == "16.0");
649 TEST_ASSERT(nSup2.atEnd());
650 delete mol;
651
652 text =
653 "Id SMILES Column_2\n"
654 "mol-1 C 1.0\n"
655 "mol-2 CC 4.0\n"
656 "mol-4 CCCC 16.0";
657 nSup2.setData(text, " ", 1, 0, true, true);
658 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
659 TEST_ASSERT(nSup2.length() == 3);
660 mol = nSup2[2];
661 TEST_ASSERT(mol);
662 mol->getProp(common_properties::_Name, mname);
663 TEST_ASSERT(mname == "mol-4");
664 mol->getProp("Column_2", mname);
665 TEST_ASSERT(mname == "16.0");
666 TEST_ASSERT(nSup2.atEnd());
667 delete mol;
668
669 try {
670 mol = nSup2[3];
671 delete mol;
672 } catch (FileParseException &) {
673 failed = true;
674 }
675 TEST_ASSERT(failed);
676
677 text =
678 "mol-1 C 1.0\n"
679 "mol-2 CC 4.0\n"
680 "mol-4 CCCC 16.0";
681 nSup2.setData(text, " ", 1, 0, false, true);
682 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
683 TEST_ASSERT(nSup2.length() == 3);
684 mol = nSup2[2];
685 TEST_ASSERT(mol);
686 mol->getProp(common_properties::_Name, mname);
687 TEST_ASSERT(mname == "mol-4");
688 mol->getProp("Column_2", mname);
689 TEST_ASSERT(mname == "16.0");
690 delete mol;
691
692 text =
693 "C\n"
694 "CC\n"
695 "CCCC";
696 nSup2.setData(text, " ", 0, -1, false, true);
697 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
698 TEST_ASSERT(nSup2.length() == 3);
699 mol = nSup2[2];
700 TEST_ASSERT(mol);
701 TEST_ASSERT(mol->getNumAtoms() == 4);
702 delete mol;
703
704 // this was a delightful boundary condition:
705 BOOST_LOG(rdErrorLog)
706 << "------------------------------------------------------" << std::endl;
707 text =
708 "CC\n"
709 "CCC\n"
710 "CCOC\n"
711 "CCCCOC";
712 nSup2.setData(text, " ", 0, -1, false, true);
713 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
714 mol = nSup2.next();
715 delete mol;
716
717 mol = nSup2[3];
718 TEST_ASSERT(nSup2.length() == 4);
719 delete mol;
720
721 failed = false;
722 try {
723 mol = nSup2[4];
724 delete mol;
725 } catch (FileParseException &) {
726 failed = true;
727 }
728 TEST_ASSERT(failed);
729
730 BOOST_LOG(rdErrorLog)
731 << "------------------------------------------------------" << std::endl;
732 // this was a delightful boundary condition:
733 text =
734 "CC\n"
735 "CCC\n"
736 "CCOC\n"
737 "CCCCOC";
738 nSup2.setData(text, " ", 0, -1, false, true);
739 // BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
740 failed = false;
741 try {
742 mol = nSup2[4];
743 delete mol;
744 } catch (FileParseException &) {
745 failed = true;
746 }
747 TEST_ASSERT(failed);
748 BOOST_LOG(rdErrorLog) << ">>> This may result in an infinite loop. It "
749 "should finish almost immediately:"
750 << std::endl;
751 TEST_ASSERT(nSup2.length() == 4);
752 BOOST_LOG(rdErrorLog) << "<<< done." << std::endl;
753
754 nSup2.reset();
755 unsigned int nDone = 0;
756 while (!nSup2.atEnd()) {
757 mol = nSup2.next();
758 nDone++;
759 delete mol;
760 }
761 TEST_ASSERT(nDone == nSup2.length());
762
763 // ensure that we can call setData a second time:
764 text =
765 "Id SMILES Column_2\n"
766 "# comment, ignore\n"
767 "mol-1 C 1.0\n"
768 "mol-2 CC 4.0\n"
769 "mol-3 CCC 9.0\n"
770 "mol-4 CCCC 16.0\n";
771 nSup2.setData(text, " ", 1, 0, true, true);
772 mol = nSup2[2];
773 mol->getProp(common_properties::_Name, mname);
774 TEST_ASSERT(mname == "mol-3");
775 mol->getProp("Column_2", mname);
776 TEST_ASSERT(mname == "9.0");
777 delete mol;
778
779 mol = nSup2[1];
780 mol->getProp(common_properties::_Name, mname);
781 TEST_ASSERT(mname == "mol-2");
782 mol->getProp("Column_2", mname);
783 TEST_ASSERT(mname == "4.0");
784 delete mol;
785
786 // this was a delightful boundary condition:
787 text =
788 "CC\n"
789 "CCC\n"
790 "CCOC\n"
791 "CCCCOC\n"
792 "\n"
793 "\n";
794 nSup2.setData(text, " ", 0, -1, false, true);
795 TEST_ASSERT(nSup2.length() == 4);
796 nSup2.reset();
797 nDone = 0;
798 while (!nSup2.atEnd()) {
799 mol = nSup2.next();
800 nDone++;
801 delete mol;
802 }
803 TEST_ASSERT(nDone == nSup2.length());
804 };
805
testSmilesWriter()806 void testSmilesWriter() {
807 std::string rdbase = getenv("RDBASE");
808 std::string fname =
809 rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
810 // std::string fname = "../test_data/fewSmi.csv";
811 SmilesMolSupplier *nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
812 std::string oname =
813 rdbase + "/Code/GraphMol/FileParsers/test_data/outSmiles_molsupplier.csv";
814 // std::string oname = "../test_data/outSmiles.csv";
815
816 STR_VECT propNames;
817 propNames.push_back(std::string("Column_2"));
818 SmilesWriter *writer = new SmilesWriter(oname, " ");
819 writer->setProps(propNames);
820
821 STR_VECT names;
822 STR_VECT props;
823 ROMol *mol = nSup->next();
824 // BOOST_LOG(rdErrorLog) << "WRITING" << std::endl;
825 while (mol) {
826 // BOOST_LOG(rdErrorLog) << "MOL: " << MolToSmiles(*mol) << std::endl;
827 std::string mname, pval;
828 mol->getProp(common_properties::_Name, mname);
829 mol->getProp("Column_2", pval);
830 names.push_back(mname);
831 props.push_back(pval);
832 writer->write(*mol);
833 delete mol;
834 try {
835 mol = nSup->next();
836 } catch (FileParseException &) {
837 break;
838 }
839 }
840 writer->flush();
841 delete nSup;
842
843 // now read the molecules back in a check if we have the same properties etc
844 nSup = new SmilesMolSupplier(oname);
845 int i = 0;
846 mol = nSup->next();
847 while (mol) {
848 std::string mname, pval;
849 mol->getProp(common_properties::_Name, mname);
850 mol->getProp("Column_2", pval);
851 CHECK_INVARIANT(mname == names[i], "");
852 CHECK_INVARIANT(pval == props[i], "");
853 i++;
854 delete mol;
855 try {
856 mol = nSup->next();
857 } catch (FileParseException &) {
858 break;
859 }
860 }
861 TEST_ASSERT(nSup->length() == writer->numMols());
862 writer->close();
863 delete writer;
864 delete nSup;
865 }
866
testSDWriter()867 void testSDWriter() {
868 std::string rdbase = getenv("RDBASE");
869 std::string fname =
870 rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
871 SDMolSupplier sdsup(fname);
872
873 std::string ofile =
874 rdbase +
875 "/Code/GraphMol/FileParsers/test_data/outNCI_few_molsupplier.sdf";
876
877 auto *writer = new SDWriter(ofile);
878
879 STR_VECT names;
880
881 while (!sdsup.atEnd()) {
882 ROMol *mol = sdsup.next();
883 std::string mname;
884 mol->getProp(common_properties::_Name, mname);
885 names.push_back(mname);
886
887 writer->write(*mol);
888 delete mol;
889 }
890 writer->flush();
891 CHECK_INVARIANT(writer->numMols() == 16, "");
892 writer->close();
893 delete writer;
894
895 // now read in the file we just finished writing
896
897 SDMolSupplier reader(ofile);
898 int i = 0;
899 while (!reader.atEnd()) {
900 ROMol *mol = reader.next();
901 std::string mname;
902 mol->getProp(common_properties::_Name, mname);
903 BOOST_LOG(rdInfoLog) << mname << "\n";
904 // CHECK_INVARIANT(mname == names[i], "");
905
906 delete mol;
907 i++;
908 }
909
910 BOOST_LOG(rdInfoLog) << i << "\n";
911 /*
912 // now read in a file with aromatic information on the bonds
913 std::string infile = rdbase +
914 "/Code/GraphMol/FileParsers/test_data/outNCI_arom.sdf";
915 SDMolSupplier nreader(infile);
916 i = 0;
917 while (!nreader.atEnd()) {
918 ROMol *mol = nreader.next();
919 std::string mname;
920 mol->getProp(common_properties::_Name, mname);
921 BOOST_LOG(rdInfoLog) << mname << "\n";
922 //CHECK_INVARIANT(mname == names[i], "");
923 i++;
924
925 delete mol;
926 }*/
927 }
928
testSDSupplierEnding()929 void testSDSupplierEnding() {
930 std::string rdbase = getenv("RDBASE");
931 // test the SD supplier to check if it properly handle the end of sd file
932 // conditions
933 // should work fine if the sd file end with a $$$$ follwed by blank line or
934 // no
935 // no blank lines
936 std::string infile =
937 rdbase + "/Code/GraphMol/FileParsers/test_data/esters_end.sdf";
938 int i = 0;
939 SDMolSupplier reader(infile);
940 while (!reader.atEnd()) {
941 ROMol *mol = reader.next();
942 std::string mname;
943 mol->getProp(common_properties::_Name, mname);
944 i++;
945 delete mol;
946 }
947 CHECK_INVARIANT(i == 6, "");
948 }
949
testSuppliersEmptyFile()950 void testSuppliersEmptyFile() {
951 std::string rdbase = getenv("RDBASE");
952 { // contains no records
953 std::string infile =
954 rdbase + "/Code/GraphMol/FileParsers/test_data/empty.sdf";
955 SDMolSupplier reader(infile);
956 TEST_ASSERT(reader.atEnd());
957 }
958 {
959 std::string infile =
960 rdbase + "/Code/GraphMol/FileParsers/test_data/empty.smi";
961 SmilesMolSupplier smiSup(infile, ",", 0, -1);
962 TEST_ASSERT(smiSup.atEnd());
963 }
964 // tests for GitHub issue 19:
965 { // actually an empty file, throws an exception:
966 std::string infile =
967 rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
968 bool failed = false;
969 try {
970 SDMolSupplier reader(infile);
971 } catch (BadFileException &) {
972 failed = true;
973 }
974 TEST_ASSERT(failed);
975 }
976 {
977 SDMolSupplier reader;
978 reader.setData("");
979 TEST_ASSERT(reader.atEnd());
980 bool failed = false;
981 try {
982 reader[0];
983 } catch (FileParseException &) {
984 failed = true;
985 }
986 TEST_ASSERT(failed);
987 TEST_ASSERT(reader.length() == 0);
988 }
989 {
990 SDMolSupplier reader;
991 reader.setData("");
992 bool failed = false;
993 try {
994 reader[0];
995 } catch (FileParseException &) {
996 failed = true;
997 }
998 TEST_ASSERT(failed);
999 TEST_ASSERT(reader.length() == 0);
1000 }
1001 {
1002 SDMolSupplier reader;
1003 reader.setData("");
1004 TEST_ASSERT(reader.length() == 0);
1005 }
1006 }
1007
testCisTrans()1008 void testCisTrans() {
1009 std::string text;
1010 text =
1011 "mol-1 ClC(C)=C(Br)C\n"
1012 "mol-2 C1=COC=CC1C(Cl)=C(Br)C\n"
1013 "mol-3 C1=COC=CC1\\C(Cl)=C(Br)\\C";
1014 SmilesMolSupplier smiSup;
1015 smiSup.setData(text, " ", 1, 0, false, true);
1016
1017 std::string ofile = "cisTrans_molsupplier.sdf";
1018 SDWriter writer(ofile);
1019 while (!smiSup.atEnd()) {
1020 ROMol *mol = smiSup.next();
1021 TEST_ASSERT(mol);
1022 RDDepict::compute2DCoords(*mol);
1023 writer.write(*mol);
1024 delete mol;
1025 }
1026 writer.close();
1027 // do the round t;est
1028 // parse the sd file and write it out to smiles
1029
1030 SDMolSupplier *reader;
1031 try {
1032 reader = new SDMolSupplier("cisTrans_molsupplier.sdf");
1033 } catch (FileParseException &) {
1034 reader = nullptr;
1035 }
1036 TEST_ASSERT(reader);
1037 while (!reader->atEnd()) {
1038 ROMol *mol = reader->next();
1039 std::string mname;
1040 mol->getProp(common_properties::_Name, mname);
1041 BOOST_LOG(rdInfoLog) << mname << " ";
1042 BOOST_LOG(rdInfoLog) << MolToSmiles(*mol, 1) << "\n";
1043 delete mol;
1044 }
1045 delete reader;
1046 }
1047
testStereoRound()1048 void testStereoRound() {
1049 // - we will read ina bunch of cdk2 smiles with stereo on them
1050 // - generate the canonical smiles for each one
1051 // - generate 2D coordinates, write to an sdf file
1052 // - read the sdf file back in and compare the canonical smiles
1053 std::string rdbase = getenv("RDBASE");
1054 std::string infile =
1055 rdbase + "/Code/GraphMol/FileParsers/test_data/cdk2_stereo.csv";
1056 SmilesMolSupplier *smiSup;
1057 try {
1058 smiSup = new SmilesMolSupplier(infile, ",", 0, 1, false, true);
1059 } catch (FileParseException &) {
1060 smiSup = nullptr;
1061 }
1062 TEST_ASSERT(smiSup)
1063 std::map<std::string, std::string> nameSmi;
1064 std::string ofile =
1065 rdbase +
1066 "/Code/GraphMol/FileParsers/test_data/cdk2_stereo_molsupplier.sdf";
1067 auto *writer = new SDWriter(ofile);
1068 int count = 0;
1069
1070 while (!smiSup->atEnd()) {
1071 ROMol *mol = smiSup->next();
1072 // mol->debugMol(std::cout);
1073 std::string mname;
1074 mol->getProp(common_properties::_Name, mname);
1075 nameSmi[mname] = MolToSmiles(*mol, 1);
1076
1077 ROMol *nmol = SmilesToMol(nameSmi[mname]);
1078 // nmol->debugMol(std::cout);
1079
1080 std::string nsmi = MolToSmiles(*nmol, 1);
1081 // BOOST_LOG(rdErrorLog) << mname << "\n";
1082 if (nameSmi[mname] != nsmi) {
1083 BOOST_LOG(rdInfoLog) << mname << " " << nameSmi[mname] << " " << nsmi
1084 << "\n";
1085 }
1086 RDDepict::compute2DCoords(*mol);
1087 writer->write(*mol);
1088 count++;
1089 delete mol;
1090 delete nmol;
1091
1092 if (count % 50 == 0) {
1093 BOOST_LOG(rdInfoLog) << count << " " << mname << "\n";
1094 }
1095 }
1096 writer->close();
1097 delete smiSup;
1098 delete writer;
1099
1100 // now read the SD file back in check if the canonical smiles are the same
1101 SDMolSupplier *reader;
1102 try {
1103 reader = new SDMolSupplier(ofile);
1104 } catch (FileParseException &) {
1105 reader = nullptr;
1106 }
1107 TEST_ASSERT(reader);
1108 count = 0;
1109
1110 while (!reader->atEnd()) {
1111 ROMol *mol = reader->next();
1112 // mol->debugMol(std::cout);
1113 std::string smiles = MolToSmiles(*mol, 1);
1114 std::string mname;
1115 mol->getProp(common_properties::_Name, mname);
1116 if (nameSmi[mname] != smiles) {
1117 BOOST_LOG(rdInfoLog) << mname << " " << nameSmi[mname] << " " << smiles
1118 << "\n";
1119 }
1120 delete mol;
1121 count++;
1122 }
1123 delete reader;
1124 }
1125
testIssue226()1126 void testIssue226() {
1127 std::string rdbase = getenv("RDBASE");
1128 std::string fname =
1129 rdbase + "/Code/GraphMol/FileParsers/test_data/Issue226.sdf";
1130 SDMolSupplier sdsup(fname);
1131
1132 ROMol *mol;
1133
1134 mol = sdsup.next();
1135 TEST_ASSERT(mol);
1136 TEST_ASSERT(mol->hasProp("E1"));
1137 TEST_ASSERT(mol->hasProp("E2"));
1138 delete mol;
1139
1140 mol = sdsup.next();
1141 TEST_ASSERT(mol);
1142 TEST_ASSERT(mol->hasProp("E1"));
1143 TEST_ASSERT(mol->hasProp("E2"));
1144 delete mol;
1145 }
1146
testTDTSupplier1()1147 int testTDTSupplier1() {
1148 std::string rdbase = getenv("RDBASE");
1149 std::string fname =
1150 rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
1151 {
1152 TDTMolSupplier suppl(fname, "PN");
1153 unsigned int i = 0;
1154 while (!suppl.atEnd()) {
1155 ROMol *nmol = suppl.next();
1156 if (nmol) {
1157 std::string prop1, prop2;
1158 TEST_ASSERT(nmol->getNumAtoms() > 0);
1159 TEST_ASSERT(nmol->hasProp("PN"));
1160 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1161 TEST_ASSERT(nmol->hasProp("MFCD"));
1162
1163 nmol->getProp("PN", prop1);
1164 nmol->getProp(common_properties::_Name, prop2);
1165 TEST_ASSERT(prop1 == prop2);
1166
1167 // we didn't ask for 2D conformers, so there should be a property 2D:
1168 TEST_ASSERT(nmol->hasProp(common_properties::TWOD));
1169 // and no conformer:
1170 TEST_ASSERT(!nmol->getNumConformers());
1171
1172 delete nmol;
1173 i++;
1174 }
1175 }
1176 TEST_ASSERT(i == 10);
1177 }
1178 {
1179 std::ifstream strm(fname.c_str(), std::ios_base::binary);
1180 TDTMolSupplier suppl(&strm, false, "PN");
1181 unsigned int i = 0;
1182 while (!suppl.atEnd()) {
1183 ROMol *nmol = suppl.next();
1184 if (nmol) {
1185 std::string prop1, prop2;
1186 TEST_ASSERT(nmol->getNumAtoms() > 0);
1187 TEST_ASSERT(nmol->hasProp("PN"));
1188 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1189 TEST_ASSERT(nmol->hasProp("MFCD"));
1190
1191 nmol->getProp("PN", prop1);
1192 nmol->getProp(common_properties::_Name, prop2);
1193 TEST_ASSERT(prop1 == prop2);
1194
1195 // we didn't ask for 2D conformers, so there should be a property 2D:
1196 TEST_ASSERT(nmol->hasProp(common_properties::TWOD));
1197 // and no conformer:
1198 TEST_ASSERT(!nmol->getNumConformers());
1199
1200 delete nmol;
1201 i++;
1202 }
1203 }
1204 TEST_ASSERT(i == 10);
1205 }
1206 return 1;
1207 }
testTDTSupplier2()1208 int testTDTSupplier2() {
1209 std::string rdbase = getenv("RDBASE");
1210 std::string fname =
1211 rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
1212 int i;
1213 std::string prop1, prop2;
1214
1215 TDTMolSupplier suppl(fname, "PN", 2);
1216 i = 0;
1217 while (!suppl.atEnd()) {
1218 ROMol *nmol = suppl.next();
1219 if (nmol) {
1220 TEST_ASSERT(nmol->getNumAtoms() > 0);
1221 TEST_ASSERT(nmol->hasProp("PN"));
1222 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1223 TEST_ASSERT(nmol->hasProp("MFCD"));
1224
1225 nmol->getProp("PN", prop1);
1226 nmol->getProp(common_properties::_Name, prop2);
1227 TEST_ASSERT(prop1 == prop2);
1228
1229 // we asked for 2D conformers, so there should be no property 2D:
1230 TEST_ASSERT(!nmol->hasProp(common_properties::TWOD));
1231 // and a conformer:
1232 TEST_ASSERT(nmol->getNumConformers() == 1);
1233 // with id "2":
1234 TEST_ASSERT(nmol->beginConformers()->get()->getId() == 2);
1235
1236 delete nmol;
1237 i++;
1238 }
1239 }
1240 TEST_ASSERT(i == 10);
1241 return 1;
1242 }
testTDTSupplier3()1243 int testTDTSupplier3() {
1244 std::string data;
1245 int i;
1246 std::string prop1, prop2;
1247
1248 TDTMolSupplier suppl;
1249
1250 data =
1251 "$SMI<Cc1nnc(N)nc1C>\n"
1252 "CAS<17584-12-2>\n"
1253 "|\n"
1254 "$SMI<Cc1n[nH]c(=O)nc1N>\n"
1255 "CAS<~>\n"
1256 "|\n"
1257 "$SMI<Cc1n[nH]c(=O)[nH]c1=O>\n"
1258 "CAS<932-53-6>\n"
1259 "|\n"
1260 "$SMI<Cc1nnc(NN)nc1O>\n"
1261 "CAS<~>\n"
1262 "|\n";
1263 suppl.setData(data, "CAS");
1264
1265 i = 0;
1266 while (!suppl.atEnd()) {
1267 ROMol *nmol = suppl.next();
1268 if (nmol) {
1269 TEST_ASSERT(nmol->getNumAtoms() > 0);
1270 TEST_ASSERT(nmol->hasProp("CAS"));
1271 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1272
1273 nmol->getProp("CAS", prop1);
1274 nmol->getProp(common_properties::_Name, prop2);
1275 TEST_ASSERT(prop1 == prop2);
1276
1277 // no conformers should have been read:
1278 TEST_ASSERT(nmol->getNumConformers() == 0);
1279
1280 delete nmol;
1281 i++;
1282 }
1283 }
1284 TEST_ASSERT(i == 4);
1285 TEST_ASSERT(suppl.length() == 4);
1286
1287 // now make sure we can grab earlier mols (was sf.net issue 1904170):
1288 ROMol *mol = suppl[0];
1289 TEST_ASSERT(mol);
1290 delete mol;
1291
1292 // make sure we can reset the supplier and still process it properly;
1293 suppl.setData(data, "CAS");
1294
1295 i = 0;
1296 while (!suppl.atEnd()) {
1297 ROMol *nmol = suppl.next();
1298 if (nmol) {
1299 TEST_ASSERT(nmol->getNumAtoms() > 0);
1300 TEST_ASSERT(nmol->hasProp("CAS"));
1301 TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1302
1303 nmol->getProp("CAS", prop1);
1304 nmol->getProp(common_properties::_Name, prop2);
1305 TEST_ASSERT(prop1 == prop2);
1306
1307 // no conformers should have been read:
1308 TEST_ASSERT(nmol->getNumConformers() == 0);
1309
1310 delete nmol;
1311 i++;
1312 }
1313 }
1314 TEST_ASSERT(i == 4);
1315
1316 return 1;
1317 }
1318
testSDSupplierFromText()1319 void testSDSupplierFromText() {
1320 std::string text;
1321 int i = 0;
1322 SDMolSupplier reader;
1323
1324 text =
1325 "Structure1\n"
1326 "csChFnd70/05230312262D\n"
1327 "\n"
1328 " 5 4 0 0 0 0 0 0 0 0999 V2000\n"
1329 " 1.2124 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1330 " 2.4249 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1331 " 3.6373 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1332 " 2.4249 2.1000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1333 " 0.0000 0.7000 0.0000 Y 0 0 0 0 0 0 0 0 0 0 0 0\n"
1334 " 1 2 1 0 0 0 0\n"
1335 " 2 3 1 0 0 0 0\n"
1336 " 2 4 2 0 0 0 0\n"
1337 " 1 5 1 0 0 0 0\n"
1338 "M END\n"
1339 "> <ID> (3)\n"
1340 "Lig1\n"
1341 "\n"
1342 "$$$$\n"
1343 "Structure1\n"
1344 "csChFnd70/05230312262D\n"
1345 "\n"
1346 " 6 5 0 0 0 0 0 0 0 0999 V2000\n"
1347 " 1.2124 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1348 " 2.4249 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1349 " 3.6373 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1350 " 2.4249 2.1000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1351 " 0.0000 0.7000 0.0000 Y 0 0 0 0 0 0 0 0 0 0 0 0\n"
1352 " 4.8477 0.6988 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1353 " 1 2 1 0 0 0 0\n"
1354 " 2 3 1 0 0 0 0\n"
1355 " 2 4 2 0 0 0 0\n"
1356 " 1 5 1 0 0 0 0\n"
1357 " 3 6 1 0 0 0 0\n"
1358 "M END\n"
1359 "> <ID> (4)\n"
1360 "Lig2\n"
1361 "\n"
1362 "$$$$\n";
1363 reader.setData(text);
1364
1365 i = 0;
1366 while (!reader.atEnd()) {
1367 ROMol *mol = reader.next();
1368 std::string mname;
1369 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1370 TEST_ASSERT(mol->hasProp("ID"));
1371 i++;
1372 delete mol;
1373 }
1374 TEST_ASSERT(i == 2);
1375 }
1376
testSDSupplierFromTextStrLax1()1377 void testSDSupplierFromTextStrLax1() {
1378 std::string text;
1379 text =
1380 "Structure1\n"
1381 "csChFnd70/05230312262D\n"
1382 "\n"
1383 " 5 4 0 0 0 0 0 0 0 0999 V2000\n"
1384 " 1.2124 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1385 " 2.4249 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1386 " 3.6373 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1387 " 2.4249 2.1000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1388 " 0.0000 0.7000 0.0000 Y 0 0 0 0 0 0 0 0 0 0 0 0\n"
1389 " 1 2 1 0 0 0 0\n"
1390 " 2 3 1 0 0 0 0\n"
1391 " 2 4 2 0 0 0 0\n"
1392 " 1 5 1 0 0 0 0\n"
1393 "M END\n"
1394 "blah\n"
1395 "\n"
1396 "blah after blank line\n"
1397 "> <ID> (3)\n"
1398 "Lig1\n"
1399 "\n"
1400 "This will be ignored\n"
1401 "> <ANOTHER_PROPERTY> (4)\n"
1402 "Value\n"
1403 "\n"
1404 "$$$$\n"
1405 "Structure1\n"
1406 "csChFnd70/05230312262D\n"
1407 "\n"
1408 " 6 5 0 0 0 0 0 0 0 0999 V2000\n"
1409 " 1.2124 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1410 " 2.4249 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1411 " 3.6373 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1412 " 2.4249 2.1000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1413 " 0.0000 0.7000 0.0000 Y 0 0 0 0 0 0 0 0 0 0 0 0\n"
1414 " 4.8477 0.6988 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1415 " 1 2 1 0 0 0 0\n"
1416 " 2 3 1 0 0 0 0\n"
1417 " 2 4 2 0 0 0 0\n"
1418 " 1 5 1 0 0 0 0\n"
1419 " 3 6 1 0 0 0 0\n"
1420 "M END\n"
1421 "> <ID> (4)\n"
1422 "Lig2\n"
1423 "\n"
1424 "This will be ignored\n"
1425 "\n"
1426 "> <ANOTHER_PROPERTY> (4)\n"
1427 "Value\n"
1428 "\n"
1429 "This will be ignored\n"
1430 "\n"
1431 "$$$$\n";
1432
1433 // strict
1434 {
1435 SDMolSupplier reader;
1436
1437 reader.setData(text, true, true, true);
1438
1439 int i = 0;
1440 while (!reader.atEnd()) {
1441 ROMol *mol = reader.next();
1442 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1443 if (i == 0) {
1444 TEST_ASSERT(!mol->hasProp("ID"));
1445 }
1446 TEST_ASSERT(!mol->hasProp("ANOTHER_PROPERTY"));
1447 i++;
1448 delete mol;
1449 }
1450 TEST_ASSERT(i == 2);
1451 }
1452 // lax
1453 {
1454 SDMolSupplier reader;
1455
1456 reader.setData(text, true, true, false);
1457
1458 int i = 0;
1459 while (!reader.atEnd()) {
1460 ROMol *mol = reader.next();
1461 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1462 TEST_ASSERT(mol->hasProp("ID"));
1463 TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1464 i++;
1465 delete mol;
1466 }
1467 TEST_ASSERT(i == 2);
1468 }
1469 }
1470
testSDSupplierFromTextStrLax2()1471 void testSDSupplierFromTextStrLax2() {
1472 std::string text;
1473 text =
1474 "Structure1\n"
1475 "csChFnd70/05230312262D\n"
1476 "\n"
1477 " 5 4 0 0 0 0 0 0 0 0999 V2000\n"
1478 " 1.2124 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1479 " 2.4249 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1480 " 3.6373 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1481 " 2.4249 2.1000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1482 " 0.0000 0.7000 0.0000 Y 0 0 0 0 0 0 0 0 0 0 0 0\n"
1483 " 1 2 1 0 0 0 0\n"
1484 " 2 3 1 0 0 0 0\n"
1485 " 2 4 2 0 0 0 0\n"
1486 " 1 5 1 0 0 0 0\n"
1487 "M END\n"
1488 "> <ID> (3)\n"
1489 "Lig1\n"
1490 "\n"
1491 "> <ANOTHER_PROPERTY> (4)\n"
1492 "No blank line before dollars\n"
1493 "$$$$\n"
1494 "Structure1\n"
1495 "csChFnd70/05230312262D\n"
1496 "\n"
1497 " 6 5 0 0 0 0 0 0 0 0999 V2000\n"
1498 " 1.2124 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1499 " 2.4249 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1500 " 3.6373 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1501 " 2.4249 2.1000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
1502 " 0.0000 0.7000 0.0000 Y 0 0 0 0 0 0 0 0 0 0 0 0\n"
1503 " 4.8477 0.6988 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
1504 " 1 2 1 0 0 0 0\n"
1505 " 2 3 1 0 0 0 0\n"
1506 " 2 4 2 0 0 0 0\n"
1507 " 1 5 1 0 0 0 0\n"
1508 " 3 6 1 0 0 0 0\n"
1509 "M END\n"
1510 "> <ID> (3)\n"
1511 "Lig2\n"
1512 "\n"
1513 "> <ANOTHER_PROPERTY> (4)\n"
1514 "Value2\n"
1515 "\n"
1516 "$$$$\n";
1517
1518 // strict
1519 {
1520 SDMolSupplier reader;
1521
1522 reader.setData(text, true, true, true);
1523
1524 int i = 0;
1525 while (!reader.atEnd()) {
1526 ROMol *mol = reader.next();
1527 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1528 TEST_ASSERT(mol->hasProp("ID"));
1529 TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1530 std::string s;
1531 mol->getProp("ID", s);
1532 TEST_ASSERT(s == "Lig1");
1533 mol->getProp("ANOTHER_PROPERTY", s);
1534 TEST_ASSERT(s ==
1535 "No blank line before dollars\n"
1536 "$$$$\n"
1537 "Structure1\n"
1538 "csChFnd70/05230312262D");
1539 i++;
1540 delete mol;
1541 }
1542 TEST_ASSERT(i == 1);
1543 }
1544 // lax
1545 {
1546 SDMolSupplier reader;
1547
1548 reader.setData(text, true, true, false);
1549
1550 int i = 0;
1551 while (!reader.atEnd()) {
1552 ROMol *mol = reader.next();
1553 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1554 TEST_ASSERT(mol->hasProp("ID"));
1555 TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1556 std::string s;
1557 mol->getProp("ID", s);
1558 TEST_ASSERT(s == "Lig2");
1559 mol->getProp("ANOTHER_PROPERTY", s);
1560 TEST_ASSERT(s == "Value2");
1561 i++;
1562 delete mol;
1563 }
1564 TEST_ASSERT(i == 1);
1565 }
1566 }
1567
testSDSupplierStrLax1()1568 void testSDSupplierStrLax1() {
1569 std::string rdbase = getenv("RDBASE");
1570 std::string fname =
1571 rdbase + "/Code/GraphMol/FileParsers/test_data/strictLax1.sdf";
1572 // strict
1573 {
1574 SDMolSupplier reader(fname, true, true, true);
1575
1576 int i = 0;
1577 while (!reader.atEnd()) {
1578 ROMol *mol = reader.next();
1579 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1580 if (i == 0) {
1581 TEST_ASSERT(!mol->hasProp("ID"));
1582 }
1583 TEST_ASSERT(!mol->hasProp("ANOTHER_PROPERTY"));
1584 i++;
1585 delete mol;
1586 }
1587 TEST_ASSERT(i == 2);
1588 }
1589 // lax
1590 {
1591 SDMolSupplier reader(fname, true, true, false);
1592
1593 int i = 0;
1594 while (!reader.atEnd()) {
1595 ROMol *mol = reader.next();
1596 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1597 TEST_ASSERT(mol->hasProp("ID"));
1598 TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1599 i++;
1600 delete mol;
1601 }
1602 TEST_ASSERT(i == 2);
1603 }
1604 }
1605
testSDSupplierStrLax2()1606 void testSDSupplierStrLax2() {
1607 std::string rdbase = getenv("RDBASE");
1608 std::string fname =
1609 rdbase + "/Code/GraphMol/FileParsers/test_data/strictLax2.sdf";
1610 // strict
1611 {
1612 SDMolSupplier reader(fname, true, true, true);
1613
1614 int i = 0;
1615 while (!reader.atEnd()) {
1616 ROMol *mol = reader.next();
1617 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1618 TEST_ASSERT(mol->hasProp("ID"));
1619 TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1620 std::string s;
1621 mol->getProp("ID", s);
1622 TEST_ASSERT(s == "Lig1");
1623 mol->getProp("ANOTHER_PROPERTY", s);
1624 TEST_ASSERT(s ==
1625 "No blank line before dollars\n"
1626 "$$$$\n"
1627 "Structure1\n"
1628 "csChFnd70/05230312262D");
1629 i++;
1630 delete mol;
1631 }
1632 TEST_ASSERT(i == 1);
1633 }
1634 // lax
1635 {
1636 SDMolSupplier reader(fname, true, true, false);
1637
1638 int i = 0;
1639 while (!reader.atEnd()) {
1640 ROMol *mol = reader.next();
1641 TEST_ASSERT(mol->hasProp(common_properties::_Name));
1642 TEST_ASSERT(mol->hasProp("ID"));
1643 TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1644 std::string s;
1645 mol->getProp("ID", s);
1646 TEST_ASSERT(s == "Lig2");
1647 mol->getProp("ANOTHER_PROPERTY", s);
1648 TEST_ASSERT(s == "Value2");
1649 i++;
1650 delete mol;
1651 }
1652 TEST_ASSERT(i == 1);
1653 }
1654 }
1655
testIssue265()1656 void testIssue265() {
1657 std::string rdbase = getenv("RDBASE");
1658 std::string fname =
1659 rdbase + "/Code/GraphMol/FileParsers/test_data/NotThere.sdf";
1660 bool ok = false;
1661 try {
1662 SDMolSupplier reader(fname);
1663 ok = false;
1664 } catch (BadFileException &) {
1665 ok = true;
1666 }
1667 TEST_ASSERT(ok);
1668
1669 try {
1670 SmilesMolSupplier reader(fname);
1671 ok = false;
1672 } catch (BadFileException &) {
1673 ok = true;
1674 }
1675 TEST_ASSERT(ok);
1676
1677 try {
1678 TDTMolSupplier reader(fname);
1679 ok = false;
1680 } catch (BadFileException &) {
1681 ok = true;
1682 }
1683 TEST_ASSERT(ok);
1684 }
1685
testSDErrorHandling()1686 void testSDErrorHandling() {
1687 std::string rdbase = getenv("RDBASE");
1688 std::string fname =
1689 rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors1.sdf";
1690 SDMolSupplier *sdsup;
1691 ROMol *nmol = nullptr;
1692
1693 // entry 1: bad properties
1694 sdsup = new SDMolSupplier(fname);
1695 TEST_ASSERT(!sdsup->atEnd());
1696 nmol = sdsup->next();
1697 TEST_ASSERT(nmol);
1698 TEST_ASSERT(!nmol->hasProp("ID"));
1699 delete sdsup;
1700 delete nmol;
1701
1702 // case 2: can't be sanitized
1703 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors2.sdf";
1704 sdsup = new SDMolSupplier(fname);
1705 TEST_ASSERT(!sdsup->atEnd());
1706 nmol = sdsup->next();
1707 TEST_ASSERT(!nmol);
1708 TEST_ASSERT(sdsup->atEnd());
1709 delete sdsup;
1710 delete nmol;
1711
1712 // entry 3: bad number of atoms
1713 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors3.sdf";
1714 sdsup = new SDMolSupplier(fname);
1715 TEST_ASSERT(!sdsup->atEnd());
1716 nmol = sdsup->next();
1717 TEST_ASSERT(!nmol);
1718 TEST_ASSERT(sdsup->atEnd());
1719 delete sdsup;
1720 delete nmol;
1721
1722 // entry 4: bad number of bonds
1723 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors4.sdf";
1724 sdsup = new SDMolSupplier(fname);
1725 TEST_ASSERT(!sdsup->atEnd());
1726 nmol = sdsup->next();
1727 TEST_ASSERT(!nmol);
1728 TEST_ASSERT(sdsup->atEnd());
1729 delete sdsup;
1730 delete nmol;
1731 }
1732
testIssue381()1733 void testIssue381() {
1734 std::string rdbase = getenv("RDBASE");
1735 std::string fname =
1736 rdbase + "/Code/GraphMol/FileParsers/test_data/Issue381.sdf";
1737 SDMolSupplier *sdsup;
1738
1739 ROMol *nmol = nullptr;
1740 int count;
1741
1742 // entry 1: bad properties
1743 sdsup = new SDMolSupplier(fname);
1744 TEST_ASSERT(!sdsup->atEnd());
1745 count = 0;
1746 while (!sdsup->atEnd()) {
1747 nmol = sdsup->next();
1748 if (nmol) {
1749 delete nmol;
1750 }
1751 count++;
1752 }
1753 TEST_ASSERT(sdsup->atEnd());
1754 TEST_ASSERT(count == 9);
1755
1756 TEST_ASSERT(sdsup->length() == 9);
1757
1758 delete sdsup;
1759 }
1760
testSetStreamIndices()1761 void testSetStreamIndices() {
1762 std::string rdbase = getenv("RDBASE");
1763 std::string fname =
1764 rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
1765 std::ifstream ifs(fname.c_str(), std::ios_base::binary);
1766 std::vector<std::streampos> indices;
1767 bool addIndex = true;
1768 bool notEof = true;
1769 std::streampos pos = 0;
1770 std::string line;
1771 while (notEof) {
1772 if (addIndex) {
1773 pos = ifs.tellg();
1774 }
1775 notEof = (std::getline(ifs, line) ? true : false);
1776 if (notEof) {
1777 if (addIndex) {
1778 indices.push_back(pos);
1779 }
1780 addIndex = (line.substr(0, 4) == "$$$$");
1781 }
1782 }
1783 ifs.close();
1784 SDMolSupplier *sdsup;
1785
1786 ROMol *nmol = nullptr;
1787 int count;
1788
1789 sdsup = new SDMolSupplier(fname);
1790 sdsup->setStreamIndices(indices);
1791 TEST_ASSERT(!sdsup->atEnd());
1792 TEST_ASSERT(sdsup->length() == 16);
1793
1794 count = 0;
1795 while (!sdsup->atEnd()) {
1796 nmol = sdsup->next();
1797 if (nmol) {
1798 delete nmol;
1799 }
1800 count++;
1801 }
1802 TEST_ASSERT(sdsup->atEnd());
1803 TEST_ASSERT(count == 16);
1804
1805 TEST_ASSERT(sdsup->length() == 16);
1806
1807 delete sdsup;
1808 }
1809
testMixIterAndRandom()1810 int testMixIterAndRandom() {
1811 std::string rdbase = getenv("RDBASE");
1812 std::string fname =
1813 rdbase + "/Code/GraphMol/FileParsers/test_data/esters.sdf";
1814 bool ok;
1815
1816 SDMolSupplier *sdsup;
1817 ROMol *mol;
1818 std::string name;
1819
1820 sdsup = new SDMolSupplier(fname);
1821 TEST_ASSERT(sdsup);
1822 unsigned int i = 0;
1823 while (!sdsup->atEnd()) {
1824 mol = sdsup->next();
1825 if (mol) {
1826 TEST_ASSERT(mol->hasProp("ID"));
1827 delete mol;
1828 }
1829 i++;
1830 }
1831 TEST_ASSERT(i == 6);
1832 TEST_ASSERT(sdsup->length() == 6);
1833
1834 delete sdsup;
1835 sdsup = new SDMolSupplier(fname);
1836 TEST_ASSERT(sdsup);
1837 TEST_ASSERT(sdsup->length() == 6);
1838
1839 mol = sdsup->next();
1840 TEST_ASSERT(mol);
1841 TEST_ASSERT(mol->hasProp("ID"));
1842 mol->getProp("ID", name);
1843 TEST_ASSERT(name == "Lig1");
1844 delete mol;
1845
1846 mol = (*sdsup)[0];
1847 TEST_ASSERT(mol);
1848 TEST_ASSERT(mol->hasProp("ID"));
1849 mol->getProp("ID", name);
1850 TEST_ASSERT(name == "Lig1");
1851 delete mol;
1852
1853 sdsup->reset();
1854 mol = sdsup->next();
1855 TEST_ASSERT(mol);
1856 TEST_ASSERT(mol->hasProp("ID"));
1857 mol->getProp("ID", name);
1858 TEST_ASSERT(name == "Lig1");
1859 delete mol;
1860 mol = sdsup->next();
1861 TEST_ASSERT(mol);
1862 TEST_ASSERT(mol->hasProp("ID"));
1863 mol->getProp("ID", name);
1864 TEST_ASSERT(name == "Lig2");
1865 delete mol;
1866 delete sdsup;
1867
1868 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
1869 SmilesMolSupplier *nSup;
1870 nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1871 TEST_ASSERT(nSup);
1872 TEST_ASSERT(nSup->length() == 10);
1873 mol = (*nSup)[0];
1874 TEST_ASSERT(mol);
1875 TEST_ASSERT(mol->getNumAtoms() == 9);
1876 delete mol;
1877 delete nSup;
1878
1879 nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1880 TEST_ASSERT(nSup);
1881 mol = (*nSup)[0];
1882 TEST_ASSERT(mol);
1883 TEST_ASSERT(mol->getNumAtoms() == 9);
1884 TEST_ASSERT(nSup->length() == 10);
1885 delete mol;
1886 delete nSup;
1887
1888 nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1889 TEST_ASSERT(nSup);
1890 mol = nSup->next();
1891 TEST_ASSERT(mol);
1892 TEST_ASSERT(mol->getNumAtoms() == 9);
1893 TEST_ASSERT(nSup->length() == 10);
1894 delete mol;
1895 mol = (*nSup)[0];
1896 TEST_ASSERT(mol);
1897 TEST_ASSERT(mol->getNumAtoms() == 9);
1898 TEST_ASSERT(nSup->length() == 10);
1899 delete mol;
1900 mol = nSup->next();
1901 TEST_ASSERT(mol);
1902 TEST_ASSERT(mol->getNumAtoms() == 20);
1903 delete nSup;
1904 delete mol;
1905
1906 nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1907 TEST_ASSERT(nSup);
1908 mol = nullptr;
1909 try {
1910 mol = (*nSup)[20];
1911 ok = false;
1912 } catch (FileParseException &) {
1913 ok = true;
1914 }
1915 TEST_ASSERT(ok);
1916 delete nSup;
1917
1918 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
1919 TDTMolSupplier *tSup;
1920 tSup = new TDTMolSupplier(fname);
1921 TEST_ASSERT(tSup);
1922 TEST_ASSERT(tSup->length() == 10);
1923 mol = (*tSup)[0];
1924 TEST_ASSERT(mol);
1925 TEST_ASSERT(mol->getNumAtoms() == 9);
1926 delete mol;
1927 delete tSup;
1928
1929 tSup = new TDTMolSupplier(fname);
1930 TEST_ASSERT(tSup);
1931 mol = (*tSup)[0];
1932 TEST_ASSERT(mol);
1933 TEST_ASSERT(mol->getNumAtoms() == 9);
1934 TEST_ASSERT(tSup->length() == 10);
1935 delete mol;
1936 delete tSup;
1937
1938 tSup = new TDTMolSupplier(fname);
1939 TEST_ASSERT(tSup);
1940 mol = tSup->next();
1941 TEST_ASSERT(mol);
1942 TEST_ASSERT(mol->getNumAtoms() == 9);
1943 TEST_ASSERT(tSup->length() == 10);
1944 delete mol;
1945
1946 mol = (*tSup)[0];
1947 TEST_ASSERT(mol);
1948 TEST_ASSERT(mol->getNumAtoms() == 9);
1949 TEST_ASSERT(tSup->length() == 10);
1950 delete mol;
1951
1952 mol = tSup->next();
1953 TEST_ASSERT(mol);
1954 delete mol;
1955
1956 mol = tSup->next();
1957 TEST_ASSERT(mol);
1958 delete mol;
1959
1960 mol = tSup->next();
1961 TEST_ASSERT(mol);
1962 TEST_ASSERT(mol->getNumAtoms() == 10);
1963 delete tSup;
1964 delete mol;
1965
1966 tSup = new TDTMolSupplier(fname);
1967 TEST_ASSERT(tSup);
1968 mol = nullptr;
1969 try {
1970 mol = (*tSup)[20];
1971 delete mol;
1972 ok = false;
1973 } catch (FileParseException &) {
1974 ok = true;
1975 }
1976 TEST_ASSERT(ok);
1977 delete tSup;
1978
1979 return 1;
1980 }
1981
testRemoveHs()1982 int testRemoveHs() {
1983 std::string rdbase = getenv("RDBASE");
1984 std::string fname =
1985 rdbase + "/Code/GraphMol/FileParsers/test_data/withHs.sdf";
1986
1987 SDMolSupplier sdsup(fname);
1988 ROMol *nmol;
1989
1990 nmol = sdsup.next();
1991 TEST_ASSERT(nmol);
1992 TEST_ASSERT(nmol->getNumAtoms() == 23);
1993 delete nmol;
1994 nmol = sdsup.next();
1995 TEST_ASSERT(nmol);
1996 TEST_ASSERT(nmol->getNumAtoms() == 28);
1997 delete nmol;
1998
1999 std::cerr << "build:" << std::endl;
2000 SDMolSupplier sdsup2(fname, true, false);
2001 nmol = sdsup2.next();
2002 TEST_ASSERT(nmol);
2003 // std::cerr<<" count: "<<nmol->getNumAtoms()<<std::endl;
2004 TEST_ASSERT(nmol->getNumAtoms() == 39);
2005 delete nmol;
2006 nmol = sdsup2.next();
2007 TEST_ASSERT(nmol);
2008 TEST_ASSERT(nmol->getNumAtoms() == 30);
2009 delete nmol;
2010
2011 return 1;
2012 }
2013
testGetItemText()2014 void testGetItemText() {
2015 std::string rdbase = getenv("RDBASE");
2016 std::string fname;
2017
2018 ROMol *mol1, *mol2;
2019 std::string molB, smiles;
2020 bool ok;
2021
2022 {
2023 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2024 SDMolSupplier sdsup(fname);
2025 TEST_ASSERT(sdsup.length() == 16);
2026
2027 molB = sdsup.getItemText(0);
2028 mol1 = sdsup[0];
2029 TEST_ASSERT(mol1);
2030 mol2 = MolBlockToMol(molB);
2031 TEST_ASSERT(mol2);
2032 TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2033 delete mol1;
2034 delete mol2;
2035
2036 // make sure getItemText() doesn't screw up the current position:
2037 molB = sdsup.getItemText(10);
2038 mol1 = sdsup.next();
2039 molB = sdsup.getItemText(1);
2040 TEST_ASSERT(mol1);
2041 mol2 = MolBlockToMol(molB);
2042 TEST_ASSERT(mol2);
2043 TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2044 delete mol1;
2045 delete mol2;
2046
2047 // make sure getItemText() works on the last molecule
2048 // (this was sf.net issue 1874882
2049 molB = sdsup.getItemText(15);
2050 mol1 = sdsup[15];
2051 mol2 = MolBlockToMol(molB);
2052 TEST_ASSERT(mol2);
2053 TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2054 delete mol1;
2055 delete mol2;
2056
2057 try {
2058 molB = sdsup.getItemText(16);
2059 ok = false;
2060 } catch (FileParseException &) {
2061 ok = true;
2062 }
2063 TEST_ASSERT(ok);
2064
2065 try {
2066 molB = sdsup.getItemText(20);
2067 ok = false;
2068 } catch (FileParseException &) {
2069 ok = true;
2070 }
2071 TEST_ASSERT(ok);
2072 }
2073
2074 {
2075 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2076 SDMolSupplier sdsup(fname);
2077
2078 // make sure getItemText() works if we haven't read at all from the
2079 // supplier:
2080 // (this was sf.net issue 2632960)
2081 molB = sdsup.getItemText(0);
2082 mol2 = MolBlockToMol(molB);
2083 TEST_ASSERT(mol2);
2084 mol1 = sdsup[0];
2085 TEST_ASSERT(mol1);
2086 TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2087 delete mol1;
2088 delete mol2;
2089
2090 molB = sdsup.getItemText(5);
2091 mol2 = MolBlockToMol(molB);
2092 TEST_ASSERT(mol2);
2093 TEST_ASSERT(mol2->getNumAtoms() == 16);
2094 mol1 = sdsup[5];
2095 TEST_ASSERT(mol1);
2096 TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2097 delete mol1;
2098 delete mol2;
2099 }
2100
2101 {
2102 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
2103 SmilesMolSupplier smisup(fname, ",", 1, 0, false);
2104 TEST_ASSERT(smisup.length() == 10);
2105
2106 molB = smisup.getItemText(0);
2107 TEST_ASSERT(molB == "1, CC1=CC(=O)C=CC1=O, 34.14");
2108 mol1 = smisup[0];
2109 TEST_ASSERT(mol1);
2110 delete mol1;
2111
2112 molB = smisup.getItemText(5);
2113 TEST_ASSERT(
2114 molB ==
2115 "6, OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br, 87.74");
2116 mol1 = smisup.next();
2117 TEST_ASSERT(mol1);
2118 TEST_ASSERT(mol1->getNumAtoms() == 20);
2119 delete mol1;
2120
2121 // make sure getItemText() works on the last molecule
2122 // (this was sf.net issue 1874882
2123 molB = smisup.getItemText(8);
2124 TEST_ASSERT(molB == "9, CC(=NO)C(C)=NO, 65.18");
2125 molB = smisup.getItemText(9);
2126 TEST_ASSERT(molB == "10, C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3, 0.00");
2127
2128 mol1 = smisup[0];
2129 TEST_ASSERT(mol1);
2130 smiles = MolToSmiles(*mol1, 1);
2131 TEST_ASSERT(smiles == "CC1=CC(=O)C=CC1=O");
2132 TEST_ASSERT(mol1->getNumAtoms() == 9);
2133 delete mol1;
2134 }
2135
2136 {
2137 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
2138 SmilesMolSupplier smisup(fname, ",", 1, 0, false);
2139
2140 // make sure getItemText() works if we haven't read at all from the
2141 // supplier:
2142 // (this was sf.net issue 2632960)
2143 molB = smisup.getItemText(0);
2144 TEST_ASSERT(molB == "1, CC1=CC(=O)C=CC1=O, 34.14");
2145
2146 molB = smisup.getItemText(5);
2147 TEST_ASSERT(
2148 molB ==
2149 "6, OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br, 87.74");
2150
2151 molB = smisup.getItemText(8);
2152 TEST_ASSERT(molB == "9, CC(=NO)C(C)=NO, 65.18");
2153 molB = smisup.getItemText(9);
2154 TEST_ASSERT(molB == "10, C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3, 0.00");
2155 }
2156
2157 {
2158 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
2159 SmilesMolSupplier smisup(fname, ",", 1, 0, false);
2160
2161 // make sure getItemText() flags EOF
2162 // (this was sf.net issue 3299878)
2163 molB = smisup.getItemText(0);
2164 TEST_ASSERT(molB == "1, CC1=CC(=O)C=CC1=O, 34.14");
2165
2166 ROMol *m = smisup[9];
2167 TEST_ASSERT(m);
2168 delete m;
2169 TEST_ASSERT(smisup.atEnd());
2170 molB = smisup.getItemText(9);
2171 TEST_ASSERT(molB == "10, C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3, 0.00");
2172 TEST_ASSERT(smisup.atEnd());
2173 }
2174
2175 {
2176 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
2177 TDTMolSupplier tdtsup(fname);
2178 // make sure getItemText() works if we haven't read at all from the
2179 // supplier:
2180 // (this was sf.net issue 2632960)
2181 molB = tdtsup.getItemText(0);
2182 TEST_ASSERT(molB != "");
2183 }
2184
2185 {
2186 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
2187 TDTMolSupplier tdtsup(fname);
2188 TEST_ASSERT(tdtsup.length() == 10);
2189
2190 molB = tdtsup.getItemText(0);
2191 TEST_ASSERT(molB != "");
2192
2193 mol1 = tdtsup[0];
2194 TEST_ASSERT(mol1);
2195 smiles = MolToSmiles(*mol1, 1);
2196 TEST_ASSERT(smiles == "Cc1nnc(N)nc1C");
2197 TEST_ASSERT(mol1->getNumAtoms() == 9);
2198 delete mol1;
2199
2200 // make sure getItemText doesn't screw up next()
2201 molB = tdtsup.getItemText(5);
2202 mol1 = tdtsup.next();
2203 TEST_ASSERT(mol1);
2204 TEST_ASSERT(mol1->getNumAtoms() == 9);
2205 smiles = MolToSmiles(*mol1, 1);
2206 TEST_ASSERT(smiles == "Cc1n[nH]c(=O)nc1N");
2207 delete mol1;
2208
2209 // make sure getItemText() works on the last molecule
2210 // (this was sf.net issue 1874882
2211 molB = tdtsup.getItemText(9);
2212 TEST_ASSERT(molB != "");
2213 TEST_ASSERT(molB.substr(0, 12) == "$SMI<Cc1n[nH");
2214 }
2215
2216 {
2217 fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
2218 TDTMolSupplier tdtsup(fname);
2219 TEST_ASSERT(tdtsup.length() == 10);
2220
2221 ROMol *mol = tdtsup[9];
2222 TEST_ASSERT(mol);
2223 delete mol;
2224 TEST_ASSERT(tdtsup.atEnd());
2225
2226 // (this was sf.net issue 3299878
2227 molB = tdtsup.getItemText(9);
2228 TEST_ASSERT(molB != "");
2229 TEST_ASSERT(molB.substr(0, 12) == "$SMI<Cc1n[nH");
2230 TEST_ASSERT(tdtsup.atEnd());
2231 }
2232 }
2233
// Tests forward-only reading of molecule files from streams.
//  - Always: ForwardSDMolSupplier on a plain std::ifstream over
//    NCI_aids_few.sdf must yield 16 molecules.
//  - With RDK_USE_BOOST_IOSTREAMS: sanity-checks line counts read through a
//    boost filtering_istream and through gzstream, then runs
//    ForwardSDMolSupplier on the gzipped SD file (16 molecules).
//  - With RDK_BUILD_MAEPARSER_SUPPORT: the same line-count sanity checks for
//    the .mae/.maegz fixtures, then MaeMolSupplier on the gzipped file
//    (16 molecules).
// Returns 1.
int testForwardSDSupplier() {
  std::string rdbase = getenv("RDBASE");
  std::string fname =
      rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
  std::string fname2 =
      rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf.gz";

  {
    std::ifstream strm(fname.c_str());
    ForwardSDMolSupplier sdsup(&strm, false);
    unsigned int i = 0;
    while (!sdsup.atEnd()) {
      ROMol *nmol = sdsup.next();
      // a null result is only acceptable once the supplier is exhausted
      TEST_ASSERT(nmol || sdsup.atEnd());
      if (nmol) {
        TEST_ASSERT(nmol->hasProp(common_properties::_Name));
        TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
        delete nmol;
        i++;
      }
    }
    TEST_ASSERT(i == 16);
  }
#ifdef RDK_USE_BOOST_IOSTREAMS
  // make sure the boost::iostreams are working
  {
    io::filtering_istream strm;
    strm.push(io::file_source(fname));

    // count the lines read before EOF; the i > 1000 bail-out keeps the loop
    // from running forever if EOF is never signalled
    unsigned int i = 0;
    while (!strm.eof()) {
      std::string line;
      std::getline(strm, line);
      if (!strm.eof()) {
        ++i;
      }
      if (i > 1000) {
        break;
      }
    }
    TEST_ASSERT(i == 998);
  }
  {
    // same line count, this time through the gzip-decompressing stream
    gzstream strm(fname2);
    unsigned int i = 0;
    while (!strm.eof()) {
      std::string line;
      std::getline(strm, line);
      if (!strm.eof()) {
        ++i;
      }
      if (i > 1000) {
        break;
      }
    }
    // NOTE(review): one line fewer than for the uncompressed file above;
    // presumably the two fixtures differ slightly -- confirm
    TEST_ASSERT(i == 997);
  }
  // looks good, now do a supplier:
  {
    gzstream strm(fname2);

    ForwardSDMolSupplier sdsup(&strm, false);
    unsigned int i = 0;
    while (!sdsup.atEnd()) {
      ROMol *nmol = sdsup.next();
      if (nmol) {
        TEST_ASSERT(nmol->hasProp(common_properties::_Name));
        TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
        delete nmol;
        i++;
      }
    }
    TEST_ASSERT(i == 16);
  }
#endif

#ifdef RDK_BUILD_MAEPARSER_SUPPORT
  // Now test that Maestro parsing of gz files works
  std::string maefname =
      rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.mae";
  std::string maefname2 =
      rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.maegz";
  {
    io::filtering_istream strm;
    strm.push(io::file_source(maefname));

    // count the lines read before EOF; the i > 1700 bail-out keeps the loop
    // from running forever if EOF is never signalled
    unsigned int i = 0;
    while (!strm.eof()) {
      std::string line;
      std::getline(strm, line);
      if (!strm.eof()) {
        ++i;
      }
      if (i > 1700) {
        break;
      }
    }
    TEST_ASSERT(i == 1663);
  }
  {
    // the gzipped Maestro file must yield the same number of lines
    gzstream strm(maefname2);

    unsigned int i = 0;
    while (!strm.eof()) {
      std::string line;
      std::getline(strm, line);
      if (!strm.eof()) {
        ++i;
      }
      if (i > 1700) {
        break;
      }
    }
    TEST_ASSERT(i == 1663);
  }
  // looks good, now do a supplier:
  {
    // NOTE(review): strm is heap-allocated and never deleted in this
    // function; presumably MaeMolSupplier takes ownership of it -- confirm
    auto *strm = new gzstream(maefname2);

    MaeMolSupplier maesup(strm);
    unsigned int i = 0;
    // the shared_ptr releases each molecule when it is reset to the next one
    std::shared_ptr<ROMol> nmol;
    while (!maesup.atEnd()) {
      nmol.reset(maesup.next());
      if (nmol != nullptr) {
        i++;
      }
    }
    TEST_ASSERT(i == 16);
  }
#endif  // RDK_BUILD_MAEPARSER_SUPPORT

  return 1;
}
2368
testMissingCRSDSupplier()2369 void testMissingCRSDSupplier() {
2370 std::string rdbase = getenv("RDBASE");
2371 std::string infile =
2372 rdbase + "/Code/GraphMol/FileParsers/test_data/missingCR.sdf";
2373 SDMolSupplier reader(infile);
2374 auto *mol = reader.next();
2375 delete mol;
2376 TEST_ASSERT(reader.atEnd());
2377 }
2378
testIssue3482695()2379 void testIssue3482695() {
2380 std::string rdbase = getenv("RDBASE");
2381 std::string infile =
2382 rdbase + "/Code/GraphMol/FileParsers/test_data/Issue3482695.sdf";
2383 SDMolSupplier reader(infile);
2384 ROMol *nmol = reader.next();
2385 TEST_ASSERT(nmol);
2386 TEST_ASSERT(nmol->getNumAtoms() == 0);
2387 TEST_ASSERT(nmol->hasProp("test"));
2388 delete nmol;
2389 }
2390
testIssue3525673()2391 void testIssue3525673() {
2392 std::string rdbase = getenv("RDBASE");
2393 std::string infile =
2394 rdbase + "/Code/GraphMol/FileParsers/test_data/Issue3525673.sdf";
2395 std::ifstream ins(infile.c_str());
2396 ForwardSDMolSupplier reader(&ins, false);
2397 ROMol *nmol;
2398
2399 nmol = reader.next();
2400 TEST_ASSERT(nmol);
2401 delete nmol;
2402
2403 nmol = reader.next();
2404 TEST_ASSERT(nmol);
2405 TEST_ASSERT(nmol->getNumAtoms() == 37);
2406 delete nmol;
2407
2408 nmol = reader.next();
2409 TEST_ASSERT(nmol);
2410 delete nmol;
2411
2412 nmol = reader.next();
2413 TEST_ASSERT(nmol);
2414 delete nmol;
2415
2416 nmol = reader.next();
2417 TEST_ASSERT(nmol);
2418 TEST_ASSERT(nmol->getNumAtoms() == 58);
2419 delete nmol;
2420
2421 nmol = reader.next();
2422 TEST_ASSERT(nmol);
2423 delete nmol;
2424
2425 nmol = reader.next();
2426 TEST_ASSERT(!nmol); // broken due to 'foo' in counts line!
2427
2428 nmol = reader.next();
2429 TEST_ASSERT(nmol);
2430 TEST_ASSERT(nmol->getNumAtoms() == 58);
2431 delete nmol;
2432
2433 nmol = reader.next();
2434 TEST_ASSERT(nmol);
2435 delete nmol;
2436 }
2437
testBlankLinesInProps()2438 void testBlankLinesInProps() {
2439 std::string rdbase = getenv("RDBASE");
2440 std::string infile =
2441 rdbase + "/Code/GraphMol/FileParsers/test_data/BlankPropLines.sdf";
2442 std::ifstream ins(infile.c_str());
2443 ForwardSDMolSupplier reader(&ins, false);
2444 ROMol *nmol;
2445 std::string pval;
2446
2447 nmol = reader.next();
2448 TEST_ASSERT(nmol);
2449 TEST_ASSERT(nmol->getNumAtoms() == 19);
2450 TEST_ASSERT(nmol->hasProp("MultiLineProperty1"));
2451 nmol->getProp("MultiLineProperty1", pval);
2452 TEST_ASSERT(pval == "foo\nbar\n \nbaz");
2453 TEST_ASSERT(nmol->hasProp("MultiLineProperty2"));
2454 TEST_ASSERT(!(nmol->hasProp("fooprop")));
2455 nmol->getProp("MultiLineProperty2", pval);
2456 TEST_ASSERT(pval == "foo\n> <fooprop>\nbaz\n ");
2457 delete nmol;
2458 }
2459
testSkipLines()2460 void testSkipLines() {
2461 std::string rdbase = getenv("RDBASE");
2462 std::string infile =
2463 rdbase + "/Code/GraphMol/FileParsers/test_data/SkipLines.sdf";
2464 std::ifstream ins(infile.c_str());
2465 ForwardSDMolSupplier reader(&ins, false);
2466 ROMol *nmol;
2467 std::string pval;
2468
2469 nmol = reader.next();
2470 TEST_ASSERT(nmol);
2471 TEST_ASSERT(nmol->getNumAtoms() == 1);
2472 TEST_ASSERT(nmol->hasProp("prop1"));
2473 delete nmol;
2474 }
2475
testGitHub23()2476 void testGitHub23() {
2477 std::string rdbase = getenv("RDBASE");
2478 std::string ofile =
2479 rdbase + "/Code/GraphMol/FileParsers/test_data/blah_molsupplier.sdf";
2480 auto *writer = new SDWriter(ofile);
2481
2482 ROMol *mol = SmilesToMol("CCCC");
2483 INT_VECT iv;
2484 iv.push_back(1);
2485 iv.push_back(2);
2486 mol->setProp("pval", iv);
2487 writer->write(*mol);
2488 delete mol;
2489
2490 writer->close();
2491 delete writer;
2492 }
2493
testGitHub88()2494 void testGitHub88() {
2495 std::string rdbase = getenv("RDBASE");
2496 std::string infile =
2497 rdbase + "/Code/GraphMol/FileParsers/test_data/github88.v3k.sdf";
2498 std::ifstream ins(infile.c_str());
2499 ForwardSDMolSupplier reader(&ins, false);
2500 ROMol *nmol;
2501
2502 nmol = reader.next();
2503 TEST_ASSERT(nmol);
2504 TEST_ASSERT(nmol->getNumAtoms() == 8);
2505 TEST_ASSERT(nmol->hasProp("prop1"));
2506 std::string pval;
2507 nmol->getProp("prop1", pval);
2508 TEST_ASSERT(pval == "4");
2509 delete nmol;
2510 }
2511
testGitHub2285()2512 void testGitHub2285() {
2513 std::string rdbase = getenv("RDBASE");
2514 std::string fname =
2515 rdbase + "/Code/GraphMol/FileParsers/test_data/github2285.sdf";
2516
2517 std::vector<std::string> smiles;
2518 {
2519 SDMolSupplier sdsup(fname);
2520 while (!sdsup.atEnd()) {
2521 ROMol *nmol = sdsup.next();
2522 TEST_ASSERT(nmol);
2523 smiles.push_back(MolToSmiles(*nmol));
2524 delete nmol;
2525 }
2526 }
2527 {
2528 SDMolSupplier sdsup(fname, true, false);
2529 int i = 0;
2530 while (!sdsup.atEnd()) {
2531 ROMol *nmol = sdsup.next();
2532 TEST_ASSERT(nmol);
2533 ROMol *m = MolOps::removeHs(*nmol);
2534 TEST_ASSERT(MolToSmiles(*m) == smiles[i++]);
2535 delete nmol;
2536 delete m;
2537 }
2538 TEST_ASSERT(i > 0);
2539 }
2540 }
2541
testGitHub2479()2542 void testGitHub2479() {
2543 std::string smiles1 = R"DATA(smiles id
2544 c1ccccc duff
2545 c1ccccc1 ok
2546 C(C garbage
2547 C1CC1 ok2
2548 CC(C)(C)(C)C duff2
2549 )DATA";
2550 {
2551 SmilesMolSupplier suppl;
2552 suppl.setData(smiles1);
2553 unsigned int cnt = 0;
2554 while (!suppl.atEnd()) {
2555 std::unique_ptr<ROMol> mol(suppl.next());
2556 if (cnt % 2) {
2557 TEST_ASSERT(mol);
2558 }
2559 ++cnt;
2560 }
2561 TEST_ASSERT(cnt == 5);
2562 }
2563
2564 std::string sdf1 = R"SDF(
2565 Mrv1810 06051911332D
2566
2567 3 2 0 0 0 0 999 V2000
2568 -13.3985 4.9850 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2569 -12.7066 5.4343 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2570 -12.0654 4.9151 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2571 1 2 1 0 0 0 0
2572 2 3 1 0 0 0 0
2573 M END
2574 $$$$
2575
2576 Mrv1810 06051911332D
2577
2578 3 2 0 0 0 0 999 V2000
2579 -10.3083 4.8496 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2580 -9.6408 5.3345 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0
2581 -9.0277 4.7825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2582 1 2 1 0 0 0 0
2583 2 3 1 0 0 0 0
2584 M END
2585 $$$$
2586 )SDF";
2587 {
2588 std::stringstream iss(sdf1);
2589 SDMolSupplier suppl(&iss, false);
2590 std::unique_ptr<ROMol> mol1(suppl.next());
2591 TEST_ASSERT(mol1);
2592 std::unique_ptr<ROMol> mol2(suppl.next());
2593 TEST_ASSERT(!mol2);
2594 TEST_ASSERT(suppl.atEnd());
2595 }
2596 {
2597 std::stringstream iss(sdf1);
2598 ForwardSDMolSupplier suppl(&iss, false);
2599 std::unique_ptr<ROMol> mol1(suppl.next());
2600 TEST_ASSERT(mol1);
2601 std::unique_ptr<ROMol> mol2(suppl.next());
2602 TEST_ASSERT(!mol2);
2603 TEST_ASSERT(!suppl.atEnd());
2604 TEST_ASSERT(!suppl.getEOFHitOnRead());
2605 std::unique_ptr<ROMol> mol3(suppl.next());
2606 TEST_ASSERT(!mol3);
2607 TEST_ASSERT(suppl.atEnd());
2608 TEST_ASSERT(suppl.getEOFHitOnRead());
2609 }
2610
2611 // truncated file1
2612 std::string sdf2 = R"SDF(
2613 Mrv1810 06051911332D
2614
2615 3 2 0 0 0 0 999 V2000
2616 -13.3985 4.9850 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2617 -12.7066 5.4343 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2618 -12.0654 4.9151 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2619 1 2 1 0 0 0 0
2620 2 3 1 0 0 0 0
2621 M END
2622 $$$$
2623
2624 Mrv1810 06051911332D
2625
2626 3 2 0 0 0 0 999 V2000
2627 -10.3083 4.8496 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2628 -9.6408 5.3345 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0
2629 -9.0277 4.7825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2630 1 2 1 0 0 0 0
2631 2 3 1 0 0 0 0
2632 M END
2633 $$$$
2634
2635 Mrv1810 06051911332D
2636
2637 3 2 0 0 0 0 999 V2000
2638 -10.3083 4.8496 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2639 -9.6
2640 )SDF";
2641 {
2642 std::stringstream iss(sdf2);
2643 SDMolSupplier suppl(&iss, false);
2644 std::unique_ptr<ROMol> mol1(suppl.next());
2645 TEST_ASSERT(mol1);
2646 std::unique_ptr<ROMol> mol2(suppl.next());
2647 TEST_ASSERT(!mol2);
2648 std::unique_ptr<ROMol> mol3(suppl.next());
2649 TEST_ASSERT(!mol3);
2650 TEST_ASSERT(suppl.atEnd());
2651 }
2652 {
2653 std::stringstream iss(sdf2);
2654 ForwardSDMolSupplier suppl(&iss, false);
2655 std::unique_ptr<ROMol> mol1(suppl.next());
2656 TEST_ASSERT(mol1);
2657 std::unique_ptr<ROMol> mol2(suppl.next());
2658 TEST_ASSERT(!mol2);
2659 TEST_ASSERT(!suppl.atEnd());
2660 TEST_ASSERT(!suppl.getEOFHitOnRead());
2661 std::unique_ptr<ROMol> mol3(suppl.next());
2662 TEST_ASSERT(!mol3);
2663 TEST_ASSERT(suppl.atEnd());
2664 TEST_ASSERT(!suppl.getEOFHitOnRead());
2665 }
2666 // truncated file2
2667 std::string sdf3 = R"SDF(
2668 Mrv1810 06051911332D
2669
2670 3 2 0 0 0 0 999 V2000
2671 -13.3985 4.9850 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2672 -12.7066 5.4343 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2673 -12.0654 4.9151 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2674 1 2 1 0 0 0 0
2675 2 3 1 0 0 0 0
2676 M END
2677 > <pval> (1)
2678 [1,2,]
2679
2680 $$$$
2681
2682 Mrv1810 06051911332D
2683
2684 3 2 0 0 0 0 999 V2000
2685 -10.3083 4.8496 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2686 -9.6408 5.3345 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2687 -9.0277 4.7825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2688 1 2 1 0 0 0 0
2689 2 3 1 0 0 0 0
2690 M END
2691 > <pval> (1)
2692 [1,2,]
2693 )SDF";
2694 {
2695 std::stringstream iss(sdf3);
2696 SDMolSupplier suppl(&iss, false);
2697 std::unique_ptr<ROMol> mol1(suppl.next());
2698 TEST_ASSERT(mol1);
2699 std::unique_ptr<ROMol> mol2(suppl.next());
2700 TEST_ASSERT(mol2);
2701 TEST_ASSERT(suppl.atEnd());
2702 }
2703 {
2704 std::stringstream iss(sdf3);
2705 ForwardSDMolSupplier suppl(&iss, false);
2706 std::unique_ptr<ROMol> mol1(suppl.next());
2707 TEST_ASSERT(mol1);
2708 std::unique_ptr<ROMol> mol2(suppl.next());
2709 TEST_ASSERT(mol2);
2710 TEST_ASSERT(suppl.atEnd());
2711 }
2712 }
2713
#ifdef RDK_BUILD_MAEPARSER_SUPPORT
// Feeds one Maestro CT block (15 atoms, 17 bonds) to a MaeMolSupplier with
// sanitization switched off and requires that no molecule comes back. An
// Invar::Invariant thrown by next() is an accepted outcome; any other
// exception propagates to the caller.
// Only compiled when RDK_BUILD_MAEPARSER_SUPPORT is defined; otherwise the
// test is an empty stub so that main() can call it unconditionally.
void testGitHub2881() {
  std::string data = R"DATA(f_m_ct {
s_m_title
s_m_entry_id
s_m_entry_name
s_m_Source_Path
s_m_Source_File
i_m_Source_File_Index
s_st_Chirality_1
s_st_Chirality_2
s_m_subgroup_title
s_m_subgroupid
b_m_subgroup_collapsed
i_m_ct_format
:::
"Untitled Document-4"
17
newTemplates2.1
/Users/nicola/schrodinger/coordgen_standalone
templates.mae
17
3_S_4_6_2
7_S_8_9_6_10
templates->templates->templates
templates->templates1->templates11
0
2
m_depend[2] {
# First column is dependency index #
i_m_depend_dependency
s_m_depend_property
:::
1 10 s_st_Chirality_1
2 10 s_st_Chirality_2
:::
}
m_atom[15] {
# First column is atom index #
i_m_mmod_type
r_m_x_coord
r_m_y_coord
r_m_z_coord
i_m_residue_number
i_m_color
i_m_atomic_number
s_m_color_rgb
s_m_atom_name
:::
1 5 1.186400 1.035900 0.000000 900 2 6 A0A0A0 C1
2 5 0.370300 1.157000 0.000000 900 2 6 A0A0A0 C2
3 4 -0.326500 0.715300 0.000000 900 2 6 A0A0A0 C3
4 5 0.085100 0.000400 0.000000 900 2 6 A0A0A0 C4
5 26 -0.328300 -0.713600 0.000000 900 43 7 5757FF N5
6 5 -1.151500 0.716400 0.000000 900 2 6 A0A0A0 C6
7 5 -1.564900 0.002400 0.000000 900 2 6 A0A0A0 C7
8 5 -1.153300 -0.712600 0.000000 900 2 6 A0A0A0 C9
9 2 1.724800 0.410800 0.000000 900 2 6 A0A0A0 C12
10 2 1.723800 -0.414200 0.000000 900 2 6 A0A0A0 C13
11 5 1.183800 -1.037900 0.000000 900 2 6 A0A0A0 C14
12 5 0.367400 -1.157000 0.000000 900 2 6 A0A0A0 C15
13 7 2.508100 -0.670100 0.000000 900 2 6 A0A0A0 C16
14 7 2.993800 -0.003300 0.000000 900 2 6 A0A0A0 C17
15 29 2.509700 0.664800 0.000000 900 43 7 5757FF N18
:::
}
m_bond[17] {
# First column is bond index #
i_m_from
i_m_to
i_m_order
:::
1 1 2 1
2 1 9 1
3 2 3 1
4 3 4 1
5 3 6 1
6 4 5 1
7 5 8 1
8 5 12 1
9 6 7 1
10 7 8 1
11 9 10 2
12 9 15 1
13 10 11 1
14 10 13 1
15 11 12 1
16 13 14 2
17 14 15 1
:::
}
}
)DATA";
  {
    const bool takeOwnership = true;
    const bool sanitize = false;
    // Heap-allocated on purpose: ownership of the stream is handed to the
    // supplier through takeOwnership, so it is not deleted here.
    auto *input = new std::istringstream(data);
    MaeMolSupplier supplier(input, takeOwnership, sanitize);

    ROMol *parsed = nullptr;
    try {
      parsed = supplier.next();
    } catch (const Invar::Invariant &) {
      // Tolerated: `parsed` simply stays null.
    }
    TEST_ASSERT(!parsed);
  }
}
#else
void testGitHub2881() {}
#endif
2823
testGitHub3517()2824 void testGitHub3517() {
2825 std::string rdbase = getenv("RDBASE");
2826 std::string fname =
2827 rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2828
2829 SDMolSupplier sdsup(fname);
2830 TEST_ASSERT(!sdsup.atEnd());
2831 size_t l = sdsup.length();
2832 TEST_ASSERT(l > 0);
2833 TEST_ASSERT(!sdsup.atEnd());
2834 }
2835
main()2836 int main() {
2837 RDLog::InitLogs();
2838
2839 #if 1
2840 BOOST_LOG(rdErrorLog) << "\n-----------------------------------------\n";
2841 testMolSup();
2842 BOOST_LOG(rdErrorLog) << "Finished: testMolSup()\n";
2843 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2844
2845 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2846 testRandMolSup();
2847 BOOST_LOG(rdErrorLog) << "Finished: testRandMolSup()\n";
2848 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2849
2850 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2851 testSmilesSup();
2852 BOOST_LOG(rdErrorLog) << "Finished: testSmilesSup()\n";
2853 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2854
2855 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2856 testSmilesSupFromText();
2857 BOOST_LOG(rdErrorLog) << "Finished: testSmilesSupFromText()\n";
2858 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2859
2860 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2861 testSmilesWriter();
2862 BOOST_LOG(rdErrorLog) << "Finished: testSmilesWriter()\n";
2863 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2864
2865 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2866 testSDWriter();
2867 BOOST_LOG(rdErrorLog) << "Finished: testSDWriter()\n";
2868 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2869
2870 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2871 testSDSupplierEnding();
2872 BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierEnding()\n";
2873 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2874
2875 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2876 testSuppliersEmptyFile();
2877 BOOST_LOG(rdErrorLog) << "Finished: testSuppliersEmptyFile()\n";
2878 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2879
2880 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2881 testCisTrans();
2882 BOOST_LOG(rdErrorLog) << "Finished: testCisTrans()\n";
2883 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2884
2885 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2886 testStereoRound();
2887 BOOST_LOG(rdErrorLog) << "Finished: testStereoRound()\n";
2888 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2889
2890 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2891 testIssue226();
2892 BOOST_LOG(rdErrorLog) << "Finished: testIssue226()\n";
2893 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2894
2895 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2896 testTDTSupplier1();
2897 BOOST_LOG(rdErrorLog) << "Finished: testTDTSupplier1()\n";
2898 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2899
2900 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2901 testTDTSupplier2();
2902 BOOST_LOG(rdErrorLog) << "Finished: testTDTSupplier2()\n";
2903 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2904
2905 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2906 testTDTSupplier3();
2907 BOOST_LOG(rdErrorLog) << "Finished: testTDTSupplier3()\n";
2908 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2909
2910 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2911 testSDSupplierFromText();
2912 BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierFromText()\n";
2913 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2914
2915 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2916 testSDSupplierStrLax1();
2917 BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierStrLax1()\n";
2918 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2919
2920 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2921 testSDSupplierStrLax2();
2922 BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierStrLax2()\n";
2923 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2924
2925 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2926 testSDSupplierFromTextStrLax1();
2927 BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierFromTextStrLax1()\n";
2928 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2929
2930 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2931 testSDSupplierFromTextStrLax2();
2932 BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierFromTextStrLax2()\n";
2933 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2934
2935 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2936 testIssue265();
2937 BOOST_LOG(rdErrorLog) << "Finished: testIssue265()\n";
2938 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2939
2940 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2941 testSDErrorHandling();
2942 BOOST_LOG(rdErrorLog) << "Finished: testSDErrorHandling()\n";
2943 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2944
2945 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2946 testIssue381();
2947 BOOST_LOG(rdErrorLog) << "Finished: testIssue381()\n";
2948 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2949
2950 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2951 testSetStreamIndices();
2952 BOOST_LOG(rdErrorLog) << "Finished: testSetStreamIndices()\n";
2953 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2954
2955 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2956 testMixIterAndRandom();
2957 BOOST_LOG(rdErrorLog) << "Finished: testMixIterAndRandom()\n";
2958 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2959
2960 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2961 testRemoveHs();
2962 BOOST_LOG(rdErrorLog) << "Finished: testRemoveHs()\n";
2963 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2964
2965 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2966 testGetItemText();
2967 BOOST_LOG(rdErrorLog) << "Finished: testGetItemText()\n";
2968 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2969
2970 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2971 testForwardSDSupplier();
2972 BOOST_LOG(rdErrorLog) << "Finished: testForwardSDSupplier()\n";
2973 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2974
2975 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2976 testMissingCRSDSupplier();
2977 BOOST_LOG(rdErrorLog) << "Finished: testMissingCRSDSupplier()\n";
2978 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2979
2980 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2981 testIssue3482695();
2982 BOOST_LOG(rdErrorLog) << "Finished: testIssue3482695()\n";
2983 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2984 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2985 testIssue3525673();
2986 BOOST_LOG(rdErrorLog) << "Finished: testIssue3525673()\n";
2987 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2988
2989 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2990 testBlankLinesInProps();
2991 BOOST_LOG(rdErrorLog) << "Finished: testBlankLinesInProps()\n";
2992 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2993
2994 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2995 testSkipLines();
2996 BOOST_LOG(rdErrorLog) << "Finished: testSkipLines()\n";
2997 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2998
2999 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3000 testGitHub23();
3001 BOOST_LOG(rdErrorLog) << "Finished: testGitHub23()\n";
3002 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3003
3004 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3005 testGitHub88();
3006 BOOST_LOG(rdErrorLog) << "Finished: testGitHub88()\n";
3007 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3008
3009 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3010 testGitHub2285();
3011 BOOST_LOG(rdErrorLog) << "Finished: testGitHub2285()\n";
3012 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3013 #endif
3014
3015 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3016 testGitHub2479();
3017 BOOST_LOG(rdErrorLog) << "Finished: testGitHub2479()\n";
3018 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3019
3020 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3021 testGitHub2881();
3022 BOOST_LOG(rdErrorLog) << "Finished: testGitHub2881()\n";
3023 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3024
3025 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3026 testGitHub3517();
3027 BOOST_LOG(rdErrorLog) << "Finished: testGitHub3517()\n";
3028 BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3029
3030 return 0;
3031 }
3032