1 //
2 //   Copyright (C) 2002-2019 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <RDGeneral/test.h>
11 #include <GraphMol/RDKitBase.h>
12 #include <string>
13 #include <iostream>
14 #include <fstream>
15 #include <map>
16 #include <memory>
17 
18 #include "MolSupplier.h"
19 #include "MolWriters.h"
20 #include "FileParsers.h"
21 #include "FileParserUtils.h"
22 #include <RDGeneral/FileParseException.h>
23 #include <RDGeneral/BadFileException.h>
24 #include <RDGeneral/RDLog.h>
25 #include <RDStreams/streams.h>
26 #include <GraphMol/MonomerInfo.h>
27 #include <GraphMol/SmilesParse/SmilesWrite.h>
28 #include <GraphMol/SmilesParse/SmilesParse.h>
29 #include <GraphMol/Depictor/RDDepictor.h>
30 
31 #include <boost/iostreams/device/file.hpp>
32 #include <boost/iostreams/filtering_stream.hpp>
33 namespace io = boost::iostreams;
34 
35 using namespace RDKit;
36 
testMolSup()37 int testMolSup() {
38   std::string rdbase = getenv("RDBASE");
39   std::string fname =
40       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
41 
42   {
43     SDMolSupplier sdsup(fname);
44     unsigned int i = 0;
45     while (!sdsup.atEnd()) {
46       ROMol *nmol = sdsup.next();
47       if (nmol) {
48         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
49         TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
50         delete nmol;
51       }
52       i++;
53     }
54     TEST_ASSERT(i == 16);
55   }
56   {
57     SDMolSupplier sdsup(fname);
58     for (unsigned int i = 0; i < 16; ++i) {
59       ROMol *nmol = sdsup.next();
60       if (nmol) {
61         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
62         TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
63         delete nmol;
64       }
65     }
66     // test issue 3524949:
67     TEST_ASSERT(sdsup.atEnd());
68     bool ok = false;
69     try {
70       sdsup.next();
71     } catch (FileParseException &) {
72       ok = true;
73     }
74     TEST_ASSERT(ok);
75   }
76   {
77     std::ifstream strm(fname.c_str());
78     SDMolSupplier sdsup(&strm, false);
79     unsigned int i = 0;
80     while (!sdsup.atEnd()) {
81       ROMol *nmol = sdsup.next();
82       if (nmol) {
83         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
84         TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
85         delete nmol;
86       }
87       i++;
88     }
89     TEST_ASSERT(i == 16);
90   }
91   {
92     auto *strm = new std::ifstream(fname.c_str());
93     SDMolSupplier sdsup(strm, true);
94     unsigned int i = 0;
95     while (!sdsup.atEnd()) {
96       ROMol *nmol = sdsup.next();
97       if (nmol) {
98         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
99         TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
100         delete nmol;
101       }
102       i++;
103     }
104     TEST_ASSERT(i == 16);
105   }
106 #ifdef RDK_BUILD_MAEPARSER_SUPPORT
107   {  // Test reading properties
108     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/props_test.mae";
109 
110     MaeMolSupplier maesup(fname);
111     std::unique_ptr<ROMol> nmol(maesup.next());
112     TEST_ASSERT(nmol);
113 
114     // Test mol properties
115     TEST_ASSERT(nmol->hasProp(common_properties::_Name));
116     TEST_ASSERT(nmol->hasProp("b_sd_chiral_flag"));
117     TEST_ASSERT(nmol->getProp<bool>("b_sd_chiral_flag") == false);
118     TEST_ASSERT(nmol->hasProp("i_sd_NSC"));
119     TEST_ASSERT(nmol->getProp<int>("i_sd_NSC") == 48);
120     TEST_ASSERT(nmol->hasProp("s_m_entry_name"));
121     TEST_ASSERT(nmol->getProp<std::string>("s_m_entry_name") ==
122                 "NCI_aids_few.1");
123     TEST_ASSERT(nmol->hasProp("r_f3d_dummy"));
124     TEST_ASSERT(std::abs(nmol->getProp<double>("r_f3d_dummy") - 42.123) <
125                 0.0001);
126 
127     // Test atom properties
128     TEST_ASSERT(nmol->getNumAtoms() == 19);
129     for (int i = 0; i < 19; ++i) {
130       const auto *atom = nmol->getAtomWithIdx(i);
131 
132       // The integer property is present for all atoms
133       TEST_ASSERT(atom->hasProp("i_m_minimize_atom_index"));
134       TEST_ASSERT(atom->getProp<int>("i_m_minimize_atom_index") == 1 + i);
135 
136       // The bool property is only defined for i < 10
137       if (i < 10) {
138         TEST_ASSERT(atom->hasProp("b_m_dummy"));
139         TEST_ASSERT(atom->getProp<bool>("b_m_dummy") ==
140                     static_cast<bool>(i % 2));
141       } else {
142         TEST_ASSERT(!atom->hasProp("b_m_dummy"));
143       }
144 
145       // The real property is only defined for i >= 10
146       if (i >= 10) {
147         TEST_ASSERT(atom->hasProp("r_f3d_dummy"));
148         TEST_ASSERT(std::abs(atom->getProp<double>("r_f3d_dummy") -
149                              (19.1 - i)) < 0.0001);
150       } else {
151         TEST_ASSERT(!atom->hasProp("r_f3d_dummy"));
152       }
153 
154       // All atoms have the string prop
155       TEST_ASSERT(atom->hasProp("s_m_dummy"));
156       TEST_ASSERT(atom->getProp<std::string>("s_m_dummy") ==
157                   std::to_string(19 - i));
158     }
159 
160     TEST_ASSERT(maesup.atEnd());
161   }
162   {  // Test parsing stereo properties. Mol is 2D and has stereo labels.
163     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/stereochem.mae";
164     MaeMolSupplier maesup(fname);
165 
166     {  // Stereo bonds. These get overwritten by the double bond detection.
167       std::unique_ptr<ROMol> nmol(maesup.next());
168       TEST_ASSERT(nmol);
169       {
170         Bond *bnd = nmol->getBondWithIdx(1);
171         TEST_ASSERT(bnd);
172         TEST_ASSERT(bnd->getStereoAtoms() == INT_VECT({0, 3}));
173         TEST_ASSERT(bnd->getStereo() == Bond::STEREOTRANS);
174       }
175       {
176         Bond *bnd = nmol->getBondWithIdx(3);
177         TEST_ASSERT(bnd);
178         TEST_ASSERT(bnd->getStereoAtoms() == INT_VECT({2, 5}));
179         TEST_ASSERT(bnd->getStereo() == Bond::STEREOCIS);
180       }
181     }
182     {  // Chiralities (these get CIP codes)
183       std::unique_ptr<ROMol> nmol(maesup.next());
184       TEST_ASSERT(nmol);
185       {
186         Atom *at = nmol->getAtomWithIdx(1);
187         TEST_ASSERT(at);
188         TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
189         TEST_ASSERT(at->getProp<std::string>(common_properties::_CIPCode) ==
190                     "R");
191       }
192       {
193         Atom *at = nmol->getAtomWithIdx(3);
194         TEST_ASSERT(at);
195         TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW);
196         TEST_ASSERT(at->getProp<std::string>(common_properties::_CIPCode) ==
197                     "S");
198       }
199     }
200     {  // Pseudochiralities (no CIP codes)
201       std::unique_ptr<ROMol> nmol(maesup.next());
202       TEST_ASSERT(nmol);
203       {
204         Atom *at = nmol->getAtomWithIdx(2);
205         TEST_ASSERT(at);
206         TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW);
207         TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
208       }
209       {
210         Atom *at = nmol->getAtomWithIdx(5);
211         TEST_ASSERT(at);
212         TEST_ASSERT(at->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW);
213         TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
214       }
215     }
216     {  // intentionally bad chirality label, intended to
217       // make sure we can step over parse errors
218       std::unique_ptr<ROMol> nmol;
219       try {
220         nmol.reset(maesup.next());
221       } catch (const Invar::Invariant &) {
222         // just ignore this failure
223       }
224       TEST_ASSERT(!nmol);
225     }
226     {  // "Undefined" chirality label
227       std::unique_ptr<ROMol> nmol(maesup.next());
228       TEST_ASSERT(nmol);
229       {
230         Atom *at = nmol->getAtomWithIdx(2);
231         TEST_ASSERT(at);
232         TEST_ASSERT(at->getChiralTag() == Atom::CHI_UNSPECIFIED);
233         TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
234       }
235       {
236         Atom *at = nmol->getAtomWithIdx(5);
237         TEST_ASSERT(at);
238         TEST_ASSERT(at->getChiralTag() == Atom::CHI_UNSPECIFIED);
239         TEST_ASSERT(!at->hasProp(common_properties::_CIPCode));
240       }
241     }
242     TEST_ASSERT(maesup.atEnd());
243   }
244   {  // Test loop reading
245     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.mae";
246     MaeMolSupplier maesup(fname);
247     std::shared_ptr<ROMol> nmol;
248     for (unsigned int i = 0; i < 16; ++i) {
249       nmol.reset(maesup.next());
250       if (nmol) {
251         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
252         TEST_ASSERT(nmol->getNumAtoms() > 0);
253         if (i == 0) {
254           auto smiles = MolToSmiles(*nmol);
255           TEST_ASSERT(smiles ==
256                       "CCC1=[O+][Cu@]2([O+]=C(CC)CC(CC)=[O+]2)[O+]=C(CC)C1");
257         }
258       }
259     }
260     TEST_ASSERT(maesup.atEnd());
261     bool ok = false;
262     try {
263       maesup.next();
264     } catch (FileParseException &) {
265       ok = true;
266     }
267     TEST_ASSERT(ok);
268   }
269 
270   {
271     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/bad_ppty.mae";
272     const std::string err_msg_substr = "Bad format for property";
273 
274     bool ok = false;
275     std::unique_ptr<ROMol> mol;
276     MaeMolSupplier maesup(fname);
277 
278     // This is in excess: there are only 3 mols in the file, and the second one
279     // has an invalid property name, so it won't be read
280     for (unsigned int i = 0; i < 5; ++i) {
281       try {
282         mol.reset(maesup.next());
283       } catch (const FileParseException &e) {
284         const std::string err_msg(e.what());
285         TEST_ASSERT(i == 1);
286         TEST_ASSERT(err_msg.find(err_msg_substr) != std::string::npos);
287         ok = true;
288         break;
289       }
290       TEST_ASSERT(mol);
291       TEST_ASSERT(mol->hasProp(common_properties::_Name));
292       TEST_ASSERT(mol->getNumAtoms() == 1);
293       TEST_ASSERT(!maesup.atEnd());
294     }
295     TEST_ASSERT(!maesup.atEnd());
296     TEST_ASSERT(ok);
297   }
298 
299   {  // Test Maestro PDB property reading
300     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/1kv1.maegz";
301     auto *strm = new gzstream(fname);
302     MaeMolSupplier maesup(strm);
303 
304     std::shared_ptr<ROMol> nmol;
305     nmol.reset(maesup.next());
306     const Atom *atom = nmol->getAtomWithIdx(0);
307     auto *info = (AtomPDBResidueInfo *)(atom->getMonomerInfo());
308     TEST_ASSERT(info->getResidueName() == "ARG ");
309     TEST_ASSERT(info->getChainId() == "A");
310     TEST_ASSERT(info->getResidueNumber() == 5);
311   }
312 #endif  // RDK_BUILD_MAEPARSER_SUPPORT
313   return 1;
314 }
315 
testRandMolSup()316 void testRandMolSup() {
317   std::string rdbase = getenv("RDBASE");
318   std::string fname =
319       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
320   // std::string fname("../test_data/NCI_aids_few.sdf");
321   SDMolSupplier sdsup(fname);
322 
323   ROMol *tmol = sdsup[7];
324   delete tmol;
325 
326   CHECK_INVARIANT(sdsup.length() == 16, "");
327 
328   STR_VECT names;
329   names.push_back(std::string("48"));
330   names.push_back(std::string("128"));
331   names.push_back(std::string("164"));
332   names.push_back(std::string("180"));
333   names.push_back(std::string("192"));
334   names.push_back(std::string("210"));
335   names.push_back(std::string("213"));
336   names.push_back(std::string("229"));
337 
338   int i;
339   for (i = 0; i < 8; i++) {
340     ROMol *mol = sdsup[2 * i];
341     std::string mname;
342     mol->getProp(common_properties::_Name, mname);
343     CHECK_INVARIANT(mname == names[i], "");
344     delete mol;
345   }
346 
347   // get a random molecule
348   ROMol *mol = sdsup[5];
349   TEST_ASSERT(mol);
350   std::string mname;
351   mol->getProp(common_properties::_Name, mname);
352   delete mol;
353   CHECK_INVARIANT(mname == "170", "");
354 
355   // get the last molecule:
356   mol = sdsup[15];
357   TEST_ASSERT(mol);
358   delete mol;
359 
360   // and make sure we're at the end:
361   TEST_ASSERT(sdsup.atEnd());
362   // now make sure we can grab earlier mols (was sf.net issue 1904170):
363   mol = sdsup[0];
364   TEST_ASSERT(mol);
365   delete mol;
366 
367   // Issue 113: calling length before grabbing a molecule results in crashes:
368   SDMolSupplier sdsup2(fname);
369   CHECK_INVARIANT(sdsup2.length() == 16, "");
370 }
371 
testSmilesSup()372 void testSmilesSup() {
373   std::string mname;
374   std::string fname;
375   ROMol *mol;
376 
377   std::string rdbase = getenv("RDBASE");
378   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.2.csv";
379   {
380     SmilesMolSupplier nSup2(fname, ",", 1, 0, true);
381     TEST_ASSERT(nSup2.length() == 10);
382   }
383   {
384     SmilesMolSupplier nSup2(fname, ",", 1, 0, true);
385 
386     mol = nSup2[3];
387     TEST_ASSERT(!nSup2.atEnd())
388     TEST_ASSERT(nSup2.length() == 10);
389 
390     mol->getProp(common_properties::_Name, mname);
391     CHECK_INVARIANT(mname == "4", "");
392     mol->getProp("TPSA", mname);
393     CHECK_INVARIANT(mname == "82.78", "");
394     delete mol;
395 
396     mol = nSup2[9];
397     TEST_ASSERT(mol);
398     delete mol;
399     // now make sure we can grab earlier mols (was sf.net issue 1904170):
400     mol = nSup2[0];
401     TEST_ASSERT(mol);
402     delete mol;
403   }
404   {
405     std::ifstream strm(fname.c_str(), std::ios_base::binary);
406     SmilesMolSupplier nSup2(&strm, false, ",", 1, 0, true);
407 
408     mol = nSup2[3];
409     CHECK_INVARIANT(nSup2.length() == 10, "");
410 
411     mol->getProp(common_properties::_Name, mname);
412     CHECK_INVARIANT(mname == "4", "");
413     mol->getProp("TPSA", mname);
414     CHECK_INVARIANT(mname == "82.78", "");
415     delete mol;
416 
417     mol = nSup2[9];
418     TEST_ASSERT(mol);
419     delete mol;
420     // now make sure we can grab earlier mols (was sf.net issue 1904170):
421     mol = nSup2[0];
422     TEST_ASSERT(mol);
423     delete mol;
424   }
425 
426   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/first_200.tpsa.csv";
427   SmilesMolSupplier smiSup(fname, ",", 0, -1);
428 
429   mol = smiSup[16];
430 
431   mol->getProp("TPSA", mname);
432   CHECK_INVARIANT(mname == "46.25", "");
433   delete mol;
434 
435   mol = smiSup[8];
436   mol->getProp("TPSA", mname);
437   CHECK_INVARIANT(mname == "65.18", "");
438   delete mol;
439 
440   int len = smiSup.length();
441   CHECK_INVARIANT(len == 200, "");
442 
443   smiSup.reset();
444   int i = 0;
445   mol = smiSup.next();
446   while (1) {
447     std::string mname;
448     mol->getProp(common_properties::_Name, mname);
449     i++;
450     delete mol;
451     try {
452       mol = smiSup.next();
453     } catch (FileParseException &) {
454       break;
455     }
456   }
457 
458   CHECK_INVARIANT(i == 200, "");
459 
460   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
461   SmilesMolSupplier *nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
462 
463   // check the length before we read anything out...
464   //  this was a problem at one point (Issue 113)
465   CHECK_INVARIANT(nSup->length() == 10, "");
466   mol = (*nSup)[3];
467 
468   mol->getProp(common_properties::_Name, mname);
469   CHECK_INVARIANT(mname == "4", "");
470   mol->getProp("Column_2", mname);
471   CHECK_INVARIANT(mname == "82.78", "");
472 
473   delete nSup;
474   nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
475   unsigned int nRead = 0;
476   while (!nSup->atEnd()) {
477     delete mol;
478     mol = nSup->next();
479     TEST_ASSERT(mol);
480     nRead++;
481   }
482   TEST_ASSERT(nSup->length() == 10);
483   TEST_ASSERT(nRead == 10);
484 
485   delete nSup;
486   delete mol;
487 }
488 
testSmilesSupFromText()489 void testSmilesSupFromText() {
490   std::string mname;
491   std::string fname;
492   ROMol *mol;
493 
494   SmilesMolSupplier nSup2;
495   std::string text;
496   bool failed;
497   int nAts;
498 
499   // this was a delightful boundary condition:
500   BOOST_LOG(rdErrorLog)
501       << "------------------------------------------------------" << std::endl;
502   text =
503       "CC\n"
504       "CCC\n"
505       "CCOC\n"
506       "CCCCOC";
507   {
508     nSup2.setData(text, " ", 0, -1, false, true);
509     //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
510     mol = nSup2.next();
511     nAts = mol->getNumAtoms();
512     delete mol;
513     TEST_ASSERT(nAts == 2);
514 
515     mol = nSup2[3];
516     nAts = mol->getNumAtoms();
517     delete mol;
518     TEST_ASSERT(nAts == 6);
519     TEST_ASSERT(nSup2.length() == 4);
520 
521     failed = false;
522     try {
523       mol = nSup2[4];
524       delete mol;
525     } catch (FileParseException &) {
526       failed = true;
527     }
528     TEST_ASSERT(failed);
529     mol = nSup2[2];
530     nAts = mol->getNumAtoms();
531     TEST_ASSERT(nAts == 4);
532     TEST_ASSERT(mol->hasProp(common_properties::_Name));
533     mol->getProp(common_properties::_Name, mname);
534     TEST_ASSERT(mname == "2");
535     delete mol;
536   }
537   {
538     nSup2.setData(text, " ", 0, -1, false, true);
539     mol = nSup2[2];
540     TEST_ASSERT(mol);
541     nAts = mol->getNumAtoms();
542     TEST_ASSERT(nAts == 4);
543     TEST_ASSERT(mol->hasProp(common_properties::_Name));
544     mol->getProp(common_properties::_Name, mname);
545     TEST_ASSERT(mname == "2");
546     delete mol;
547 
548     mol = nSup2[3];
549     TEST_ASSERT(mol);
550     nAts = mol->getNumAtoms();
551     TEST_ASSERT(nAts == 6);
552     TEST_ASSERT(mol->hasProp(common_properties::_Name));
553     mol->getProp(common_properties::_Name, mname);
554     TEST_ASSERT(mname == "3");
555     delete mol;
556   }
557   {
558     nSup2.setData(text, " ", 0, -1, false, true);
559     mol = nSup2[3];
560     TEST_ASSERT(mol);
561     nAts = mol->getNumAtoms();
562     TEST_ASSERT(nAts == 6);
563     TEST_ASSERT(mol->hasProp(common_properties::_Name));
564     mol->getProp(common_properties::_Name, mname);
565     TEST_ASSERT(mname == "3");
566 
567     delete mol;
568     mol = nSup2[2];
569     TEST_ASSERT(mol);
570     nAts = mol->getNumAtoms();
571     TEST_ASSERT(nAts == 4);
572     TEST_ASSERT(mol->hasProp(common_properties::_Name));
573     mol->getProp(common_properties::_Name, mname);
574     TEST_ASSERT(mname == "2");
575     delete mol;
576   }
577   // --------------
578   // basics:
579   text =
580       "Id SMILES Column_2\n"
581       "mol-1 C 1.0\n"
582       "mol-2 CC 4.0\n"
583       "mol-3 CCC 9.0\n"
584       "mol-4 CCCC 16.0\n";
585 #if 1
586   nSup2.setData(text, " ", 1, 0, true, true);
587   mol = nSup2[3];
588   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
589   CHECK_INVARIANT(nSup2.length() == 4, "");
590   mol->getProp(common_properties::_Name, mname);
591   TEST_ASSERT(mname == "mol-4");
592   mol->getProp("Column_2", mname);
593   TEST_ASSERT(mname == "16.0");
594   delete mol;
595 
596   // ensure that we can call setData a second time:
597   text =
598       "Id SMILES Column_2\n"
599       "mol-1 C 1.0\n"
600       "mol-2 CC 4.0\n"
601       "mol-3 CCC 9.0\n";
602   nSup2.setData(text, " ", 1, 0, true, true);
603   CHECK_INVARIANT(nSup2.length() == 3, "");
604   mol = nSup2[2];
605   mol->getProp(common_properties::_Name, mname);
606   TEST_ASSERT(mname == "mol-3");
607   mol->getProp("Column_2", mname);
608   TEST_ASSERT(mname == "9.0");
609   delete mol;
610 
611   // now test for failure handling:
612   text =
613       "Id SMILES Column_2\n"
614       "mol-1 C 1.0\n"
615       "mol-2 CC 4.0\n"
616       "mol-3 fail 9.0\n"
617       "mol-4 CCCC 16.0\n";
618   nSup2.setData(text, " ", 1, 0, true, true);
619   mol = nSup2[3];
620   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
621   TEST_ASSERT(nSup2.length() == 4);
622   mol->getProp(common_properties::_Name, mname);
623   TEST_ASSERT(mname == "mol-4");
624   mol->getProp("Column_2", mname);
625   TEST_ASSERT(mname == "16.0");
626   delete mol;
627 
628   // failures should give null molecules:
629   mol = nSup2[2];
630   TEST_ASSERT(!mol);
631   delete mol;
632 #endif
633 
634   // issue 114, no \n at EOF:
635   text =
636       "Id SMILES Column_2\n"
637       "mol-1 C 1.0\n"
638       "mol-2 CC 4.0\n"
639       "mol-4 CCCC 16.0\n";
640   nSup2.setData(text, " ", 1, 0, true, true);
641   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
642   TEST_ASSERT(nSup2.length() == 3);
643   mol = nSup2[2];
644   TEST_ASSERT(mol);
645   mol->getProp(common_properties::_Name, mname);
646   TEST_ASSERT(mname == "mol-4");
647   mol->getProp("Column_2", mname);
648   TEST_ASSERT(mname == "16.0");
649   TEST_ASSERT(nSup2.atEnd());
650   delete mol;
651 
652   text =
653       "Id SMILES Column_2\n"
654       "mol-1 C 1.0\n"
655       "mol-2 CC 4.0\n"
656       "mol-4 CCCC 16.0";
657   nSup2.setData(text, " ", 1, 0, true, true);
658   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
659   TEST_ASSERT(nSup2.length() == 3);
660   mol = nSup2[2];
661   TEST_ASSERT(mol);
662   mol->getProp(common_properties::_Name, mname);
663   TEST_ASSERT(mname == "mol-4");
664   mol->getProp("Column_2", mname);
665   TEST_ASSERT(mname == "16.0");
666   TEST_ASSERT(nSup2.atEnd());
667   delete mol;
668 
669   try {
670     mol = nSup2[3];
671     delete mol;
672   } catch (FileParseException &) {
673     failed = true;
674   }
675   TEST_ASSERT(failed);
676 
677   text =
678       "mol-1 C 1.0\n"
679       "mol-2 CC 4.0\n"
680       "mol-4 CCCC 16.0";
681   nSup2.setData(text, " ", 1, 0, false, true);
682   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
683   TEST_ASSERT(nSup2.length() == 3);
684   mol = nSup2[2];
685   TEST_ASSERT(mol);
686   mol->getProp(common_properties::_Name, mname);
687   TEST_ASSERT(mname == "mol-4");
688   mol->getProp("Column_2", mname);
689   TEST_ASSERT(mname == "16.0");
690   delete mol;
691 
692   text =
693       "C\n"
694       "CC\n"
695       "CCCC";
696   nSup2.setData(text, " ", 0, -1, false, true);
697   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
698   TEST_ASSERT(nSup2.length() == 3);
699   mol = nSup2[2];
700   TEST_ASSERT(mol);
701   TEST_ASSERT(mol->getNumAtoms() == 4);
702   delete mol;
703 
704   // this was a delightful boundary condition:
705   BOOST_LOG(rdErrorLog)
706       << "------------------------------------------------------" << std::endl;
707   text =
708       "CC\n"
709       "CCC\n"
710       "CCOC\n"
711       "CCCCOC";
712   nSup2.setData(text, " ", 0, -1, false, true);
713   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
714   mol = nSup2.next();
715   delete mol;
716 
717   mol = nSup2[3];
718   TEST_ASSERT(nSup2.length() == 4);
719   delete mol;
720 
721   failed = false;
722   try {
723     mol = nSup2[4];
724     delete mol;
725   } catch (FileParseException &) {
726     failed = true;
727   }
728   TEST_ASSERT(failed);
729 
730   BOOST_LOG(rdErrorLog)
731       << "------------------------------------------------------" << std::endl;
732   // this was a delightful boundary condition:
733   text =
734       "CC\n"
735       "CCC\n"
736       "CCOC\n"
737       "CCCCOC";
738   nSup2.setData(text, " ", 0, -1, false, true);
739   //  BOOST_LOG(rdErrorLog) << "SIZE: " << nSup2.length() << std::endl;
740   failed = false;
741   try {
742     mol = nSup2[4];
743     delete mol;
744   } catch (FileParseException &) {
745     failed = true;
746   }
747   TEST_ASSERT(failed);
748   BOOST_LOG(rdErrorLog) << ">>> This may result in an infinite loop.  It "
749                            "should finish almost immediately:"
750                         << std::endl;
751   TEST_ASSERT(nSup2.length() == 4);
752   BOOST_LOG(rdErrorLog) << "<<< done." << std::endl;
753 
754   nSup2.reset();
755   unsigned int nDone = 0;
756   while (!nSup2.atEnd()) {
757     mol = nSup2.next();
758     nDone++;
759     delete mol;
760   }
761   TEST_ASSERT(nDone == nSup2.length());
762 
763   // ensure that we can call setData a second time:
764   text =
765       "Id SMILES Column_2\n"
766       "# comment, ignore\n"
767       "mol-1 C 1.0\n"
768       "mol-2 CC 4.0\n"
769       "mol-3 CCC 9.0\n"
770       "mol-4 CCCC 16.0\n";
771   nSup2.setData(text, " ", 1, 0, true, true);
772   mol = nSup2[2];
773   mol->getProp(common_properties::_Name, mname);
774   TEST_ASSERT(mname == "mol-3");
775   mol->getProp("Column_2", mname);
776   TEST_ASSERT(mname == "9.0");
777   delete mol;
778 
779   mol = nSup2[1];
780   mol->getProp(common_properties::_Name, mname);
781   TEST_ASSERT(mname == "mol-2");
782   mol->getProp("Column_2", mname);
783   TEST_ASSERT(mname == "4.0");
784   delete mol;
785 
786   // this was a delightful boundary condition:
787   text =
788       "CC\n"
789       "CCC\n"
790       "CCOC\n"
791       "CCCCOC\n"
792       "\n"
793       "\n";
794   nSup2.setData(text, " ", 0, -1, false, true);
795   TEST_ASSERT(nSup2.length() == 4);
796   nSup2.reset();
797   nDone = 0;
798   while (!nSup2.atEnd()) {
799     mol = nSup2.next();
800     nDone++;
801     delete mol;
802   }
803   TEST_ASSERT(nDone == nSup2.length());
804 };
805 
testSmilesWriter()806 void testSmilesWriter() {
807   std::string rdbase = getenv("RDBASE");
808   std::string fname =
809       rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
810   // std::string fname = "../test_data/fewSmi.csv";
811   SmilesMolSupplier *nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
812   std::string oname =
813       rdbase + "/Code/GraphMol/FileParsers/test_data/outSmiles_molsupplier.csv";
814   // std::string oname = "../test_data/outSmiles.csv";
815 
816   STR_VECT propNames;
817   propNames.push_back(std::string("Column_2"));
818   SmilesWriter *writer = new SmilesWriter(oname, " ");
819   writer->setProps(propNames);
820 
821   STR_VECT names;
822   STR_VECT props;
823   ROMol *mol = nSup->next();
824   // BOOST_LOG(rdErrorLog) << "WRITING" << std::endl;
825   while (mol) {
826     // BOOST_LOG(rdErrorLog) << "MOL: " << MolToSmiles(*mol) << std::endl;
827     std::string mname, pval;
828     mol->getProp(common_properties::_Name, mname);
829     mol->getProp("Column_2", pval);
830     names.push_back(mname);
831     props.push_back(pval);
832     writer->write(*mol);
833     delete mol;
834     try {
835       mol = nSup->next();
836     } catch (FileParseException &) {
837       break;
838     }
839   }
840   writer->flush();
841   delete nSup;
842 
843   // now read the molecules back in a check if we have the same properties etc
844   nSup = new SmilesMolSupplier(oname);
845   int i = 0;
846   mol = nSup->next();
847   while (mol) {
848     std::string mname, pval;
849     mol->getProp(common_properties::_Name, mname);
850     mol->getProp("Column_2", pval);
851     CHECK_INVARIANT(mname == names[i], "");
852     CHECK_INVARIANT(pval == props[i], "");
853     i++;
854     delete mol;
855     try {
856       mol = nSup->next();
857     } catch (FileParseException &) {
858       break;
859     }
860   }
861   TEST_ASSERT(nSup->length() == writer->numMols());
862   writer->close();
863   delete writer;
864   delete nSup;
865 }
866 
testSDWriter()867 void testSDWriter() {
868   std::string rdbase = getenv("RDBASE");
869   std::string fname =
870       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
871   SDMolSupplier sdsup(fname);
872 
873   std::string ofile =
874       rdbase +
875       "/Code/GraphMol/FileParsers/test_data/outNCI_few_molsupplier.sdf";
876 
877   auto *writer = new SDWriter(ofile);
878 
879   STR_VECT names;
880 
881   while (!sdsup.atEnd()) {
882     ROMol *mol = sdsup.next();
883     std::string mname;
884     mol->getProp(common_properties::_Name, mname);
885     names.push_back(mname);
886 
887     writer->write(*mol);
888     delete mol;
889   }
890   writer->flush();
891   CHECK_INVARIANT(writer->numMols() == 16, "");
892   writer->close();
893   delete writer;
894 
895   // now read in the file we just finished writing
896 
897   SDMolSupplier reader(ofile);
898   int i = 0;
899   while (!reader.atEnd()) {
900     ROMol *mol = reader.next();
901     std::string mname;
902     mol->getProp(common_properties::_Name, mname);
903     BOOST_LOG(rdInfoLog) << mname << "\n";
904     // CHECK_INVARIANT(mname == names[i], "");
905 
906     delete mol;
907     i++;
908   }
909 
910   BOOST_LOG(rdInfoLog) << i << "\n";
911   /*
912   // now read in a file with aromatic information on the bonds
913   std::string infile = rdbase +
914   "/Code/GraphMol/FileParsers/test_data/outNCI_arom.sdf";
915   SDMolSupplier nreader(infile);
916   i = 0;
917   while (!nreader.atEnd()) {
918     ROMol *mol = nreader.next();
919     std::string mname;
920     mol->getProp(common_properties::_Name, mname);
921     BOOST_LOG(rdInfoLog) << mname << "\n";
922     //CHECK_INVARIANT(mname == names[i], "");
923     i++;
924 
925     delete mol;
926     }*/
927 }
928 
testSDSupplierEnding()929 void testSDSupplierEnding() {
930   std::string rdbase = getenv("RDBASE");
931   // test the SD supplier to check if it properly handle the end of sd file
932   // conditions
933   // should work fine if the sd file end with  a $$$$ follwed by blank line or
934   // no
935   // no blank lines
936   std::string infile =
937       rdbase + "/Code/GraphMol/FileParsers/test_data/esters_end.sdf";
938   int i = 0;
939   SDMolSupplier reader(infile);
940   while (!reader.atEnd()) {
941     ROMol *mol = reader.next();
942     std::string mname;
943     mol->getProp(common_properties::_Name, mname);
944     i++;
945     delete mol;
946   }
947   CHECK_INVARIANT(i == 6, "");
948 }
949 
testSuppliersEmptyFile()950 void testSuppliersEmptyFile() {
951   std::string rdbase = getenv("RDBASE");
952   {  // contains no records
953     std::string infile =
954         rdbase + "/Code/GraphMol/FileParsers/test_data/empty.sdf";
955     SDMolSupplier reader(infile);
956     TEST_ASSERT(reader.atEnd());
957   }
958   {
959     std::string infile =
960         rdbase + "/Code/GraphMol/FileParsers/test_data/empty.smi";
961     SmilesMolSupplier smiSup(infile, ",", 0, -1);
962     TEST_ASSERT(smiSup.atEnd());
963   }
964   // tests for GitHub issue 19:
965   {  // actually an empty file, throws an exception:
966     std::string infile =
967         rdbase + "/Code/GraphMol/FileParsers/test_data/empty2.sdf";
968     bool failed = false;
969     try {
970       SDMolSupplier reader(infile);
971     } catch (BadFileException &) {
972       failed = true;
973     }
974     TEST_ASSERT(failed);
975   }
976   {
977     SDMolSupplier reader;
978     reader.setData("");
979     TEST_ASSERT(reader.atEnd());
980     bool failed = false;
981     try {
982       reader[0];
983     } catch (FileParseException &) {
984       failed = true;
985     }
986     TEST_ASSERT(failed);
987     TEST_ASSERT(reader.length() == 0);
988   }
989   {
990     SDMolSupplier reader;
991     reader.setData("");
992     bool failed = false;
993     try {
994       reader[0];
995     } catch (FileParseException &) {
996       failed = true;
997     }
998     TEST_ASSERT(failed);
999     TEST_ASSERT(reader.length() == 0);
1000   }
1001   {
1002     SDMolSupplier reader;
1003     reader.setData("");
1004     TEST_ASSERT(reader.length() == 0);
1005   }
1006 }
1007 
testCisTrans()1008 void testCisTrans() {
1009   std::string text;
1010   text =
1011       "mol-1 ClC(C)=C(Br)C\n"
1012       "mol-2 C1=COC=CC1C(Cl)=C(Br)C\n"
1013       "mol-3 C1=COC=CC1\\C(Cl)=C(Br)\\C";
1014   SmilesMolSupplier smiSup;
1015   smiSup.setData(text, " ", 1, 0, false, true);
1016 
1017   std::string ofile = "cisTrans_molsupplier.sdf";
1018   SDWriter writer(ofile);
1019   while (!smiSup.atEnd()) {
1020     ROMol *mol = smiSup.next();
1021     TEST_ASSERT(mol);
1022     RDDepict::compute2DCoords(*mol);
1023     writer.write(*mol);
1024     delete mol;
1025   }
1026   writer.close();
1027   // do the round t;est
1028   // parse the sd file and write it out to smiles
1029 
1030   SDMolSupplier *reader;
1031   try {
1032     reader = new SDMolSupplier("cisTrans_molsupplier.sdf");
1033   } catch (FileParseException &) {
1034     reader = nullptr;
1035   }
1036   TEST_ASSERT(reader);
1037   while (!reader->atEnd()) {
1038     ROMol *mol = reader->next();
1039     std::string mname;
1040     mol->getProp(common_properties::_Name, mname);
1041     BOOST_LOG(rdInfoLog) << mname << " ";
1042     BOOST_LOG(rdInfoLog) << MolToSmiles(*mol, 1) << "\n";
1043     delete mol;
1044   }
1045   delete reader;
1046 }
1047 
testStereoRound()1048 void testStereoRound() {
1049   // - we will read ina bunch of cdk2 smiles with stereo on them
1050   // - generate the canonical smiles for each one
1051   // - generate 2D coordinates, write to an sdf file
1052   // - read the sdf file back in and compare the canonical smiles
1053   std::string rdbase = getenv("RDBASE");
1054   std::string infile =
1055       rdbase + "/Code/GraphMol/FileParsers/test_data/cdk2_stereo.csv";
1056   SmilesMolSupplier *smiSup;
1057   try {
1058     smiSup = new SmilesMolSupplier(infile, ",", 0, 1, false, true);
1059   } catch (FileParseException &) {
1060     smiSup = nullptr;
1061   }
1062   TEST_ASSERT(smiSup)
1063   std::map<std::string, std::string> nameSmi;
1064   std::string ofile =
1065       rdbase +
1066       "/Code/GraphMol/FileParsers/test_data/cdk2_stereo_molsupplier.sdf";
1067   auto *writer = new SDWriter(ofile);
1068   int count = 0;
1069 
1070   while (!smiSup->atEnd()) {
1071     ROMol *mol = smiSup->next();
1072     // mol->debugMol(std::cout);
1073     std::string mname;
1074     mol->getProp(common_properties::_Name, mname);
1075     nameSmi[mname] = MolToSmiles(*mol, 1);
1076 
1077     ROMol *nmol = SmilesToMol(nameSmi[mname]);
1078     // nmol->debugMol(std::cout);
1079 
1080     std::string nsmi = MolToSmiles(*nmol, 1);
1081     // BOOST_LOG(rdErrorLog) << mname << "\n";
1082     if (nameSmi[mname] != nsmi) {
1083       BOOST_LOG(rdInfoLog) << mname << " " << nameSmi[mname] << " " << nsmi
1084                            << "\n";
1085     }
1086     RDDepict::compute2DCoords(*mol);
1087     writer->write(*mol);
1088     count++;
1089     delete mol;
1090     delete nmol;
1091 
1092     if (count % 50 == 0) {
1093       BOOST_LOG(rdInfoLog) << count << " " << mname << "\n";
1094     }
1095   }
1096   writer->close();
1097   delete smiSup;
1098   delete writer;
1099 
1100   // now read the SD file back in check if the canonical smiles are the same
1101   SDMolSupplier *reader;
1102   try {
1103     reader = new SDMolSupplier(ofile);
1104   } catch (FileParseException &) {
1105     reader = nullptr;
1106   }
1107   TEST_ASSERT(reader);
1108   count = 0;
1109 
1110   while (!reader->atEnd()) {
1111     ROMol *mol = reader->next();
1112     // mol->debugMol(std::cout);
1113     std::string smiles = MolToSmiles(*mol, 1);
1114     std::string mname;
1115     mol->getProp(common_properties::_Name, mname);
1116     if (nameSmi[mname] != smiles) {
1117       BOOST_LOG(rdInfoLog) << mname << " " << nameSmi[mname] << " " << smiles
1118                            << "\n";
1119     }
1120     delete mol;
1121     count++;
1122   }
1123   delete reader;
1124 }
1125 
testIssue226()1126 void testIssue226() {
1127   std::string rdbase = getenv("RDBASE");
1128   std::string fname =
1129       rdbase + "/Code/GraphMol/FileParsers/test_data/Issue226.sdf";
1130   SDMolSupplier sdsup(fname);
1131 
1132   ROMol *mol;
1133 
1134   mol = sdsup.next();
1135   TEST_ASSERT(mol);
1136   TEST_ASSERT(mol->hasProp("E1"));
1137   TEST_ASSERT(mol->hasProp("E2"));
1138   delete mol;
1139 
1140   mol = sdsup.next();
1141   TEST_ASSERT(mol);
1142   TEST_ASSERT(mol->hasProp("E1"));
1143   TEST_ASSERT(mol->hasProp("E2"));
1144   delete mol;
1145 }
1146 
testTDTSupplier1()1147 int testTDTSupplier1() {
1148   std::string rdbase = getenv("RDBASE");
1149   std::string fname =
1150       rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
1151   {
1152     TDTMolSupplier suppl(fname, "PN");
1153     unsigned int i = 0;
1154     while (!suppl.atEnd()) {
1155       ROMol *nmol = suppl.next();
1156       if (nmol) {
1157         std::string prop1, prop2;
1158         TEST_ASSERT(nmol->getNumAtoms() > 0);
1159         TEST_ASSERT(nmol->hasProp("PN"));
1160         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1161         TEST_ASSERT(nmol->hasProp("MFCD"));
1162 
1163         nmol->getProp("PN", prop1);
1164         nmol->getProp(common_properties::_Name, prop2);
1165         TEST_ASSERT(prop1 == prop2);
1166 
1167         // we didn't ask for 2D conformers, so there should be a property 2D:
1168         TEST_ASSERT(nmol->hasProp(common_properties::TWOD));
1169         // and no conformer:
1170         TEST_ASSERT(!nmol->getNumConformers());
1171 
1172         delete nmol;
1173         i++;
1174       }
1175     }
1176     TEST_ASSERT(i == 10);
1177   }
1178   {
1179     std::ifstream strm(fname.c_str(), std::ios_base::binary);
1180     TDTMolSupplier suppl(&strm, false, "PN");
1181     unsigned int i = 0;
1182     while (!suppl.atEnd()) {
1183       ROMol *nmol = suppl.next();
1184       if (nmol) {
1185         std::string prop1, prop2;
1186         TEST_ASSERT(nmol->getNumAtoms() > 0);
1187         TEST_ASSERT(nmol->hasProp("PN"));
1188         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1189         TEST_ASSERT(nmol->hasProp("MFCD"));
1190 
1191         nmol->getProp("PN", prop1);
1192         nmol->getProp(common_properties::_Name, prop2);
1193         TEST_ASSERT(prop1 == prop2);
1194 
1195         // we didn't ask for 2D conformers, so there should be a property 2D:
1196         TEST_ASSERT(nmol->hasProp(common_properties::TWOD));
1197         // and no conformer:
1198         TEST_ASSERT(!nmol->getNumConformers());
1199 
1200         delete nmol;
1201         i++;
1202       }
1203     }
1204     TEST_ASSERT(i == 10);
1205   }
1206   return 1;
1207 }
testTDTSupplier2()1208 int testTDTSupplier2() {
1209   std::string rdbase = getenv("RDBASE");
1210   std::string fname =
1211       rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
1212   int i;
1213   std::string prop1, prop2;
1214 
1215   TDTMolSupplier suppl(fname, "PN", 2);
1216   i = 0;
1217   while (!suppl.atEnd()) {
1218     ROMol *nmol = suppl.next();
1219     if (nmol) {
1220       TEST_ASSERT(nmol->getNumAtoms() > 0);
1221       TEST_ASSERT(nmol->hasProp("PN"));
1222       TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1223       TEST_ASSERT(nmol->hasProp("MFCD"));
1224 
1225       nmol->getProp("PN", prop1);
1226       nmol->getProp(common_properties::_Name, prop2);
1227       TEST_ASSERT(prop1 == prop2);
1228 
1229       // we asked for 2D conformers, so there should be no property 2D:
1230       TEST_ASSERT(!nmol->hasProp(common_properties::TWOD));
1231       // and a conformer:
1232       TEST_ASSERT(nmol->getNumConformers() == 1);
1233       // with id "2":
1234       TEST_ASSERT(nmol->beginConformers()->get()->getId() == 2);
1235 
1236       delete nmol;
1237       i++;
1238     }
1239   }
1240   TEST_ASSERT(i == 10);
1241   return 1;
1242 }
testTDTSupplier3()1243 int testTDTSupplier3() {
1244   std::string data;
1245   int i;
1246   std::string prop1, prop2;
1247 
1248   TDTMolSupplier suppl;
1249 
1250   data =
1251       "$SMI<Cc1nnc(N)nc1C>\n"
1252       "CAS<17584-12-2>\n"
1253       "|\n"
1254       "$SMI<Cc1n[nH]c(=O)nc1N>\n"
1255       "CAS<~>\n"
1256       "|\n"
1257       "$SMI<Cc1n[nH]c(=O)[nH]c1=O>\n"
1258       "CAS<932-53-6>\n"
1259       "|\n"
1260       "$SMI<Cc1nnc(NN)nc1O>\n"
1261       "CAS<~>\n"
1262       "|\n";
1263   suppl.setData(data, "CAS");
1264 
1265   i = 0;
1266   while (!suppl.atEnd()) {
1267     ROMol *nmol = suppl.next();
1268     if (nmol) {
1269       TEST_ASSERT(nmol->getNumAtoms() > 0);
1270       TEST_ASSERT(nmol->hasProp("CAS"));
1271       TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1272 
1273       nmol->getProp("CAS", prop1);
1274       nmol->getProp(common_properties::_Name, prop2);
1275       TEST_ASSERT(prop1 == prop2);
1276 
1277       // no conformers should have been read:
1278       TEST_ASSERT(nmol->getNumConformers() == 0);
1279 
1280       delete nmol;
1281       i++;
1282     }
1283   }
1284   TEST_ASSERT(i == 4);
1285   TEST_ASSERT(suppl.length() == 4);
1286 
1287   // now make sure we can grab earlier mols (was sf.net issue 1904170):
1288   ROMol *mol = suppl[0];
1289   TEST_ASSERT(mol);
1290   delete mol;
1291 
1292   // make sure we can reset the supplier and still process it properly;
1293   suppl.setData(data, "CAS");
1294 
1295   i = 0;
1296   while (!suppl.atEnd()) {
1297     ROMol *nmol = suppl.next();
1298     if (nmol) {
1299       TEST_ASSERT(nmol->getNumAtoms() > 0);
1300       TEST_ASSERT(nmol->hasProp("CAS"));
1301       TEST_ASSERT(nmol->hasProp(common_properties::_Name));
1302 
1303       nmol->getProp("CAS", prop1);
1304       nmol->getProp(common_properties::_Name, prop2);
1305       TEST_ASSERT(prop1 == prop2);
1306 
1307       // no conformers should have been read:
1308       TEST_ASSERT(nmol->getNumConformers() == 0);
1309 
1310       delete nmol;
1311       i++;
1312     }
1313   }
1314   TEST_ASSERT(i == 4);
1315 
1316   return 1;
1317 }
1318 
testSDSupplierFromText()1319 void testSDSupplierFromText() {
1320   std::string text;
1321   int i = 0;
1322   SDMolSupplier reader;
1323 
1324   text =
1325       "Structure1\n"
1326       "csChFnd70/05230312262D\n"
1327       "\n"
1328       "  5  4  0  0  0  0  0  0  0  0999 V2000\n"
1329       "    1.2124    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1330       "    2.4249    0.7000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1331       "    3.6373    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1332       "    2.4249    2.1000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1333       "    0.0000    0.7000    0.0000 Y   0  0  0  0  0  0  0  0  0  0  0  0\n"
1334       "  1  2  1  0  0  0  0\n"
1335       "  2  3  1  0  0  0  0\n"
1336       "  2  4  2  0  0  0  0\n"
1337       "  1  5  1  0  0  0  0\n"
1338       "M  END\n"
1339       ">  <ID> (3)\n"
1340       "Lig1\n"
1341       "\n"
1342       "$$$$\n"
1343       "Structure1\n"
1344       "csChFnd70/05230312262D\n"
1345       "\n"
1346       "  6  5  0  0  0  0  0  0  0  0999 V2000\n"
1347       "    1.2124    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1348       "    2.4249    0.7000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1349       "    3.6373    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1350       "    2.4249    2.1000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1351       "    0.0000    0.7000    0.0000 Y   0  0  0  0  0  0  0  0  0  0  0  0\n"
1352       "    4.8477    0.6988    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1353       "  1  2  1  0  0  0  0\n"
1354       "  2  3  1  0  0  0  0\n"
1355       "  2  4  2  0  0  0  0\n"
1356       "  1  5  1  0  0  0  0\n"
1357       "  3  6  1  0  0  0  0\n"
1358       "M  END\n"
1359       ">  <ID> (4)\n"
1360       "Lig2\n"
1361       "\n"
1362       "$$$$\n";
1363   reader.setData(text);
1364 
1365   i = 0;
1366   while (!reader.atEnd()) {
1367     ROMol *mol = reader.next();
1368     std::string mname;
1369     TEST_ASSERT(mol->hasProp(common_properties::_Name));
1370     TEST_ASSERT(mol->hasProp("ID"));
1371     i++;
1372     delete mol;
1373   }
1374   TEST_ASSERT(i == 2);
1375 }
1376 
testSDSupplierFromTextStrLax1()1377 void testSDSupplierFromTextStrLax1() {
1378   std::string text;
1379   text =
1380       "Structure1\n"
1381       "csChFnd70/05230312262D\n"
1382       "\n"
1383       "  5  4  0  0  0  0  0  0  0  0999 V2000\n"
1384       "    1.2124    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1385       "    2.4249    0.7000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1386       "    3.6373    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1387       "    2.4249    2.1000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1388       "    0.0000    0.7000    0.0000 Y   0  0  0  0  0  0  0  0  0  0  0  0\n"
1389       "  1  2  1  0  0  0  0\n"
1390       "  2  3  1  0  0  0  0\n"
1391       "  2  4  2  0  0  0  0\n"
1392       "  1  5  1  0  0  0  0\n"
1393       "M  END\n"
1394       "blah\n"
1395       "\n"
1396       "blah after blank line\n"
1397       ">  <ID> (3)\n"
1398       "Lig1\n"
1399       "\n"
1400       "This will be ignored\n"
1401       ">  <ANOTHER_PROPERTY> (4)\n"
1402       "Value\n"
1403       "\n"
1404       "$$$$\n"
1405       "Structure1\n"
1406       "csChFnd70/05230312262D\n"
1407       "\n"
1408       "  6  5  0  0  0  0  0  0  0  0999 V2000\n"
1409       "    1.2124    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1410       "    2.4249    0.7000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1411       "    3.6373    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1412       "    2.4249    2.1000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1413       "    0.0000    0.7000    0.0000 Y   0  0  0  0  0  0  0  0  0  0  0  0\n"
1414       "    4.8477    0.6988    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1415       "  1  2  1  0  0  0  0\n"
1416       "  2  3  1  0  0  0  0\n"
1417       "  2  4  2  0  0  0  0\n"
1418       "  1  5  1  0  0  0  0\n"
1419       "  3  6  1  0  0  0  0\n"
1420       "M  END\n"
1421       ">  <ID> (4)\n"
1422       "Lig2\n"
1423       "\n"
1424       "This will be ignored\n"
1425       "\n"
1426       ">  <ANOTHER_PROPERTY> (4)\n"
1427       "Value\n"
1428       "\n"
1429       "This will be ignored\n"
1430       "\n"
1431       "$$$$\n";
1432 
1433   // strict
1434   {
1435     SDMolSupplier reader;
1436 
1437     reader.setData(text, true, true, true);
1438 
1439     int i = 0;
1440     while (!reader.atEnd()) {
1441       ROMol *mol = reader.next();
1442       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1443       if (i == 0) {
1444         TEST_ASSERT(!mol->hasProp("ID"));
1445       }
1446       TEST_ASSERT(!mol->hasProp("ANOTHER_PROPERTY"));
1447       i++;
1448       delete mol;
1449     }
1450     TEST_ASSERT(i == 2);
1451   }
1452   // lax
1453   {
1454     SDMolSupplier reader;
1455 
1456     reader.setData(text, true, true, false);
1457 
1458     int i = 0;
1459     while (!reader.atEnd()) {
1460       ROMol *mol = reader.next();
1461       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1462       TEST_ASSERT(mol->hasProp("ID"));
1463       TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1464       i++;
1465       delete mol;
1466     }
1467     TEST_ASSERT(i == 2);
1468   }
1469 }
1470 
testSDSupplierFromTextStrLax2()1471 void testSDSupplierFromTextStrLax2() {
1472   std::string text;
1473   text =
1474       "Structure1\n"
1475       "csChFnd70/05230312262D\n"
1476       "\n"
1477       "  5  4  0  0  0  0  0  0  0  0999 V2000\n"
1478       "    1.2124    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1479       "    2.4249    0.7000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1480       "    3.6373    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1481       "    2.4249    2.1000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1482       "    0.0000    0.7000    0.0000 Y   0  0  0  0  0  0  0  0  0  0  0  0\n"
1483       "  1  2  1  0  0  0  0\n"
1484       "  2  3  1  0  0  0  0\n"
1485       "  2  4  2  0  0  0  0\n"
1486       "  1  5  1  0  0  0  0\n"
1487       "M  END\n"
1488       ">  <ID> (3)\n"
1489       "Lig1\n"
1490       "\n"
1491       ">  <ANOTHER_PROPERTY> (4)\n"
1492       "No blank line before dollars\n"
1493       "$$$$\n"
1494       "Structure1\n"
1495       "csChFnd70/05230312262D\n"
1496       "\n"
1497       "  6  5  0  0  0  0  0  0  0  0999 V2000\n"
1498       "    1.2124    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1499       "    2.4249    0.7000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1500       "    3.6373    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1501       "    2.4249    2.1000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n"
1502       "    0.0000    0.7000    0.0000 Y   0  0  0  0  0  0  0  0  0  0  0  0\n"
1503       "    4.8477    0.6988    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n"
1504       "  1  2  1  0  0  0  0\n"
1505       "  2  3  1  0  0  0  0\n"
1506       "  2  4  2  0  0  0  0\n"
1507       "  1  5  1  0  0  0  0\n"
1508       "  3  6  1  0  0  0  0\n"
1509       "M  END\n"
1510       ">  <ID> (3)\n"
1511       "Lig2\n"
1512       "\n"
1513       ">  <ANOTHER_PROPERTY> (4)\n"
1514       "Value2\n"
1515       "\n"
1516       "$$$$\n";
1517 
1518   // strict
1519   {
1520     SDMolSupplier reader;
1521 
1522     reader.setData(text, true, true, true);
1523 
1524     int i = 0;
1525     while (!reader.atEnd()) {
1526       ROMol *mol = reader.next();
1527       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1528       TEST_ASSERT(mol->hasProp("ID"));
1529       TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1530       std::string s;
1531       mol->getProp("ID", s);
1532       TEST_ASSERT(s == "Lig1");
1533       mol->getProp("ANOTHER_PROPERTY", s);
1534       TEST_ASSERT(s ==
1535                   "No blank line before dollars\n"
1536                   "$$$$\n"
1537                   "Structure1\n"
1538                   "csChFnd70/05230312262D");
1539       i++;
1540       delete mol;
1541     }
1542     TEST_ASSERT(i == 1);
1543   }
1544   // lax
1545   {
1546     SDMolSupplier reader;
1547 
1548     reader.setData(text, true, true, false);
1549 
1550     int i = 0;
1551     while (!reader.atEnd()) {
1552       ROMol *mol = reader.next();
1553       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1554       TEST_ASSERT(mol->hasProp("ID"));
1555       TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1556       std::string s;
1557       mol->getProp("ID", s);
1558       TEST_ASSERT(s == "Lig2");
1559       mol->getProp("ANOTHER_PROPERTY", s);
1560       TEST_ASSERT(s == "Value2");
1561       i++;
1562       delete mol;
1563     }
1564     TEST_ASSERT(i == 1);
1565   }
1566 }
1567 
testSDSupplierStrLax1()1568 void testSDSupplierStrLax1() {
1569   std::string rdbase = getenv("RDBASE");
1570   std::string fname =
1571       rdbase + "/Code/GraphMol/FileParsers/test_data/strictLax1.sdf";
1572   // strict
1573   {
1574     SDMolSupplier reader(fname, true, true, true);
1575 
1576     int i = 0;
1577     while (!reader.atEnd()) {
1578       ROMol *mol = reader.next();
1579       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1580       if (i == 0) {
1581         TEST_ASSERT(!mol->hasProp("ID"));
1582       }
1583       TEST_ASSERT(!mol->hasProp("ANOTHER_PROPERTY"));
1584       i++;
1585       delete mol;
1586     }
1587     TEST_ASSERT(i == 2);
1588   }
1589   // lax
1590   {
1591     SDMolSupplier reader(fname, true, true, false);
1592 
1593     int i = 0;
1594     while (!reader.atEnd()) {
1595       ROMol *mol = reader.next();
1596       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1597       TEST_ASSERT(mol->hasProp("ID"));
1598       TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1599       i++;
1600       delete mol;
1601     }
1602     TEST_ASSERT(i == 2);
1603   }
1604 }
1605 
testSDSupplierStrLax2()1606 void testSDSupplierStrLax2() {
1607   std::string rdbase = getenv("RDBASE");
1608   std::string fname =
1609       rdbase + "/Code/GraphMol/FileParsers/test_data/strictLax2.sdf";
1610   // strict
1611   {
1612     SDMolSupplier reader(fname, true, true, true);
1613 
1614     int i = 0;
1615     while (!reader.atEnd()) {
1616       ROMol *mol = reader.next();
1617       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1618       TEST_ASSERT(mol->hasProp("ID"));
1619       TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1620       std::string s;
1621       mol->getProp("ID", s);
1622       TEST_ASSERT(s == "Lig1");
1623       mol->getProp("ANOTHER_PROPERTY", s);
1624       TEST_ASSERT(s ==
1625                   "No blank line before dollars\n"
1626                   "$$$$\n"
1627                   "Structure1\n"
1628                   "csChFnd70/05230312262D");
1629       i++;
1630       delete mol;
1631     }
1632     TEST_ASSERT(i == 1);
1633   }
1634   // lax
1635   {
1636     SDMolSupplier reader(fname, true, true, false);
1637 
1638     int i = 0;
1639     while (!reader.atEnd()) {
1640       ROMol *mol = reader.next();
1641       TEST_ASSERT(mol->hasProp(common_properties::_Name));
1642       TEST_ASSERT(mol->hasProp("ID"));
1643       TEST_ASSERT(mol->hasProp("ANOTHER_PROPERTY"));
1644       std::string s;
1645       mol->getProp("ID", s);
1646       TEST_ASSERT(s == "Lig2");
1647       mol->getProp("ANOTHER_PROPERTY", s);
1648       TEST_ASSERT(s == "Value2");
1649       i++;
1650       delete mol;
1651     }
1652     TEST_ASSERT(i == 1);
1653   }
1654 }
1655 
testIssue265()1656 void testIssue265() {
1657   std::string rdbase = getenv("RDBASE");
1658   std::string fname =
1659       rdbase + "/Code/GraphMol/FileParsers/test_data/NotThere.sdf";
1660   bool ok = false;
1661   try {
1662     SDMolSupplier reader(fname);
1663     ok = false;
1664   } catch (BadFileException &) {
1665     ok = true;
1666   }
1667   TEST_ASSERT(ok);
1668 
1669   try {
1670     SmilesMolSupplier reader(fname);
1671     ok = false;
1672   } catch (BadFileException &) {
1673     ok = true;
1674   }
1675   TEST_ASSERT(ok);
1676 
1677   try {
1678     TDTMolSupplier reader(fname);
1679     ok = false;
1680   } catch (BadFileException &) {
1681     ok = true;
1682   }
1683   TEST_ASSERT(ok);
1684 }
1685 
testSDErrorHandling()1686 void testSDErrorHandling() {
1687   std::string rdbase = getenv("RDBASE");
1688   std::string fname =
1689       rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors1.sdf";
1690   SDMolSupplier *sdsup;
1691   ROMol *nmol = nullptr;
1692 
1693   // entry 1: bad properties
1694   sdsup = new SDMolSupplier(fname);
1695   TEST_ASSERT(!sdsup->atEnd());
1696   nmol = sdsup->next();
1697   TEST_ASSERT(nmol);
1698   TEST_ASSERT(!nmol->hasProp("ID"));
1699   delete sdsup;
1700   delete nmol;
1701 
1702   // case 2: can't be sanitized
1703   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors2.sdf";
1704   sdsup = new SDMolSupplier(fname);
1705   TEST_ASSERT(!sdsup->atEnd());
1706   nmol = sdsup->next();
1707   TEST_ASSERT(!nmol);
1708   TEST_ASSERT(sdsup->atEnd());
1709   delete sdsup;
1710   delete nmol;
1711 
1712   // entry 3: bad number of atoms
1713   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors3.sdf";
1714   sdsup = new SDMolSupplier(fname);
1715   TEST_ASSERT(!sdsup->atEnd());
1716   nmol = sdsup->next();
1717   TEST_ASSERT(!nmol);
1718   TEST_ASSERT(sdsup->atEnd());
1719   delete sdsup;
1720   delete nmol;
1721 
1722   // entry 4: bad number of bonds
1723   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/sdErrors4.sdf";
1724   sdsup = new SDMolSupplier(fname);
1725   TEST_ASSERT(!sdsup->atEnd());
1726   nmol = sdsup->next();
1727   TEST_ASSERT(!nmol);
1728   TEST_ASSERT(sdsup->atEnd());
1729   delete sdsup;
1730   delete nmol;
1731 }
1732 
testIssue381()1733 void testIssue381() {
1734   std::string rdbase = getenv("RDBASE");
1735   std::string fname =
1736       rdbase + "/Code/GraphMol/FileParsers/test_data/Issue381.sdf";
1737   SDMolSupplier *sdsup;
1738 
1739   ROMol *nmol = nullptr;
1740   int count;
1741 
1742   // entry 1: bad properties
1743   sdsup = new SDMolSupplier(fname);
1744   TEST_ASSERT(!sdsup->atEnd());
1745   count = 0;
1746   while (!sdsup->atEnd()) {
1747     nmol = sdsup->next();
1748     if (nmol) {
1749       delete nmol;
1750     }
1751     count++;
1752   }
1753   TEST_ASSERT(sdsup->atEnd());
1754   TEST_ASSERT(count == 9);
1755 
1756   TEST_ASSERT(sdsup->length() == 9);
1757 
1758   delete sdsup;
1759 }
1760 
testSetStreamIndices()1761 void testSetStreamIndices() {
1762   std::string rdbase = getenv("RDBASE");
1763   std::string fname =
1764       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
1765   std::ifstream ifs(fname.c_str(), std::ios_base::binary);
1766   std::vector<std::streampos> indices;
1767   bool addIndex = true;
1768   bool notEof = true;
1769   std::streampos pos = 0;
1770   std::string line;
1771   while (notEof) {
1772     if (addIndex) {
1773       pos = ifs.tellg();
1774     }
1775     notEof = (std::getline(ifs, line) ? true : false);
1776     if (notEof) {
1777       if (addIndex) {
1778         indices.push_back(pos);
1779       }
1780       addIndex = (line.substr(0, 4) == "$$$$");
1781     }
1782   }
1783   ifs.close();
1784   SDMolSupplier *sdsup;
1785 
1786   ROMol *nmol = nullptr;
1787   int count;
1788 
1789   sdsup = new SDMolSupplier(fname);
1790   sdsup->setStreamIndices(indices);
1791   TEST_ASSERT(!sdsup->atEnd());
1792   TEST_ASSERT(sdsup->length() == 16);
1793 
1794   count = 0;
1795   while (!sdsup->atEnd()) {
1796     nmol = sdsup->next();
1797     if (nmol) {
1798       delete nmol;
1799     }
1800     count++;
1801   }
1802   TEST_ASSERT(sdsup->atEnd());
1803   TEST_ASSERT(count == 16);
1804 
1805   TEST_ASSERT(sdsup->length() == 16);
1806 
1807   delete sdsup;
1808 }
1809 
testMixIterAndRandom()1810 int testMixIterAndRandom() {
1811   std::string rdbase = getenv("RDBASE");
1812   std::string fname =
1813       rdbase + "/Code/GraphMol/FileParsers/test_data/esters.sdf";
1814   bool ok;
1815 
1816   SDMolSupplier *sdsup;
1817   ROMol *mol;
1818   std::string name;
1819 
1820   sdsup = new SDMolSupplier(fname);
1821   TEST_ASSERT(sdsup);
1822   unsigned int i = 0;
1823   while (!sdsup->atEnd()) {
1824     mol = sdsup->next();
1825     if (mol) {
1826       TEST_ASSERT(mol->hasProp("ID"));
1827       delete mol;
1828     }
1829     i++;
1830   }
1831   TEST_ASSERT(i == 6);
1832   TEST_ASSERT(sdsup->length() == 6);
1833 
1834   delete sdsup;
1835   sdsup = new SDMolSupplier(fname);
1836   TEST_ASSERT(sdsup);
1837   TEST_ASSERT(sdsup->length() == 6);
1838 
1839   mol = sdsup->next();
1840   TEST_ASSERT(mol);
1841   TEST_ASSERT(mol->hasProp("ID"));
1842   mol->getProp("ID", name);
1843   TEST_ASSERT(name == "Lig1");
1844   delete mol;
1845 
1846   mol = (*sdsup)[0];
1847   TEST_ASSERT(mol);
1848   TEST_ASSERT(mol->hasProp("ID"));
1849   mol->getProp("ID", name);
1850   TEST_ASSERT(name == "Lig1");
1851   delete mol;
1852 
1853   sdsup->reset();
1854   mol = sdsup->next();
1855   TEST_ASSERT(mol);
1856   TEST_ASSERT(mol->hasProp("ID"));
1857   mol->getProp("ID", name);
1858   TEST_ASSERT(name == "Lig1");
1859   delete mol;
1860   mol = sdsup->next();
1861   TEST_ASSERT(mol);
1862   TEST_ASSERT(mol->hasProp("ID"));
1863   mol->getProp("ID", name);
1864   TEST_ASSERT(name == "Lig2");
1865   delete mol;
1866   delete sdsup;
1867 
1868   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
1869   SmilesMolSupplier *nSup;
1870   nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1871   TEST_ASSERT(nSup);
1872   TEST_ASSERT(nSup->length() == 10);
1873   mol = (*nSup)[0];
1874   TEST_ASSERT(mol);
1875   TEST_ASSERT(mol->getNumAtoms() == 9);
1876   delete mol;
1877   delete nSup;
1878 
1879   nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1880   TEST_ASSERT(nSup);
1881   mol = (*nSup)[0];
1882   TEST_ASSERT(mol);
1883   TEST_ASSERT(mol->getNumAtoms() == 9);
1884   TEST_ASSERT(nSup->length() == 10);
1885   delete mol;
1886   delete nSup;
1887 
1888   nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1889   TEST_ASSERT(nSup);
1890   mol = nSup->next();
1891   TEST_ASSERT(mol);
1892   TEST_ASSERT(mol->getNumAtoms() == 9);
1893   TEST_ASSERT(nSup->length() == 10);
1894   delete mol;
1895   mol = (*nSup)[0];
1896   TEST_ASSERT(mol);
1897   TEST_ASSERT(mol->getNumAtoms() == 9);
1898   TEST_ASSERT(nSup->length() == 10);
1899   delete mol;
1900   mol = nSup->next();
1901   TEST_ASSERT(mol);
1902   TEST_ASSERT(mol->getNumAtoms() == 20);
1903   delete nSup;
1904   delete mol;
1905 
1906   nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
1907   TEST_ASSERT(nSup);
1908   mol = nullptr;
1909   try {
1910     mol = (*nSup)[20];
1911     ok = false;
1912   } catch (FileParseException &) {
1913     ok = true;
1914   }
1915   TEST_ASSERT(ok);
1916   delete nSup;
1917 
1918   fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
1919   TDTMolSupplier *tSup;
1920   tSup = new TDTMolSupplier(fname);
1921   TEST_ASSERT(tSup);
1922   TEST_ASSERT(tSup->length() == 10);
1923   mol = (*tSup)[0];
1924   TEST_ASSERT(mol);
1925   TEST_ASSERT(mol->getNumAtoms() == 9);
1926   delete mol;
1927   delete tSup;
1928 
1929   tSup = new TDTMolSupplier(fname);
1930   TEST_ASSERT(tSup);
1931   mol = (*tSup)[0];
1932   TEST_ASSERT(mol);
1933   TEST_ASSERT(mol->getNumAtoms() == 9);
1934   TEST_ASSERT(tSup->length() == 10);
1935   delete mol;
1936   delete tSup;
1937 
1938   tSup = new TDTMolSupplier(fname);
1939   TEST_ASSERT(tSup);
1940   mol = tSup->next();
1941   TEST_ASSERT(mol);
1942   TEST_ASSERT(mol->getNumAtoms() == 9);
1943   TEST_ASSERT(tSup->length() == 10);
1944   delete mol;
1945 
1946   mol = (*tSup)[0];
1947   TEST_ASSERT(mol);
1948   TEST_ASSERT(mol->getNumAtoms() == 9);
1949   TEST_ASSERT(tSup->length() == 10);
1950   delete mol;
1951 
1952   mol = tSup->next();
1953   TEST_ASSERT(mol);
1954   delete mol;
1955 
1956   mol = tSup->next();
1957   TEST_ASSERT(mol);
1958   delete mol;
1959 
1960   mol = tSup->next();
1961   TEST_ASSERT(mol);
1962   TEST_ASSERT(mol->getNumAtoms() == 10);
1963   delete tSup;
1964   delete mol;
1965 
1966   tSup = new TDTMolSupplier(fname);
1967   TEST_ASSERT(tSup);
1968   mol = nullptr;
1969   try {
1970     mol = (*tSup)[20];
1971     delete mol;
1972     ok = false;
1973   } catch (FileParseException &) {
1974     ok = true;
1975   }
1976   TEST_ASSERT(ok);
1977   delete tSup;
1978 
1979   return 1;
1980 }
1981 
testRemoveHs()1982 int testRemoveHs() {
1983   std::string rdbase = getenv("RDBASE");
1984   std::string fname =
1985       rdbase + "/Code/GraphMol/FileParsers/test_data/withHs.sdf";
1986 
1987   SDMolSupplier sdsup(fname);
1988   ROMol *nmol;
1989 
1990   nmol = sdsup.next();
1991   TEST_ASSERT(nmol);
1992   TEST_ASSERT(nmol->getNumAtoms() == 23);
1993   delete nmol;
1994   nmol = sdsup.next();
1995   TEST_ASSERT(nmol);
1996   TEST_ASSERT(nmol->getNumAtoms() == 28);
1997   delete nmol;
1998 
1999   std::cerr << "build:" << std::endl;
2000   SDMolSupplier sdsup2(fname, true, false);
2001   nmol = sdsup2.next();
2002   TEST_ASSERT(nmol);
2003   // std::cerr<<" count: "<<nmol->getNumAtoms()<<std::endl;
2004   TEST_ASSERT(nmol->getNumAtoms() == 39);
2005   delete nmol;
2006   nmol = sdsup2.next();
2007   TEST_ASSERT(nmol);
2008   TEST_ASSERT(nmol->getNumAtoms() == 30);
2009   delete nmol;
2010 
2011   return 1;
2012 }
2013 
testGetItemText()2014 void testGetItemText() {
2015   std::string rdbase = getenv("RDBASE");
2016   std::string fname;
2017 
2018   ROMol *mol1, *mol2;
2019   std::string molB, smiles;
2020   bool ok;
2021 
2022   {
2023     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2024     SDMolSupplier sdsup(fname);
2025     TEST_ASSERT(sdsup.length() == 16);
2026 
2027     molB = sdsup.getItemText(0);
2028     mol1 = sdsup[0];
2029     TEST_ASSERT(mol1);
2030     mol2 = MolBlockToMol(molB);
2031     TEST_ASSERT(mol2);
2032     TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2033     delete mol1;
2034     delete mol2;
2035 
2036     // make sure getItemText() doesn't screw up the current position:
2037     molB = sdsup.getItemText(10);
2038     mol1 = sdsup.next();
2039     molB = sdsup.getItemText(1);
2040     TEST_ASSERT(mol1);
2041     mol2 = MolBlockToMol(molB);
2042     TEST_ASSERT(mol2);
2043     TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2044     delete mol1;
2045     delete mol2;
2046 
2047     // make sure getItemText() works on the last molecule
2048     // (this was sf.net issue 1874882
2049     molB = sdsup.getItemText(15);
2050     mol1 = sdsup[15];
2051     mol2 = MolBlockToMol(molB);
2052     TEST_ASSERT(mol2);
2053     TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2054     delete mol1;
2055     delete mol2;
2056 
2057     try {
2058       molB = sdsup.getItemText(16);
2059       ok = false;
2060     } catch (FileParseException &) {
2061       ok = true;
2062     }
2063     TEST_ASSERT(ok);
2064 
2065     try {
2066       molB = sdsup.getItemText(20);
2067       ok = false;
2068     } catch (FileParseException &) {
2069       ok = true;
2070     }
2071     TEST_ASSERT(ok);
2072   }
2073 
2074   {
2075     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2076     SDMolSupplier sdsup(fname);
2077 
2078     // make sure getItemText() works if we haven't read at all from the
2079     // supplier:
2080     // (this was sf.net issue 2632960)
2081     molB = sdsup.getItemText(0);
2082     mol2 = MolBlockToMol(molB);
2083     TEST_ASSERT(mol2);
2084     mol1 = sdsup[0];
2085     TEST_ASSERT(mol1);
2086     TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2087     delete mol1;
2088     delete mol2;
2089 
2090     molB = sdsup.getItemText(5);
2091     mol2 = MolBlockToMol(molB);
2092     TEST_ASSERT(mol2);
2093     TEST_ASSERT(mol2->getNumAtoms() == 16);
2094     mol1 = sdsup[5];
2095     TEST_ASSERT(mol1);
2096     TEST_ASSERT(mol2->getNumAtoms() == mol1->getNumAtoms());
2097     delete mol1;
2098     delete mol2;
2099   }
2100 
2101   {
2102     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
2103     SmilesMolSupplier smisup(fname, ",", 1, 0, false);
2104     TEST_ASSERT(smisup.length() == 10);
2105 
2106     molB = smisup.getItemText(0);
2107     TEST_ASSERT(molB == "1, CC1=CC(=O)C=CC1=O, 34.14");
2108     mol1 = smisup[0];
2109     TEST_ASSERT(mol1);
2110     delete mol1;
2111 
2112     molB = smisup.getItemText(5);
2113     TEST_ASSERT(
2114         molB ==
2115         "6, OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br, 87.74");
2116     mol1 = smisup.next();
2117     TEST_ASSERT(mol1);
2118     TEST_ASSERT(mol1->getNumAtoms() == 20);
2119     delete mol1;
2120 
2121     // make sure getItemText() works on the last molecule
2122     // (this was sf.net issue 1874882
2123     molB = smisup.getItemText(8);
2124     TEST_ASSERT(molB == "9, CC(=NO)C(C)=NO, 65.18");
2125     molB = smisup.getItemText(9);
2126     TEST_ASSERT(molB == "10, C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3, 0.00");
2127 
2128     mol1 = smisup[0];
2129     TEST_ASSERT(mol1);
2130     smiles = MolToSmiles(*mol1, 1);
2131     TEST_ASSERT(smiles == "CC1=CC(=O)C=CC1=O");
2132     TEST_ASSERT(mol1->getNumAtoms() == 9);
2133     delete mol1;
2134   }
2135 
2136   {
2137     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
2138     SmilesMolSupplier smisup(fname, ",", 1, 0, false);
2139 
2140     // make sure getItemText() works if we haven't read at all from the
2141     // supplier:
2142     // (this was sf.net issue 2632960)
2143     molB = smisup.getItemText(0);
2144     TEST_ASSERT(molB == "1, CC1=CC(=O)C=CC1=O, 34.14");
2145 
2146     molB = smisup.getItemText(5);
2147     TEST_ASSERT(
2148         molB ==
2149         "6, OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br, 87.74");
2150 
2151     molB = smisup.getItemText(8);
2152     TEST_ASSERT(molB == "9, CC(=NO)C(C)=NO, 65.18");
2153     molB = smisup.getItemText(9);
2154     TEST_ASSERT(molB == "10, C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3, 0.00");
2155   }
2156 
2157   {
2158     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
2159     SmilesMolSupplier smisup(fname, ",", 1, 0, false);
2160 
2161     // make sure getItemText() flags EOF
2162     // (this was sf.net issue 3299878)
2163     molB = smisup.getItemText(0);
2164     TEST_ASSERT(molB == "1, CC1=CC(=O)C=CC1=O, 34.14");
2165 
2166     ROMol *m = smisup[9];
2167     TEST_ASSERT(m);
2168     delete m;
2169     TEST_ASSERT(smisup.atEnd());
2170     molB = smisup.getItemText(9);
2171     TEST_ASSERT(molB == "10, C1=CC=C(C=C1)P(C2=CC=CC=C2)C3=CC=CC=C3, 0.00");
2172     TEST_ASSERT(smisup.atEnd());
2173   }
2174 
2175   {
2176     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
2177     TDTMolSupplier tdtsup(fname);
2178     // make sure getItemText() works if we haven't read at all from the
2179     // supplier:
2180     // (this was sf.net issue 2632960)
2181     molB = tdtsup.getItemText(0);
2182     TEST_ASSERT(molB != "");
2183   }
2184 
2185   {
2186     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
2187     TDTMolSupplier tdtsup(fname);
2188     TEST_ASSERT(tdtsup.length() == 10);
2189 
2190     molB = tdtsup.getItemText(0);
2191     TEST_ASSERT(molB != "");
2192 
2193     mol1 = tdtsup[0];
2194     TEST_ASSERT(mol1);
2195     smiles = MolToSmiles(*mol1, 1);
2196     TEST_ASSERT(smiles == "Cc1nnc(N)nc1C");
2197     TEST_ASSERT(mol1->getNumAtoms() == 9);
2198     delete mol1;
2199 
2200     // make sure getItemText doesn't screw up next()
2201     molB = tdtsup.getItemText(5);
2202     mol1 = tdtsup.next();
2203     TEST_ASSERT(mol1);
2204     TEST_ASSERT(mol1->getNumAtoms() == 9);
2205     smiles = MolToSmiles(*mol1, 1);
2206     TEST_ASSERT(smiles == "Cc1n[nH]c(=O)nc1N");
2207     delete mol1;
2208 
2209     // make sure getItemText() works on the last molecule
2210     // (this was sf.net issue 1874882
2211     molB = tdtsup.getItemText(9);
2212     TEST_ASSERT(molB != "");
2213     TEST_ASSERT(molB.substr(0, 12) == "$SMI<Cc1n[nH");
2214   }
2215 
2216   {
2217     fname = rdbase + "/Code/GraphMol/FileParsers/test_data/acd_few.tdt";
2218     TDTMolSupplier tdtsup(fname);
2219     TEST_ASSERT(tdtsup.length() == 10);
2220 
2221     ROMol *mol = tdtsup[9];
2222     TEST_ASSERT(mol);
2223     delete mol;
2224     TEST_ASSERT(tdtsup.atEnd());
2225 
2226     // (this was sf.net issue 3299878
2227     molB = tdtsup.getItemText(9);
2228     TEST_ASSERT(molB != "");
2229     TEST_ASSERT(molB.substr(0, 12) == "$SMI<Cc1n[nH");
2230     TEST_ASSERT(tdtsup.atEnd());
2231   }
2232 }
2233 
testForwardSDSupplier()2234 int testForwardSDSupplier() {
2235   std::string rdbase = getenv("RDBASE");
2236   std::string fname =
2237       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2238   std::string fname2 =
2239       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf.gz";
2240 
2241   {
2242     std::ifstream strm(fname.c_str());
2243     ForwardSDMolSupplier sdsup(&strm, false);
2244     unsigned int i = 0;
2245     while (!sdsup.atEnd()) {
2246       ROMol *nmol = sdsup.next();
2247       TEST_ASSERT(nmol || sdsup.atEnd());
2248       if (nmol) {
2249         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
2250         TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
2251         delete nmol;
2252         i++;
2253       }
2254     }
2255     TEST_ASSERT(i == 16);
2256   }
2257 #ifdef RDK_USE_BOOST_IOSTREAMS
2258   // make sure the boost::iostreams are working
2259   {
2260     io::filtering_istream strm;
2261     strm.push(io::file_source(fname));
2262 
2263     unsigned int i = 0;
2264     while (!strm.eof()) {
2265       std::string line;
2266       std::getline(strm, line);
2267       if (!strm.eof()) {
2268         ++i;
2269       }
2270       if (i > 1000) {
2271         break;
2272       }
2273     }
2274     TEST_ASSERT(i == 998);
2275   }
2276   {
2277     gzstream strm(fname2);
2278     unsigned int i = 0;
2279     while (!strm.eof()) {
2280       std::string line;
2281       std::getline(strm, line);
2282       if (!strm.eof()) {
2283         ++i;
2284       }
2285       if (i > 1000) {
2286         break;
2287       }
2288     }
2289     TEST_ASSERT(i == 997);
2290   }
2291   // looks good, now do a supplier:
2292   {
2293     gzstream strm(fname2);
2294 
2295     ForwardSDMolSupplier sdsup(&strm, false);
2296     unsigned int i = 0;
2297     while (!sdsup.atEnd()) {
2298       ROMol *nmol = sdsup.next();
2299       if (nmol) {
2300         TEST_ASSERT(nmol->hasProp(common_properties::_Name));
2301         TEST_ASSERT(nmol->hasProp("NCI_AIDS_Antiviral_Screen_Conclusion"));
2302         delete nmol;
2303         i++;
2304       }
2305     }
2306     TEST_ASSERT(i == 16);
2307   }
2308 #endif
2309 
2310 #ifdef RDK_BUILD_MAEPARSER_SUPPORT
2311   // Now test that Maestro parsing of gz files works
2312   std::string maefname =
2313       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.mae";
2314   std::string maefname2 =
2315       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.maegz";
2316   {
2317     io::filtering_istream strm;
2318     strm.push(io::file_source(maefname));
2319 
2320     unsigned int i = 0;
2321     while (!strm.eof()) {
2322       std::string line;
2323       std::getline(strm, line);
2324       if (!strm.eof()) {
2325         ++i;
2326       }
2327       if (i > 1700) {
2328         break;
2329       }
2330     }
2331     TEST_ASSERT(i == 1663);
2332   }
2333   {
2334     gzstream strm(maefname2);
2335 
2336     unsigned int i = 0;
2337     while (!strm.eof()) {
2338       std::string line;
2339       std::getline(strm, line);
2340       if (!strm.eof()) {
2341         ++i;
2342       }
2343       if (i > 1700) {
2344         break;
2345       }
2346     }
2347     TEST_ASSERT(i == 1663);
2348   }
2349   // looks good, now do a supplier:
2350   {
2351     auto *strm = new gzstream(maefname2);
2352 
2353     MaeMolSupplier maesup(strm);
2354     unsigned int i = 0;
2355     std::shared_ptr<ROMol> nmol;
2356     while (!maesup.atEnd()) {
2357       nmol.reset(maesup.next());
2358       if (nmol != nullptr) {
2359         i++;
2360       }
2361     }
2362     TEST_ASSERT(i == 16);
2363   }
2364 #endif  // RDK_BUILD_MAEPARSER_SUPPORT
2365 
2366   return 1;
2367 }
2368 
testMissingCRSDSupplier()2369 void testMissingCRSDSupplier() {
2370   std::string rdbase = getenv("RDBASE");
2371   std::string infile =
2372       rdbase + "/Code/GraphMol/FileParsers/test_data/missingCR.sdf";
2373   SDMolSupplier reader(infile);
2374   auto *mol = reader.next();
2375   delete mol;
2376   TEST_ASSERT(reader.atEnd());
2377 }
2378 
testIssue3482695()2379 void testIssue3482695() {
2380   std::string rdbase = getenv("RDBASE");
2381   std::string infile =
2382       rdbase + "/Code/GraphMol/FileParsers/test_data/Issue3482695.sdf";
2383   SDMolSupplier reader(infile);
2384   ROMol *nmol = reader.next();
2385   TEST_ASSERT(nmol);
2386   TEST_ASSERT(nmol->getNumAtoms() == 0);
2387   TEST_ASSERT(nmol->hasProp("test"));
2388   delete nmol;
2389 }
2390 
testIssue3525673()2391 void testIssue3525673() {
2392   std::string rdbase = getenv("RDBASE");
2393   std::string infile =
2394       rdbase + "/Code/GraphMol/FileParsers/test_data/Issue3525673.sdf";
2395   std::ifstream ins(infile.c_str());
2396   ForwardSDMolSupplier reader(&ins, false);
2397   ROMol *nmol;
2398 
2399   nmol = reader.next();
2400   TEST_ASSERT(nmol);
2401   delete nmol;
2402 
2403   nmol = reader.next();
2404   TEST_ASSERT(nmol);
2405   TEST_ASSERT(nmol->getNumAtoms() == 37);
2406   delete nmol;
2407 
2408   nmol = reader.next();
2409   TEST_ASSERT(nmol);
2410   delete nmol;
2411 
2412   nmol = reader.next();
2413   TEST_ASSERT(nmol);
2414   delete nmol;
2415 
2416   nmol = reader.next();
2417   TEST_ASSERT(nmol);
2418   TEST_ASSERT(nmol->getNumAtoms() == 58);
2419   delete nmol;
2420 
2421   nmol = reader.next();
2422   TEST_ASSERT(nmol);
2423   delete nmol;
2424 
2425   nmol = reader.next();
2426   TEST_ASSERT(!nmol);  // broken due to 'foo' in counts line!
2427 
2428   nmol = reader.next();
2429   TEST_ASSERT(nmol);
2430   TEST_ASSERT(nmol->getNumAtoms() == 58);
2431   delete nmol;
2432 
2433   nmol = reader.next();
2434   TEST_ASSERT(nmol);
2435   delete nmol;
2436 }
2437 
testBlankLinesInProps()2438 void testBlankLinesInProps() {
2439   std::string rdbase = getenv("RDBASE");
2440   std::string infile =
2441       rdbase + "/Code/GraphMol/FileParsers/test_data/BlankPropLines.sdf";
2442   std::ifstream ins(infile.c_str());
2443   ForwardSDMolSupplier reader(&ins, false);
2444   ROMol *nmol;
2445   std::string pval;
2446 
2447   nmol = reader.next();
2448   TEST_ASSERT(nmol);
2449   TEST_ASSERT(nmol->getNumAtoms() == 19);
2450   TEST_ASSERT(nmol->hasProp("MultiLineProperty1"));
2451   nmol->getProp("MultiLineProperty1", pval);
2452   TEST_ASSERT(pval == "foo\nbar\n \nbaz");
2453   TEST_ASSERT(nmol->hasProp("MultiLineProperty2"));
2454   TEST_ASSERT(!(nmol->hasProp("fooprop")));
2455   nmol->getProp("MultiLineProperty2", pval);
2456   TEST_ASSERT(pval == "foo\n>  <fooprop>\nbaz\n ");
2457   delete nmol;
2458 }
2459 
testSkipLines()2460 void testSkipLines() {
2461   std::string rdbase = getenv("RDBASE");
2462   std::string infile =
2463       rdbase + "/Code/GraphMol/FileParsers/test_data/SkipLines.sdf";
2464   std::ifstream ins(infile.c_str());
2465   ForwardSDMolSupplier reader(&ins, false);
2466   ROMol *nmol;
2467   std::string pval;
2468 
2469   nmol = reader.next();
2470   TEST_ASSERT(nmol);
2471   TEST_ASSERT(nmol->getNumAtoms() == 1);
2472   TEST_ASSERT(nmol->hasProp("prop1"));
2473   delete nmol;
2474 }
2475 
testGitHub23()2476 void testGitHub23() {
2477   std::string rdbase = getenv("RDBASE");
2478   std::string ofile =
2479       rdbase + "/Code/GraphMol/FileParsers/test_data/blah_molsupplier.sdf";
2480   auto *writer = new SDWriter(ofile);
2481 
2482   ROMol *mol = SmilesToMol("CCCC");
2483   INT_VECT iv;
2484   iv.push_back(1);
2485   iv.push_back(2);
2486   mol->setProp("pval", iv);
2487   writer->write(*mol);
2488   delete mol;
2489 
2490   writer->close();
2491   delete writer;
2492 }
2493 
testGitHub88()2494 void testGitHub88() {
2495   std::string rdbase = getenv("RDBASE");
2496   std::string infile =
2497       rdbase + "/Code/GraphMol/FileParsers/test_data/github88.v3k.sdf";
2498   std::ifstream ins(infile.c_str());
2499   ForwardSDMolSupplier reader(&ins, false);
2500   ROMol *nmol;
2501 
2502   nmol = reader.next();
2503   TEST_ASSERT(nmol);
2504   TEST_ASSERT(nmol->getNumAtoms() == 8);
2505   TEST_ASSERT(nmol->hasProp("prop1"));
2506   std::string pval;
2507   nmol->getProp("prop1", pval);
2508   TEST_ASSERT(pval == "4");
2509   delete nmol;
2510 }
2511 
testGitHub2285()2512 void testGitHub2285() {
2513   std::string rdbase = getenv("RDBASE");
2514   std::string fname =
2515       rdbase + "/Code/GraphMol/FileParsers/test_data/github2285.sdf";
2516 
2517   std::vector<std::string> smiles;
2518   {
2519     SDMolSupplier sdsup(fname);
2520     while (!sdsup.atEnd()) {
2521       ROMol *nmol = sdsup.next();
2522       TEST_ASSERT(nmol);
2523       smiles.push_back(MolToSmiles(*nmol));
2524       delete nmol;
2525     }
2526   }
2527   {
2528     SDMolSupplier sdsup(fname, true, false);
2529     int i = 0;
2530     while (!sdsup.atEnd()) {
2531       ROMol *nmol = sdsup.next();
2532       TEST_ASSERT(nmol);
2533       ROMol *m = MolOps::removeHs(*nmol);
2534       TEST_ASSERT(MolToSmiles(*m) == smiles[i++]);
2535       delete nmol;
2536       delete m;
2537     }
2538     TEST_ASSERT(i > 0);
2539   }
2540 }
2541 
testGitHub2479()2542 void testGitHub2479() {
2543   std::string smiles1 = R"DATA(smiles id
2544 c1ccccc duff
2545 c1ccccc1 ok
2546 C(C garbage
2547 C1CC1 ok2
2548 CC(C)(C)(C)C duff2
2549 )DATA";
2550   {
2551     SmilesMolSupplier suppl;
2552     suppl.setData(smiles1);
2553     unsigned int cnt = 0;
2554     while (!suppl.atEnd()) {
2555       std::unique_ptr<ROMol> mol(suppl.next());
2556       if (cnt % 2) {
2557         TEST_ASSERT(mol);
2558       }
2559       ++cnt;
2560     }
2561     TEST_ASSERT(cnt == 5);
2562   }
2563 
2564   std::string sdf1 = R"SDF(
2565   Mrv1810 06051911332D
2566 
2567   3  2  0  0  0  0            999 V2000
2568   -13.3985    4.9850    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2569   -12.7066    5.4343    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
2570   -12.0654    4.9151    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2571   1  2  1  0  0  0  0
2572   2  3  1  0  0  0  0
2573 M  END
2574 $$$$
2575 
2576   Mrv1810 06051911332D
2577 
2578   3  2  0  0  0  0            999 V2000
2579   -10.3083    4.8496    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2580    -9.6408    5.3345    0.0000 F   0  0  0  0  0  0  0  0  0  0  0  0
2581    -9.0277    4.7825    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2582   1  2  1  0  0  0  0
2583   2  3  1  0  0  0  0
2584 M  END
2585 $$$$
2586 )SDF";
2587   {
2588     std::stringstream iss(sdf1);
2589     SDMolSupplier suppl(&iss, false);
2590     std::unique_ptr<ROMol> mol1(suppl.next());
2591     TEST_ASSERT(mol1);
2592     std::unique_ptr<ROMol> mol2(suppl.next());
2593     TEST_ASSERT(!mol2);
2594     TEST_ASSERT(suppl.atEnd());
2595   }
2596   {
2597     std::stringstream iss(sdf1);
2598     ForwardSDMolSupplier suppl(&iss, false);
2599     std::unique_ptr<ROMol> mol1(suppl.next());
2600     TEST_ASSERT(mol1);
2601     std::unique_ptr<ROMol> mol2(suppl.next());
2602     TEST_ASSERT(!mol2);
2603     TEST_ASSERT(!suppl.atEnd());
2604     TEST_ASSERT(!suppl.getEOFHitOnRead());
2605     std::unique_ptr<ROMol> mol3(suppl.next());
2606     TEST_ASSERT(!mol3);
2607     TEST_ASSERT(suppl.atEnd());
2608     TEST_ASSERT(suppl.getEOFHitOnRead());
2609   }
2610 
2611   // truncated file1
2612   std::string sdf2 = R"SDF(
2613   Mrv1810 06051911332D
2614 
2615   3  2  0  0  0  0            999 V2000
2616   -13.3985    4.9850    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2617   -12.7066    5.4343    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
2618   -12.0654    4.9151    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2619   1  2  1  0  0  0  0
2620   2  3  1  0  0  0  0
2621 M  END
2622 $$$$
2623 
2624   Mrv1810 06051911332D
2625 
2626   3  2  0  0  0  0            999 V2000
2627   -10.3083    4.8496    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2628    -9.6408    5.3345    0.0000 F   0  0  0  0  0  0  0  0  0  0  0  0
2629    -9.0277    4.7825    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2630   1  2  1  0  0  0  0
2631   2  3  1  0  0  0  0
2632 M  END
2633 $$$$
2634 
2635   Mrv1810 06051911332D
2636 
2637   3  2  0  0  0  0            999 V2000
2638   -10.3083    4.8496    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2639    -9.6
2640 )SDF";
2641   {
2642     std::stringstream iss(sdf2);
2643     SDMolSupplier suppl(&iss, false);
2644     std::unique_ptr<ROMol> mol1(suppl.next());
2645     TEST_ASSERT(mol1);
2646     std::unique_ptr<ROMol> mol2(suppl.next());
2647     TEST_ASSERT(!mol2);
2648     std::unique_ptr<ROMol> mol3(suppl.next());
2649     TEST_ASSERT(!mol3);
2650     TEST_ASSERT(suppl.atEnd());
2651   }
2652   {
2653     std::stringstream iss(sdf2);
2654     ForwardSDMolSupplier suppl(&iss, false);
2655     std::unique_ptr<ROMol> mol1(suppl.next());
2656     TEST_ASSERT(mol1);
2657     std::unique_ptr<ROMol> mol2(suppl.next());
2658     TEST_ASSERT(!mol2);
2659     TEST_ASSERT(!suppl.atEnd());
2660     TEST_ASSERT(!suppl.getEOFHitOnRead());
2661     std::unique_ptr<ROMol> mol3(suppl.next());
2662     TEST_ASSERT(!mol3);
2663     TEST_ASSERT(suppl.atEnd());
2664     TEST_ASSERT(!suppl.getEOFHitOnRead());
2665   }
2666   // truncated file2
2667   std::string sdf3 = R"SDF(
2668   Mrv1810 06051911332D
2669 
2670   3  2  0  0  0  0            999 V2000
2671   -13.3985    4.9850    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2672   -12.7066    5.4343    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
2673   -12.0654    4.9151    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2674   1  2  1  0  0  0  0
2675   2  3  1  0  0  0  0
2676 M  END
2677 >  <pval>  (1)
2678 [1,2,]
2679 
2680 $$$$
2681 
2682   Mrv1810 06051911332D
2683 
2684   3  2  0  0  0  0            999 V2000
2685   -10.3083    4.8496    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2686    -9.6408    5.3345    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
2687    -9.0277    4.7825    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
2688   1  2  1  0  0  0  0
2689   2  3  1  0  0  0  0
2690 M  END
2691 >  <pval>  (1)
2692 [1,2,]
2693 )SDF";
2694   {
2695     std::stringstream iss(sdf3);
2696     SDMolSupplier suppl(&iss, false);
2697     std::unique_ptr<ROMol> mol1(suppl.next());
2698     TEST_ASSERT(mol1);
2699     std::unique_ptr<ROMol> mol2(suppl.next());
2700     TEST_ASSERT(mol2);
2701     TEST_ASSERT(suppl.atEnd());
2702   }
2703   {
2704     std::stringstream iss(sdf3);
2705     ForwardSDMolSupplier suppl(&iss, false);
2706     std::unique_ptr<ROMol> mol1(suppl.next());
2707     TEST_ASSERT(mol1);
2708     std::unique_ptr<ROMol> mol2(suppl.next());
2709     TEST_ASSERT(mol2);
2710     TEST_ASSERT(suppl.atEnd());
2711   }
2712 }
2713 
#ifdef RDK_BUILD_MAEPARSER_SUPPORT
// GitHub #2881: reading the Maestro block below with sanitization switched
// off must not yield a molecule. next() may either return null or throw
// Invar::Invariant; both outcomes are accepted.
void testGitHub2881() {
  std::string data = R"DATA(f_m_ct {
 s_m_title
 s_m_entry_id
 s_m_entry_name
 s_m_Source_Path
 s_m_Source_File
 i_m_Source_File_Index
 s_st_Chirality_1
 s_st_Chirality_2
 s_m_subgroup_title
 s_m_subgroupid
 b_m_subgroup_collapsed
 i_m_ct_format
 :::
 "Untitled Document-4"
  17
  newTemplates2.1
  /Users/nicola/schrodinger/coordgen_standalone
  templates.mae
  17
  3_S_4_6_2
  7_S_8_9_6_10
  templates->templates->templates
  templates->templates1->templates11
  0
  2
 m_depend[2] {
  # First column is dependency index #
  i_m_depend_dependency
  s_m_depend_property
  :::
  1 10 s_st_Chirality_1
  2 10 s_st_Chirality_2
  :::
 }
 m_atom[15] {
  # First column is atom index #
  i_m_mmod_type
  r_m_x_coord
  r_m_y_coord
  r_m_z_coord
  i_m_residue_number
  i_m_color
  i_m_atomic_number
  s_m_color_rgb
  s_m_atom_name
  :::
  1 5 1.186400 1.035900 0.000000 900 2 6 A0A0A0  C1
  2 5 0.370300 1.157000 0.000000 900 2 6 A0A0A0  C2
  3 4 -0.326500 0.715300 0.000000 900 2 6 A0A0A0  C3
  4 5 0.085100 0.000400 0.000000 900 2 6 A0A0A0  C4
  5 26 -0.328300 -0.713600 0.000000 900 43 7 5757FF  N5
  6 5 -1.151500 0.716400 0.000000 900 2 6 A0A0A0  C6
  7 5 -1.564900 0.002400 0.000000 900 2 6 A0A0A0  C7
  8 5 -1.153300 -0.712600 0.000000 900 2 6 A0A0A0  C9
  9 2 1.724800 0.410800 0.000000 900 2 6 A0A0A0  C12
  10 2 1.723800 -0.414200 0.000000 900 2 6 A0A0A0  C13
  11 5 1.183800 -1.037900 0.000000 900 2 6 A0A0A0  C14
  12 5 0.367400 -1.157000 0.000000 900 2 6 A0A0A0  C15
  13 7 2.508100 -0.670100 0.000000 900 2 6 A0A0A0  C16
  14 7 2.993800 -0.003300 0.000000 900 2 6 A0A0A0  C17
  15 29 2.509700 0.664800 0.000000 900 43 7 5757FF  N18
  :::
 }
 m_bond[17] {
  # First column is bond index #
  i_m_from
  i_m_to
  i_m_order
  :::
  1 1 2 1
  2 1 9 1
  3 2 3 1
  4 3 4 1
  5 3 6 1
  6 4 5 1
  7 5 8 1
  8 5 12 1
  9 6 7 1
  10 7 8 1
  11 9 10 2
  12 9 15 1
  13 10 11 1
  14 10 13 1
  15 11 12 1
  16 13 14 2
  17 14 15 1
  :::
 }
}
)DATA";

  // The supplier is told to take ownership of the heap-allocated stream, so
  // it is not deleted here.
  const bool takeOwnership = true;
  const bool sanitize = false;
  auto *maeText = new std::istringstream(data);
  MaeMolSupplier reader(maeText, takeOwnership, sanitize);

  std::unique_ptr<ROMol> parsed;
  try {
    parsed.reset(reader.next());
  } catch (const Invar::Invariant &) {
    // accepted: the reader rejected the record by throwing
  }
  TEST_ASSERT(!parsed);
}
#else
// Built without Maestro support: nothing to test.
void testGitHub2881() {}
#endif
2823 
testGitHub3517()2824 void testGitHub3517() {
2825   std::string rdbase = getenv("RDBASE");
2826   std::string fname =
2827       rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
2828 
2829   SDMolSupplier sdsup(fname);
2830   TEST_ASSERT(!sdsup.atEnd());
2831   size_t l = sdsup.length();
2832   TEST_ASSERT(l > 0);
2833   TEST_ASSERT(!sdsup.atEnd());
2834 }
2835 
main()2836 int main() {
2837   RDLog::InitLogs();
2838 
2839 #if 1
2840   BOOST_LOG(rdErrorLog) << "\n-----------------------------------------\n";
2841   testMolSup();
2842   BOOST_LOG(rdErrorLog) << "Finished: testMolSup()\n";
2843   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2844 
2845   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2846   testRandMolSup();
2847   BOOST_LOG(rdErrorLog) << "Finished: testRandMolSup()\n";
2848   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2849 
2850   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2851   testSmilesSup();
2852   BOOST_LOG(rdErrorLog) << "Finished: testSmilesSup()\n";
2853   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2854 
2855   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2856   testSmilesSupFromText();
2857   BOOST_LOG(rdErrorLog) << "Finished: testSmilesSupFromText()\n";
2858   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2859 
2860   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2861   testSmilesWriter();
2862   BOOST_LOG(rdErrorLog) << "Finished: testSmilesWriter()\n";
2863   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2864 
2865   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2866   testSDWriter();
2867   BOOST_LOG(rdErrorLog) << "Finished: testSDWriter()\n";
2868   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2869 
2870   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2871   testSDSupplierEnding();
2872   BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierEnding()\n";
2873   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2874 
2875   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2876   testSuppliersEmptyFile();
2877   BOOST_LOG(rdErrorLog) << "Finished: testSuppliersEmptyFile()\n";
2878   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2879 
2880   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2881   testCisTrans();
2882   BOOST_LOG(rdErrorLog) << "Finished: testCisTrans()\n";
2883   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2884 
2885   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2886   testStereoRound();
2887   BOOST_LOG(rdErrorLog) << "Finished: testStereoRound()\n";
2888   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2889 
2890   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2891   testIssue226();
2892   BOOST_LOG(rdErrorLog) << "Finished: testIssue226()\n";
2893   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2894 
2895   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2896   testTDTSupplier1();
2897   BOOST_LOG(rdErrorLog) << "Finished: testTDTSupplier1()\n";
2898   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2899 
2900   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2901   testTDTSupplier2();
2902   BOOST_LOG(rdErrorLog) << "Finished: testTDTSupplier2()\n";
2903   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2904 
2905   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2906   testTDTSupplier3();
2907   BOOST_LOG(rdErrorLog) << "Finished: testTDTSupplier3()\n";
2908   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2909 
2910   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2911   testSDSupplierFromText();
2912   BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierFromText()\n";
2913   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2914 
2915   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2916   testSDSupplierStrLax1();
2917   BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierStrLax1()\n";
2918   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2919 
2920   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2921   testSDSupplierStrLax2();
2922   BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierStrLax2()\n";
2923   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2924 
2925   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2926   testSDSupplierFromTextStrLax1();
2927   BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierFromTextStrLax1()\n";
2928   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2929 
2930   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2931   testSDSupplierFromTextStrLax2();
2932   BOOST_LOG(rdErrorLog) << "Finished: testSDSupplierFromTextStrLax2()\n";
2933   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2934 
2935   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2936   testIssue265();
2937   BOOST_LOG(rdErrorLog) << "Finished: testIssue265()\n";
2938   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2939 
2940   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2941   testSDErrorHandling();
2942   BOOST_LOG(rdErrorLog) << "Finished: testSDErrorHandling()\n";
2943   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2944 
2945   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2946   testIssue381();
2947   BOOST_LOG(rdErrorLog) << "Finished: testIssue381()\n";
2948   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2949 
2950   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2951   testSetStreamIndices();
2952   BOOST_LOG(rdErrorLog) << "Finished: testSetStreamIndices()\n";
2953   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2954 
2955   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2956   testMixIterAndRandom();
2957   BOOST_LOG(rdErrorLog) << "Finished: testMixIterAndRandom()\n";
2958   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2959 
2960   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2961   testRemoveHs();
2962   BOOST_LOG(rdErrorLog) << "Finished: testRemoveHs()\n";
2963   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2964 
2965   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2966   testGetItemText();
2967   BOOST_LOG(rdErrorLog) << "Finished: testGetItemText()\n";
2968   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2969 
2970   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2971   testForwardSDSupplier();
2972   BOOST_LOG(rdErrorLog) << "Finished: testForwardSDSupplier()\n";
2973   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2974 
2975   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2976   testMissingCRSDSupplier();
2977   BOOST_LOG(rdErrorLog) << "Finished: testMissingCRSDSupplier()\n";
2978   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2979 
2980   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2981   testIssue3482695();
2982   BOOST_LOG(rdErrorLog) << "Finished: testIssue3482695()\n";
2983   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2984   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2985   testIssue3525673();
2986   BOOST_LOG(rdErrorLog) << "Finished: testIssue3525673()\n";
2987   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2988 
2989   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2990   testBlankLinesInProps();
2991   BOOST_LOG(rdErrorLog) << "Finished: testBlankLinesInProps()\n";
2992   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2993 
2994   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
2995   testSkipLines();
2996   BOOST_LOG(rdErrorLog) << "Finished: testSkipLines()\n";
2997   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
2998 
2999   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3000   testGitHub23();
3001   BOOST_LOG(rdErrorLog) << "Finished: testGitHub23()\n";
3002   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3003 
3004   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3005   testGitHub88();
3006   BOOST_LOG(rdErrorLog) << "Finished: testGitHub88()\n";
3007   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3008 
3009   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3010   testGitHub2285();
3011   BOOST_LOG(rdErrorLog) << "Finished: testGitHub2285()\n";
3012   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3013 #endif
3014 
3015   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3016   testGitHub2479();
3017   BOOST_LOG(rdErrorLog) << "Finished: testGitHub2479()\n";
3018   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3019 
3020   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3021   testGitHub2881();
3022   BOOST_LOG(rdErrorLog) << "Finished: testGitHub2881()\n";
3023   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3024 
3025   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n";
3026   testGitHub3517();
3027   BOOST_LOG(rdErrorLog) << "Finished: testGitHub3517()\n";
3028   BOOST_LOG(rdErrorLog) << "-----------------------------------------\n\n";
3029 
3030   return 0;
3031 }
3032