"""Tests for the multithreaded SMILES and SD mol suppliers.

The multithreaded suppliers do not guarantee that molecules come back in
file order, so every check here compares *sorted* collections.
"""
import doctest
import gzip
import os
import sys
import unittest

from rdkit import Chem
from rdkit import RDConfig, rdBase
from rdkit import __version__


def _testDataPath(fileName):
    """Return the full path of ``fileName`` in the FileParsers test data."""
    return os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                        'FileParsers', 'test_data', fileName)


class TestCase(unittest.TestCase):
    """Exercise the multithreaded suppliers on small reference files."""

    # Molecule names stored in NCI_aids_few.sdf (and its gzipped copy).
    _sdMolNames = ["48", "78", "128", "163", "164", "170", "180", "186",
                   "192", "203", "210", "211", "213", "220", "229", "256"]

    def _collectNamesAndProps(self, supplier):
        """Drain ``supplier``; return the ``_Name`` and ``Column_2`` values.

        ``None`` entries are skipped.  Every other molecule is asserted to
        carry both properties.
        """
        names = []
        props = []
        for mol in supplier:
            if mol is None:
                continue
            self.assertTrue(mol.HasProp("_Name"))
            self.assertTrue(mol.HasProp("Column_2"))
            props.append(mol.GetProp("Column_2"))
            names.append(mol.GetProp("_Name"))
        return names, props

    @staticmethod
    def _collectNames(supplier, skipFalsy=False):
        """Drain ``supplier``; return the ``_Name`` of each kept molecule.

        By default only ``None`` entries are skipped; with ``skipFalsy``
        any falsy entry is skipped instead.
        """
        if skipFalsy:
            return [mol.GetProp("_Name") for mol in supplier if mol]
        return [mol.GetProp("_Name") for mol in supplier if mol is not None]

    def testMultiSmiMolSupplier(self):
        """Read SMILES files positionally, by keyword, and via ``with``."""
        fileN = _testDataPath('first_200.tpsa.csv')
        smiSup = Chem.MultithreadedSmilesMolSupplier(fileN, ",", 0, -1)
        i = 0
        # Deliberately uses the explicit atEnd()/next() protocol rather
        # than a for-loop so that this API is covered too.
        while not smiSup.atEnd():
            mol = next(smiSup)
            if mol:
                i += 1
        self.assertEqual(i, 200)

        fileN = _testDataPath('fewSmi.csv')
        names = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
        props = ["34.14", "25.78", "106.51", "82.78", "60.16",
                 "87.74", "37.38", "77.28", "65.18", "0.00"]

        smiSup = Chem.MultithreadedSmilesMolSupplier(
            fileN, delimiter=",", smilesColumn=1, nameColumn=0, titleLine=0)
        confusedNames, confusedProps = self._collectNamesAndProps(smiSup)
        self.assertEqual(len(confusedNames), 10)
        self.assertEqual(sorted(confusedNames), sorted(names))
        self.assertEqual(sorted(confusedProps), sorted(props))

        # context manager
        with Chem.MultithreadedSmilesMolSupplier(
                fileN, delimiter=",", smilesColumn=1, nameColumn=0,
                titleLine=0) as smiSup:
            confusedNames, confusedProps = self._collectNamesAndProps(smiSup)
        self.assertEqual(len(confusedNames), 10)
        self.assertEqual(sorted(confusedNames), sorted(names))
        self.assertEqual(sorted(confusedProps), sorted(props))

    def testMultiSDMolSupplier(self):
        """Read an SD file directly and through the context manager."""
        fileN = _testDataPath('NCI_aids_few.sdf')
        molNames = self._sdMolNames

        sdSup = Chem.MultithreadedSDMolSupplier(fileN)
        confusedMolNames = self._collectNames(sdSup)
        self.assertEqual(len(molNames), len(confusedMolNames))
        self.assertEqual(sorted(confusedMolNames), sorted(molNames))

        # context manager
        with Chem.MultithreadedSDMolSupplier(fileN) as sdSup:
            confusedMolNames = self._collectNames(sdSup)
        self.assertEqual(len(molNames), len(confusedMolNames))
        self.assertEqual(sorted(confusedMolNames), sorted(molNames))

    # NOTE these are disabled until we rewrite the code to construct a
    # MultithreadedSDMolSupplier from a python stream
    @unittest.skip("Skipping construction from stream")
    def testMultiSDMolSupplierFromStream(self):
        """Read SD data from a plain and from a gzip-compressed stream."""
        molNames = self._sdMolNames

        # try opening with streambuf; ``with`` guarantees the handle is
        # closed even when an assertion fails part-way through
        fileN = _testDataPath('NCI_aids_few.sdf')
        with open(fileN, 'rb') as inf:
            gSup = Chem.SDMolSupplierFromStream(inf)
            confusedMolNames = self._collectNames(gSup, skipFalsy=True)
            self.assertEqual(len(molNames), len(confusedMolNames))
            self.assertEqual(sorted(confusedMolNames), sorted(molNames))

        # try opening with gzip
        fileN = _testDataPath('NCI_aids_few.sdf.gz')
        with gzip.open(fileN) as inf:
            gSup = Chem.SDMolSupplierFromStream(inf)
            confusedMolNames = self._collectNames(gSup, skipFalsy=True)
            self.assertEqual(len(molNames), len(confusedMolNames))
            self.assertEqual(sorted(confusedMolNames), sorted(molNames))


if __name__ == '__main__':
    print("Testing Smiles and SD MultithreadedMolSupplier")
    unittest.main()