#!/usr/bin/env python
"""
test_dataset_info.py

Test that to_matrix() and to_matrix_with_info() return the correct types.

mlpack is free software; you may redistribute it and/or modify it under the
terms of the 3-clause BSD license.  You should have received a copy of the
3-clause BSD license along with mlpack.  If not, see
http://www.opensource.org/licenses/BSD-3-Clause for more information.
"""
import unittest
import pandas as pd
import numpy as np

from mlpack.matrix_utils import to_matrix
from mlpack.matrix_utils import to_matrix_with_info


class TestToMatrix(unittest.TestCase):
    """
    This class defines tests for the to_matrix() utility function.
    """

    def testPandasToMatrix(self):
        """
        Test that a simple pandas numeric matrix can be turned into a numpy
        ndarray.
        """
        colnames = list('abcd')
        d = pd.DataFrame(np.random.randn(100, 4), columns=colnames)

        m, _ = to_matrix(d)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.shape[0], 100)
        self.assertEqual(m.shape[1], 4)
        self.assertEqual(m.dtype, np.dtype(np.double))
        # Every element must match the source dataframe, column by column.
        for i, name in enumerate(colnames):
            for j in range(m.shape[0]):
                self.assertEqual(m[j, i], d[name][j])

    def testPandasIntToMatrix(self):
        """
        Test that a matrix holding ints is properly turned into a double
        matrix.
        """
        d = pd.DataFrame({'a': range(5)})

        m, _ = to_matrix(d)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.shape[0], 5)
        self.assertEqual(m.shape[1], 1)
        for i in range(5):
            # Index the single column explicitly so a scalar is compared.
            self.assertEqual(m[i, 0], i)

    def testPandasMixedToMatrix(self):
        """
        Test that a matrix with one int and one double feature are transformed
        correctly.
        """
        d = pd.DataFrame({'a': range(50)})
        d['b'] = np.random.randn(50, 1)
        # The default integer width is platform-dependent, so accept either.
        self.assertIn(d['a'].dtype, (np.dtype('int32'), np.dtype('int64')))
        self.assertEqual(d['b'].dtype, np.dtype(np.double))

        m, _ = to_matrix(d)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))
        self.assertEqual(m.shape[0], 50)
        self.assertEqual(m.shape[1], 2)
        for i, name in enumerate('ab'):
            for j in range(50):
                self.assertEqual(d[name][j], m[j, i])

    def testArraylikeToMatrix(self):
        """
        Test that if we pass some array, we get back the right thing.  This
        array will be filled with doubles only.
        """
        a = [[0.01, 0.02, 0.03],
             [0.04, 0.05, 0.06],
             [0.07, 0.08, 0.09],
             [0.10, 0.11, 0.12]]

        m, _ = to_matrix(a)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))
        self.assertEqual(m.shape[0], 4)
        self.assertEqual(m.shape[1], 3)

        for i in range(4):
            for j in range(3):
                self.assertEqual(a[i][j], m[i, j])

    def testMultitypeArraylikeToMatrix(self):
        """
        Test that if we pass an array with multiple types, we get back the
        right thing.  The numpy ndarray should be filled with doubles only.
        """
        a = [[0.01, 0.02, 3],
             [0.04, 0.05, 6],
             [0.07, 0.08, 9],
             [0.10, 0.11, 12]]

        m, _ = to_matrix(a)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))
        self.assertEqual(m.shape[0], 4)
        self.assertEqual(m.shape[1], 3)

        for i in range(4):
            for j in range(3):
                self.assertEqual(a[i][j], m[i, j])

    def testNumpyToMatrix(self):
        """
        Make sure we can convert a numpy matrix without copying anything.
        """
        m1 = np.random.randn(100, 5)
        m2, _ = to_matrix(m1)

        self.assertIsInstance(m2, np.ndarray)
        self.assertEqual(m2.dtype, np.dtype(np.double))

        # Identical 'data' entries (buffer address and read-only flag) mean
        # that both arrays refer to the same memory, i.e. no copy was made.
        p1 = m1.__array_interface__
        p2 = m2.__array_interface__

        self.assertEqual(p1['data'], p2['data'])

    def testPandasToMatrixNoCategorical(self):
        """
        Make sure that if we pass a Pandas dataframe with no categorical
        features, we get back the matrix we expect.
        """
        # TODO: this test has no body yet, so it currently passes vacuously.


class TestToMatrixWithInfo(unittest.TestCase):
    """
    This class defines tests for the to_matrix_with_info() utility function.

    In addition to the checks made for to_matrix(), each test inspects the
    third returned value (`dims`): the tests expect one entry per column, 0
    for numeric columns and 1 for categorical columns.
    """

    def testPandasToMatrix(self):
        """
        Test that a simple pandas numeric matrix can be turned into a numpy
        ndarray.
        """
        colnames = list('abcd')
        d = pd.DataFrame(np.random.randn(100, 4), columns=colnames)

        m, _, dims = to_matrix_with_info(d, np.double)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.shape[0], 100)
        self.assertEqual(m.shape[1], 4)
        self.assertEqual(m.dtype, np.dtype(np.double))
        for i, name in enumerate(colnames):
            for j in range(m.shape[0]):
                self.assertEqual(m[j, i], d[name][j])

        # assertEqual, not assertTrue: assertTrue(x, 4) treats 4 as the
        # failure message and never compares the length.
        self.assertEqual(dims.shape[0], 4)
        for k in range(4):
            self.assertEqual(dims[k], 0)

    def testPandasIntToMatrix(self):
        """
        Test that a matrix holding ints is properly turned into a double
        matrix.
        """
        d = pd.DataFrame({'a': range(5)})

        m, _, dims = to_matrix_with_info(d, np.double)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.shape[0], 5)
        self.assertEqual(m.shape[1], 1)
        for i in range(5):
            # Index the single column explicitly so a scalar is compared.
            self.assertEqual(m[i, 0], i)

        # assertEqual, not assertTrue: assertTrue(x, 1) treats 1 as the
        # failure message and never compares the length.
        self.assertEqual(dims.shape[0], 1)
        self.assertEqual(dims[0], 0)

    def testPandasMixedToMatrix(self):
        """
        Test that a matrix with one int and one double feature are transformed
        correctly.
        """
        d = pd.DataFrame({'a': range(50)})
        d['b'] = np.random.randn(50, 1)
        # The default integer width is platform-dependent, so accept either.
        self.assertIn(d['a'].dtype, (np.dtype('int32'), np.dtype('int64')))
        self.assertEqual(d['b'].dtype, np.dtype(np.double))

        m, _, dims = to_matrix_with_info(d, np.double)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))
        self.assertEqual(m.shape[0], 50)
        self.assertEqual(m.shape[1], 2)
        for i, name in enumerate('ab'):
            for j in range(50):
                self.assertEqual(d[name][j], m[j, i])

        self.assertEqual(dims.shape[0], 2)
        for k in range(2):
            self.assertEqual(dims[k], 0)

    def testArraylikeToMatrix(self):
        """
        Test that if we pass some array, we get back the right thing.  This
        array will be filled with doubles only.
        """
        a = [[0.01, 0.02, 0.03],
             [0.04, 0.05, 0.06],
             [0.07, 0.08, 0.09],
             [0.10, 0.11, 0.12]]

        m, _, dims = to_matrix_with_info(a, np.double)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))
        self.assertEqual(m.shape[0], 4)
        self.assertEqual(m.shape[1], 3)

        for i in range(4):
            for j in range(3):
                self.assertEqual(a[i][j], m[i, j])

        self.assertEqual(dims.shape[0], 3)
        for k in range(3):
            self.assertEqual(dims[k], 0)

    def testMultitypeArraylikeToMatrix(self):
        """
        Test that if we pass an array with multiple types, we get back the
        right thing.  The numpy ndarray should be filled with doubles only.
        """
        a = [[0.01, 0.02, 3],
             [0.04, 0.05, 6],
             [0.07, 0.08, 9],
             [0.10, 0.11, 12]]

        m, _, dims = to_matrix_with_info(a, np.double)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))
        self.assertEqual(m.shape[0], 4)
        self.assertEqual(m.shape[1], 3)

        for i in range(4):
            for j in range(3):
                self.assertEqual(a[i][j], m[i, j])

        self.assertEqual(dims.shape[0], 3)
        for k in range(3):
            self.assertEqual(dims[k], 0)

    def testNumpyToMatrix(self):
        """
        Make sure we can convert a numpy matrix without copying anything.
        """
        m1 = np.random.randn(100, 5)
        m2, _, dims = to_matrix_with_info(m1, np.double)

        self.assertIsInstance(m2, np.ndarray)
        self.assertEqual(m2.dtype, np.dtype(np.double))

        # Identical 'data' entries (buffer address and read-only flag) mean
        # that both arrays refer to the same memory, i.e. no copy was made.
        p1 = m1.__array_interface__
        p2 = m2.__array_interface__

        self.assertEqual(p1['data'], p2['data'])

        self.assertEqual(dims.shape[0], 5)
        for k in range(5):
            self.assertEqual(dims[k], 0)

    def testCategoricalOnly(self):
        """
        Make sure that we can convert a categorical-only Pandas matrix.
        """
        d = pd.DataFrame({"A": ["a", "b", "c", "a"]})
        d["A"] = d["A"].astype('category')  # Convert to categorical.

        m, _, dims = to_matrix_with_info(d, np.double)

        self.assertIsInstance(m, np.ndarray)
        self.assertEqual(m.dtype, np.dtype(np.double))

        self.assertEqual(dims.shape[0], 1)
        self.assertEqual(dims[0], 1)

        self.assertEqual(m.shape[0], 4)
        self.assertEqual(m.shape[1], 1)
        # Rows 0 and 3 hold the same category ("a") and so must map to the
        # same value; the three distinct categories must map to distinct
        # values.
        self.assertEqual(m[0, 0], m[3, 0])
        self.assertNotEqual(m[0, 0], m[1, 0])
        self.assertNotEqual(m[1, 0], m[2, 0])
        self.assertNotEqual(m[0, 0], m[2, 0])


def test_suite():
    """
    Run all tests.
    """
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    suite.addTest(loader.loadTestsFromTestCase(TestToMatrix))
    suite.addTest(loader.loadTestsFromTestCase(TestToMatrixWithInfo))
    return suite


if __name__ == '__main__':
    unittest.main()