1# Copyright (C) 2008-2016, Luis Pedro Coelho <luis@luispedro.org> 2# vim: set ts=4 sts=4 sw=4 expandtab smartindent: 3# 4# License: MIT (see COPYING file) 5 6 7import numpy as np 8from . import _texture 9from ..internal import _verify_is_integer_type 10 11__all__ = [ 12 'haralick', 13 'haralick_labels', 14 'cooccurence', 15 ] 16 17def _entropy(p): 18 p = p.ravel() 19 p1 = p.copy() 20 p1 += (p==0) 21 return -np.dot(np.log2(p1), p) 22 23 24def haralick(f, 25 ignore_zeros=False, 26 preserve_haralick_bug=False, 27 compute_14th_feature=False, 28 return_mean=False, 29 return_mean_ptp=False, 30 use_x_minus_y_variance=False, 31 distance=1 32 ): 33 ''' 34 feats = haralick(f, 35 ignore_zeros=False, 36 preserve_haralick_bug=False, 37 compute_14th_feature=False, 38 return_mean=False, 39 return_mean_ptp=False, 40 use_x_minus_y_variance=False, 41 distance=1 42 ) 43 44 Compute Haralick texture features 45 46 Computes the Haralick texture features for the four 2-D directions or 47 thirteen 3-D directions (depending on the dimensions of `f`). 48 49 ``ignore_zeros`` can be used to have the function ignore any zero-valued 50 pixels (as background). If there are no-nonzero neighbour pairs in all 51 directions, an exception is raised. Note that this can happen even with 52 some non-zero pixels, e.g.:: 53 54 0 0 0 0 55 0 1 0 0 56 0 1 0 0 57 0 0 0 0 58 59 would trigger an error when ``ignore_zeros=True`` as there are no 60 horizontal non-zero pairs! 61 62 Notes 63 ----- 64 Haralick's paper has a typo in one of the equations. This function 65 implements the correct feature unless `preserve_haralick_bug` is True. The 66 only reason why you'd want the buggy behaviour is if you want to match 67 another implementation. 68 69 References 70 ---------- 71 72 Cite the following reference for these features:: 73 74 @article{Haralick1973, 75 author = {Haralick, Robert M. and Dinstein, Its'hak and Shanmugam, K.}, 76 journal = {Ieee Transactions On Systems Man And Cybernetics}, 77 number = {6}, 78 pages = {610--621}, 79 publisher = {IEEE}, 80 title = {Textural features for image classification}, 81 url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4309314}, 82 volume = {3}, 83 year = {1973} 84 } 85 86 Parameters 87 ---------- 88 f : ndarray of integer type 89 input image. 2-D and 3-D images are supported. 90 distance: int, optional (default=1) 91 The distance to consider while computing the cooccurence matrix. 92 ignore_zeros : bool, optional 93 Whether to ignore zero pixels (default: False). 94 95 Other Parameters 96 ---------------- 97 preserve_haralick_bug : bool, optional 98 whether to replicate Haralick's typo (default: False). 99 You probably want to always set this to ``False`` unless you want to 100 replicate someone else's wrong implementation. 101 compute_14th_feature : bool, optional 102 whether to compute & return the 14-th feature 103 return_mean : bool, optional 104 When set, the function returns the mean across all the directions 105 (default: False). 106 return_mean_ptp : bool, optional 107 When set, the function returns the mean and ptp (point-to-point, i.e., 108 difference between max() and min()) across all the directions (default: 109 False). 110 use_x_minus_y_variance : bool, optional 111 Feature 10 (index 9) has two interpretations, as the variance of \|x-y\| 112 or as the variance of P(\|x-y\|). In order to achieve compatibility with 113 other software and previous versions of mahotas, mahotas defaults to 114 using ``VAR[P(\|x-y\|)]``; if this argument is True, then it uses 115 ``VAR[\|x-y\|]`` (default: False) 116 117 118 Returns 119 ------- 120 feats : ndarray of np.double 121 A 4x13 or 4x14 feature vector (one row per direction) if `f` is 2D, 122 13x13 or 13x14 if it is 3D. The exact number of features depends on the 123 value of ``compute_14th_feature`` Also, if either ``return_mean`` or 124 ``return_mean_ptp`` is set, then a single dimensional array is 125 returned. 126 ''' 127 _verify_is_integer_type(f, 'mahotas.haralick') 128 129 if len(f.shape) == 2: 130 nr_dirs = len(_2d_deltas) 131 elif len(f.shape) == 3: 132 nr_dirs = len(_3d_deltas) 133 else: 134 raise ValueError('mahotas.texture.haralick: Can only handle 2D and 3D images.') 135 fm1 = f.max() + 1 136 cmat = np.empty((fm1, fm1), np.int32) 137 def all_cmatrices(): 138 for dir in range(nr_dirs): 139 cooccurence(f, dir, cmat, symmetric=True, distance=distance) 140 yield cmat 141 return haralick_features(all_cmatrices(), 142 ignore_zeros=ignore_zeros, 143 preserve_haralick_bug=preserve_haralick_bug, 144 compute_14th_feature=compute_14th_feature, 145 return_mean=return_mean, 146 return_mean_ptp=return_mean_ptp, 147 use_x_minus_y_variance=use_x_minus_y_variance, 148 ) 149 150def haralick_features(cmats, 151 ignore_zeros=False, 152 preserve_haralick_bug=False, 153 compute_14th_feature=False, 154 return_mean=False, 155 return_mean_ptp=False, 156 use_x_minus_y_variance=False, 157 ): 158 ''' 159 features = haralick_features(cmats, 160 ignore_zeros=False, 161 preserve_haralick_bug=False, 162 compute_14th_feature=False, 163 return_mean=False, 164 return_mean_ptp=False, 165 use_x_minus_y_variance=False, 166 ) 167 168 Computers Haralick features for the given cooccurrence matrices. 169 170 This function is not usually necessary, as you can call ``haralick`` with 171 an image to obtain features for that image. Use only if you know what you 172 are doing. 173 174 Notes 175 ----- 176 Haralick's paper has a typo in one of the equations. This function 177 implements the correct feature unless `preserve_haralick_bug` is True. The 178 only reason why you'd want the buggy behaviour is if you want to match 179 another implementation. 180 181 References 182 ---------- 183 184 Cite the following reference for these features:: 185 186 @article{Haralick1973, 187 author = {Haralick, Robert M. and Dinstein, Its'hak and Shanmugam, K.}, 188 journal = {Ieee Transactions On Systems Man And Cybernetics}, 189 number = {6}, 190 pages = {610--621}, 191 publisher = {IEEE}, 192 title = {Textural features for image classification}, 193 url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4309314}, 194 volume = {3}, 195 year = {1973} 196 } 197 198 199 Parameters 200 ---------- 201 cmats : sequence of ndarrays 202 This should be a sequence of ndarrays, all square and all of the same 203 shape. 204 ignore_zeros : bool, optional 205 Whether to ignore zero pixels (default: False). 206 207 Other Parameters 208 ---------------- 209 preserve_haralick_bug : bool, optional 210 whether to replicate Haralick's typo (default: False). 211 You probably want to always set this to ``False`` unless you want to 212 replicate someone else's wrong implementation. 213 compute_14th_feature : bool, optional 214 whether to compute & return the 14-th feature 215 return_mean : bool, optional 216 When set, the function returns the mean across all the directions 217 (default: False). 218 return_mean_ptp : bool, optional 219 When set, the function returns the mean and ptp (point-to-point, i.e., 220 difference between max() and min()) across all the directions (default: 221 False). 222 use_x_minus_y_variance : bool, optional 223 Feature 10 (index 9) has two interpretations, as the variance of |x-y| 224 or as the variance of P(|x-y|). In order to achieve compatibility with 225 other software and previous versions of mahotas, mahotas defaults to 226 using ``VAR[P(|x-y|)]``; if this argument is True, then it uses 227 ``VAR[|x-y|]`` (default: False) 228 229 Returns 230 ------- 231 feats : ndarray of np.double 232 A 4x13 or 4x14 feature vector (one row per direction) if `f` is 2D, 233 13x13 or 13x14 if it is 3D. The exact number of features depends on the 234 value of ``compute_14th_feature`` Also, if either ``return_mean`` or 235 ``return_mean_ptp`` is set, then a single dimensional array is 236 returned. 237 238 See Also 239 -------- 240 haralick : function 241 compute Haralick features for an image 242 ''' 243 if return_mean and return_mean_ptp: 244 raise ValueError("mahotas.haralick_features: Cannot set both `return_mean` and `return_mean_ptp`") 245 features = [] 246 for cmat in cmats: 247 feats = np.zeros(13 + bool(compute_14th_feature), np.double) 248 if ignore_zeros: 249 cmat[0] = 0 250 cmat[:,0] = 0 251 T = cmat.sum() 252 if not T: 253 raise ValueError('mahotas.haralick_features: the input is empty. Cannot compute features!\n' + 254 'This can happen if you are using `ignore_zeros`' ) 255 if not len(features): 256 maxv = len(cmat) 257 k = np.arange(maxv) 258 k2 = k**2 259 tk = np.arange(2*maxv) 260 tk2 = tk**2 261 i,j = np.mgrid[:maxv,:maxv] 262 ij = i*j 263 i_j2_p1 = (i - j)**2 264 i_j2_p1 += 1 265 i_j2_p1 = 1. / i_j2_p1 266 i_j2_p1 = i_j2_p1.ravel() 267 px_plus_y = np.empty(2*maxv, np.double) 268 px_minus_y = np.empty(maxv, np.double) 269 elif maxv != len(cmat): 270 raise ValueError('mahotas.haralick_features: All cmatrices must be of the same size') 271 272 p = cmat / float(T) 273 pravel = p.ravel() 274 px = p.sum(0) 275 py = p.sum(1) 276 277 ux = np.dot(px, k) 278 uy = np.dot(py, k) 279 vx = np.dot(px, k2) - ux**2 280 vy = np.dot(py, k2) - uy**2 281 282 sx = np.sqrt(vx) 283 sy = np.sqrt(vy) 284 px_plus_y.fill(0) 285 px_minus_y.fill(0) 286 _texture.compute_plus_minus(p, px_plus_y, px_minus_y) 287 288 feats[0] = np.dot(pravel, pravel) 289 feats[1] = np.dot(k2, px_minus_y) 290 291 if sx == 0. or sy == 0.: 292 feats[2] = 1. 293 else: 294 feats[2] = (1. / sx / sy) * (np.dot(ij.ravel(), pravel) - ux * uy) 295 296 feats[3] = vx 297 feats[4] = np.dot(i_j2_p1, pravel) 298 feats[5] = np.dot(tk, px_plus_y) 299 300 feats[7] = _entropy(px_plus_y) 301 302 # There is some confusion w.r.t. feats[6]. 303 # 304 # Haralick's paper uses feats[7] in its computation, but it is 305 # clear that feats[5] should be used (i.e., it computes a 306 # variance). 307 # 308 if preserve_haralick_bug: 309 feats[6] = ((tk-feats[7])**2*px_plus_y).sum() 310 else: 311 feats[6] = np.dot(tk2, px_plus_y) - feats[5]**2 312 313 feats[ 8] = _entropy(pravel) 314 if use_x_minus_y_variance: 315 mu_x_minus_y = np.dot(px_minus_y, k) 316 mu_x_minus_y_sq = np.dot(px_minus_y, k2) 317 feats[ 9] = mu_x_minus_y_sq - mu_x_minus_y**2 318 else: 319 feats[ 9] = px_minus_y.var() 320 feats[10] = _entropy(px_minus_y) 321 322 HX = _entropy(px) 323 HY = _entropy(py) 324 crosspxpy = np.outer(px,py) 325 crosspxpy += (crosspxpy == 0) # This makes log(0) become log(1), and thus evaluate to zero, such that everything works below: 326 crosspxpy = crosspxpy.ravel() 327 HXY1 = -np.dot(pravel, np.log2(crosspxpy)) 328 HXY2 = _entropy(crosspxpy) 329 330 if max(HX, HY) == 0.: 331 feats[11] = (feats[8]-HXY1) 332 else: 333 feats[11] = (feats[8]-HXY1)/max(HX,HY) 334 feats[12] = np.sqrt(max(0,1 - np.exp( -2. * (HXY2 - feats[8])))) 335 336 if compute_14th_feature: 337 # Square root of the second largest eigenvalue of the correlation matrix 338 # Probably the faster way to do this is just SVD the whole (likely rank deficient) matrix 339 # grab the second highest singular value . . . Instead, we just amputate the empty rows/cols and move on. 340 nzero_rc = px != 0 341 nz_pmat = p[nzero_rc,:][:,nzero_rc] # Symmetric, so this is ok! 342 if nz_pmat.shape[0] > 2: 343 ccm = np.corrcoef(nz_pmat) 344 e_vals = np.linalg.eigvalsh(ccm) 345 e_vals.sort() 346 feats[13] = np.sqrt(e_vals[-2]) 347 else: 348 feats[13] = 0 349 features.append(feats) 350 351 features = np.array(features) 352 if return_mean: 353 return features.mean(axis=0) 354 if return_mean_ptp: 355 mean = features.mean(axis=0) 356 ptp = features.ptp(axis=0) 357 return np.concatenate((mean,ptp)) 358 359 return features 360 361 362haralick_labels = ["Angular Second Moment", 363 "Contrast", 364 "Correlation", 365 "Sum of Squares: Variance", 366 "Inverse Difference Moment", 367 "Sum Average", 368 "Sum Variance", 369 "Sum Entropy", 370 "Entropy", 371 "Difference Variance", 372 "Difference Entropy", 373 "Information Measure of Correlation 1", 374 "Information Measure of Correlation 2", 375 "Maximal Correlation Coefficient"] 376 377_2d_deltas= [ 378 (0,1), 379 (1,1), 380 (1,0), 381 (1,-1)] 382 383_3d_deltas = [ 384 (1, 0, 0), 385 (1, 1, 0), 386 (0, 1, 0), 387 (1,-1, 0), 388 (0, 0, 1), 389 (1, 0, 1), 390 (0, 1, 1), 391 (1, 1, 1), 392 (1,-1, 1), 393 (1, 0,-1), 394 (0, 1,-1), 395 (1, 1,-1), 396 (1,-1,-1) ] 397 398def cooccurence(f, direction, output=None, symmetric=True, distance=1): 399 ''' 400 cooccurence_matrix = cooccurence(f, 401 direction, 402 output={new matrix}, 403 symmetric=True, 404 distance=1) 405 406 Compute grey-level cooccurence matrix 407 408 Parameters 409 ---------- 410 f : ndarray of integer type 411 The input image 412 direction : integer 413 Direction as index into (horizontal [default], diagonal 414 [nw-se], vertical, diagonal [ne-sw]) 415 output : np.long 2 ndarray, optional 416 preallocated result. 417 symmetric : boolean, optional 418 whether return a symmetric matrix (default: True) 419 distance : integer, optional (default=1) 420 Distance to the central pixel to consider. 421 422 Returns 423 ------- 424 cooccurence_matrix : cooccurence matrix 425 ''' 426 _verify_is_integer_type(f, 'mahotas.cooccurence') 427 if len(f.shape) == 2 and not (0 <= direction < 4): 428 raise ValueError('mahotas.texture.cooccurence: `direction` {0} is not in range(4).'.format(direction)) 429 elif len(f.shape) == 3 and not (0 <= direction < 13): 430 raise ValueError('mahotas.texture.cooccurence: `direction` {0} is not in range(13).'.format(direction)) 431 elif len(f.shape) not in (2,3): 432 raise ValueError('mahotas.texture.cooccurence: cannot handle images of %s dimensions.' % len(f.shape)) 433 434 if output is None: 435 mf = f.max() 436 output = np.zeros((mf+1, mf+1), np.int32) 437 else: 438 assert np.min(output.shape) >= f.max(), 'mahotas.texture.cooccurence: output is not large enough' 439 assert output.dtype == np.int32, 'mahotas.texture.cooccurence: output is not of type np.int32' 440 output.fill(0) 441 442 if len(f.shape) == 2: 443 mask_size = 2 * distance + 1 444 Bc = np.zeros((mask_size, mask_size), f.dtype) 445 y, x = tuple(distance * i for i in _2d_deltas[direction]) 446 Bc[y + distance, x + distance] = 1 447 else: 448 mask_size = 2 * distance + 1 449 Bc = np.zeros((mask_size, mask_size, mask_size), f.dtype) 450 y, x, z = tuple(distance * i for i in _3d_deltas[direction]) 451 Bc[y + distance, x + distance, z + distance] = 1 452 _texture.cooccurence(f, output, Bc, symmetric) 453 return output 454 455