1# Copyright (C) 2008-2016, Luis Pedro Coelho <luis@luispedro.org>
2# vim: set ts=4 sts=4 sw=4 expandtab smartindent:
3#
4# License: MIT (see COPYING file)
5
6
7import numpy as np
8from . import _texture
9from ..internal import _verify_is_integer_type
10
11__all__ = [
12    'haralick',
13    'haralick_labels',
14    'cooccurence',
15    ]
16
17def _entropy(p):
18    p = p.ravel()
19    p1 = p.copy()
20    p1 += (p==0)
21    return -np.dot(np.log2(p1), p)
22
23
24def haralick(f,
25            ignore_zeros=False,
26            preserve_haralick_bug=False,
27            compute_14th_feature=False,
28            return_mean=False,
29            return_mean_ptp=False,
30            use_x_minus_y_variance=False,
31            distance=1
32            ):
33    '''
34    feats = haralick(f,
35            ignore_zeros=False,
36            preserve_haralick_bug=False,
37            compute_14th_feature=False,
38            return_mean=False,
39            return_mean_ptp=False,
40            use_x_minus_y_variance=False,
41            distance=1
42            )
43
44    Compute Haralick texture features
45
46    Computes the Haralick texture features for the four 2-D directions or
47    thirteen 3-D directions (depending on the dimensions of `f`).
48
49    ``ignore_zeros`` can be used to have the function ignore any zero-valued
50    pixels (as background). If there are no-nonzero neighbour pairs in all
51    directions, an exception is raised. Note that this can happen even with
52    some non-zero pixels, e.g.::
53
54         0 0 0 0
55         0 1 0 0
56         0 1 0 0
57         0 0 0 0
58
59    would trigger an error when ``ignore_zeros=True`` as there are no
60    horizontal non-zero pairs!
61
62    Notes
63    -----
64    Haralick's paper has a typo in one of the equations. This function
65    implements the correct feature unless `preserve_haralick_bug` is True. The
66    only reason why you'd want the buggy behaviour is if you want to match
67    another implementation.
68
69    References
70    ----------
71
72    Cite the following reference for these features::
73
74        @article{Haralick1973,
75            author = {Haralick, Robert M. and Dinstein, Its'hak and Shanmugam, K.},
76            journal = {Ieee Transactions On Systems Man And Cybernetics},
77            number = {6},
78            pages = {610--621},
79            publisher = {IEEE},
80            title = {Textural features for image classification},
81            url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4309314},
82            volume = {3},
83            year = {1973}
84        }
85
86    Parameters
87    ----------
88    f : ndarray of integer type
89        input image. 2-D and 3-D images are supported.
90    distance: int, optional (default=1)
91        The distance to consider while computing the cooccurence matrix.
92    ignore_zeros : bool, optional
93        Whether to ignore zero pixels (default: False).
94
95    Other Parameters
96    ----------------
97    preserve_haralick_bug : bool, optional
98        whether to replicate Haralick's typo (default: False).
99        You probably want to always set this to ``False`` unless you want to
100        replicate someone else's wrong implementation.
101    compute_14th_feature : bool, optional
102        whether to compute & return the 14-th feature
103    return_mean : bool, optional
104        When set, the function returns the mean across all the directions
105        (default: False).
106    return_mean_ptp : bool, optional
107        When set, the function returns the mean and ptp (point-to-point, i.e.,
108        difference between max() and min()) across all the directions (default:
109        False).
110    use_x_minus_y_variance : bool, optional
111        Feature 10 (index 9) has two interpretations, as the variance of \|x-y\|
112        or as the variance of P(\|x-y\|). In order to achieve compatibility with
113        other software and previous versions of mahotas, mahotas defaults to
114        using ``VAR[P(\|x-y\|)]``; if this argument is True, then it uses
115        ``VAR[\|x-y\|]`` (default: False)
116
117
118    Returns
119    -------
120    feats : ndarray of np.double
121        A 4x13 or 4x14 feature vector (one row per direction) if `f` is 2D,
122        13x13 or 13x14 if it is 3D. The exact number of features depends on the
123        value of ``compute_14th_feature`` Also, if either ``return_mean`` or
124        ``return_mean_ptp`` is set, then a single dimensional array is
125        returned.
126    '''
127    _verify_is_integer_type(f, 'mahotas.haralick')
128
129    if len(f.shape) == 2:
130        nr_dirs = len(_2d_deltas)
131    elif len(f.shape) == 3:
132        nr_dirs = len(_3d_deltas)
133    else:
134        raise ValueError('mahotas.texture.haralick: Can only handle 2D and 3D images.')
135    fm1 = f.max() + 1
136    cmat = np.empty((fm1, fm1), np.int32)
137    def all_cmatrices():
138        for dir in range(nr_dirs):
139            cooccurence(f, dir, cmat, symmetric=True, distance=distance)
140            yield cmat
141    return haralick_features(all_cmatrices(),
142                        ignore_zeros=ignore_zeros,
143                        preserve_haralick_bug=preserve_haralick_bug,
144                        compute_14th_feature=compute_14th_feature,
145                        return_mean=return_mean,
146                        return_mean_ptp=return_mean_ptp,
147                        use_x_minus_y_variance=use_x_minus_y_variance,
148                        )
149
150def haralick_features(cmats,
151                    ignore_zeros=False,
152                    preserve_haralick_bug=False,
153                    compute_14th_feature=False,
154                    return_mean=False,
155                    return_mean_ptp=False,
156                    use_x_minus_y_variance=False,
157                    ):
158    '''
159    features = haralick_features(cmats,
160                    ignore_zeros=False,
161                    preserve_haralick_bug=False,
162                    compute_14th_feature=False,
163                    return_mean=False,
164                    return_mean_ptp=False,
165                    use_x_minus_y_variance=False,
166                    )
167
168    Computers Haralick features for the given cooccurrence matrices.
169
170    This function is not usually necessary, as you can call ``haralick`` with
171    an image to obtain features for that image. Use only if you know what you
172    are doing.
173
174    Notes
175    -----
176    Haralick's paper has a typo in one of the equations. This function
177    implements the correct feature unless `preserve_haralick_bug` is True. The
178    only reason why you'd want the buggy behaviour is if you want to match
179    another implementation.
180
181    References
182    ----------
183
184    Cite the following reference for these features::
185
186        @article{Haralick1973,
187            author = {Haralick, Robert M. and Dinstein, Its'hak and Shanmugam, K.},
188            journal = {Ieee Transactions On Systems Man And Cybernetics},
189            number = {6},
190            pages = {610--621},
191            publisher = {IEEE},
192            title = {Textural features for image classification},
193            url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4309314},
194            volume = {3},
195            year = {1973}
196        }
197
198
199    Parameters
200    ----------
201    cmats : sequence of ndarrays
202        This should be a sequence of ndarrays, all square and all of the same
203        shape.
204    ignore_zeros : bool, optional
205        Whether to ignore zero pixels (default: False).
206
207    Other Parameters
208    ----------------
209    preserve_haralick_bug : bool, optional
210        whether to replicate Haralick's typo (default: False).
211        You probably want to always set this to ``False`` unless you want to
212        replicate someone else's wrong implementation.
213    compute_14th_feature : bool, optional
214        whether to compute & return the 14-th feature
215    return_mean : bool, optional
216        When set, the function returns the mean across all the directions
217        (default: False).
218    return_mean_ptp : bool, optional
219        When set, the function returns the mean and ptp (point-to-point, i.e.,
220        difference between max() and min()) across all the directions (default:
221        False).
222    use_x_minus_y_variance : bool, optional
223        Feature 10 (index 9) has two interpretations, as the variance of |x-y|
224        or as the variance of P(|x-y|). In order to achieve compatibility with
225        other software and previous versions of mahotas, mahotas defaults to
226        using ``VAR[P(|x-y|)]``; if this argument is True, then it uses
227        ``VAR[|x-y|]`` (default: False)
228
229    Returns
230    -------
231    feats : ndarray of np.double
232        A 4x13 or 4x14 feature vector (one row per direction) if `f` is 2D,
233        13x13 or 13x14 if it is 3D. The exact number of features depends on the
234        value of ``compute_14th_feature`` Also, if either ``return_mean`` or
235        ``return_mean_ptp`` is set, then a single dimensional array is
236        returned.
237
238    See Also
239    --------
240    haralick : function
241        compute Haralick features for an image
242    '''
243    if return_mean and return_mean_ptp:
244        raise ValueError("mahotas.haralick_features: Cannot set both `return_mean` and `return_mean_ptp`")
245    features = []
246    for cmat in cmats:
247        feats = np.zeros(13 + bool(compute_14th_feature), np.double)
248        if ignore_zeros:
249            cmat[0] = 0
250            cmat[:,0] = 0
251        T = cmat.sum()
252        if not T:
253            raise ValueError('mahotas.haralick_features: the input is empty. Cannot compute features!\n' +
254                                'This can happen if you are using `ignore_zeros`' )
255        if not len(features):
256            maxv = len(cmat)
257            k = np.arange(maxv)
258            k2 = k**2
259            tk = np.arange(2*maxv)
260            tk2 = tk**2
261            i,j = np.mgrid[:maxv,:maxv]
262            ij = i*j
263            i_j2_p1 = (i - j)**2
264            i_j2_p1 += 1
265            i_j2_p1 = 1. / i_j2_p1
266            i_j2_p1 = i_j2_p1.ravel()
267            px_plus_y = np.empty(2*maxv, np.double)
268            px_minus_y = np.empty(maxv, np.double)
269        elif maxv != len(cmat):
270            raise ValueError('mahotas.haralick_features: All cmatrices must be of the same size')
271
272        p = cmat / float(T)
273        pravel = p.ravel()
274        px = p.sum(0)
275        py = p.sum(1)
276
277        ux = np.dot(px, k)
278        uy = np.dot(py, k)
279        vx = np.dot(px, k2) - ux**2
280        vy = np.dot(py, k2) - uy**2
281
282        sx = np.sqrt(vx)
283        sy = np.sqrt(vy)
284        px_plus_y.fill(0)
285        px_minus_y.fill(0)
286        _texture.compute_plus_minus(p, px_plus_y, px_minus_y)
287
288        feats[0] = np.dot(pravel, pravel)
289        feats[1] = np.dot(k2, px_minus_y)
290
291        if sx == 0. or sy == 0.:
292            feats[2] = 1.
293        else:
294            feats[2] = (1. / sx / sy) * (np.dot(ij.ravel(), pravel) - ux * uy)
295
296        feats[3] = vx
297        feats[4] = np.dot(i_j2_p1, pravel)
298        feats[5] = np.dot(tk, px_plus_y)
299
300        feats[7] = _entropy(px_plus_y)
301
302        # There is some confusion w.r.t. feats[6].
303        #
304        # Haralick's paper uses feats[7] in its computation, but it is
305        # clear that feats[5] should be used (i.e., it computes a
306        # variance).
307        #
308        if preserve_haralick_bug:
309            feats[6] = ((tk-feats[7])**2*px_plus_y).sum()
310        else:
311            feats[6] = np.dot(tk2, px_plus_y) - feats[5]**2
312
313        feats[ 8] = _entropy(pravel)
314        if use_x_minus_y_variance:
315            mu_x_minus_y = np.dot(px_minus_y, k)
316            mu_x_minus_y_sq = np.dot(px_minus_y, k2)
317            feats[ 9] = mu_x_minus_y_sq - mu_x_minus_y**2
318        else:
319            feats[ 9] = px_minus_y.var()
320        feats[10] = _entropy(px_minus_y)
321
322        HX = _entropy(px)
323        HY = _entropy(py)
324        crosspxpy = np.outer(px,py)
325        crosspxpy += (crosspxpy == 0) # This makes log(0) become log(1), and thus evaluate to zero, such that everything works below:
326        crosspxpy = crosspxpy.ravel()
327        HXY1 = -np.dot(pravel, np.log2(crosspxpy))
328        HXY2 = _entropy(crosspxpy)
329
330        if max(HX, HY) == 0.:
331            feats[11] = (feats[8]-HXY1)
332        else:
333            feats[11] = (feats[8]-HXY1)/max(HX,HY)
334        feats[12] = np.sqrt(max(0,1 - np.exp( -2. * (HXY2 - feats[8]))))
335
336        if compute_14th_feature:
337            # Square root of the second largest eigenvalue of the correlation matrix
338            # Probably the faster way to do this is just SVD the whole (likely rank deficient) matrix
339            # grab the second highest singular value . . . Instead, we just amputate the empty rows/cols and move on.
340            nzero_rc = px != 0
341            nz_pmat = p[nzero_rc,:][:,nzero_rc] # Symmetric, so this is ok!
342            if nz_pmat.shape[0] > 2:
343                ccm = np.corrcoef(nz_pmat)
344                e_vals = np.linalg.eigvalsh(ccm)
345                e_vals.sort()
346                feats[13] = np.sqrt(e_vals[-2])
347            else:
348                feats[13] = 0
349        features.append(feats)
350
351    features = np.array(features)
352    if return_mean:
353        return features.mean(axis=0)
354    if return_mean_ptp:
355        mean = features.mean(axis=0)
356        ptp = features.ptp(axis=0)
357        return np.concatenate((mean,ptp))
358
359    return features
360
361
362haralick_labels = ["Angular Second Moment",
363                   "Contrast",
364                   "Correlation",
365                   "Sum of Squares: Variance",
366                   "Inverse Difference Moment",
367                   "Sum Average",
368                   "Sum Variance",
369                   "Sum Entropy",
370                   "Entropy",
371                   "Difference Variance",
372                   "Difference Entropy",
373                   "Information Measure of Correlation 1",
374                   "Information Measure of Correlation 2",
375                   "Maximal Correlation Coefficient"]
376
377_2d_deltas= [
378    (0,1),
379    (1,1),
380    (1,0),
381    (1,-1)]
382
383_3d_deltas = [
384    (1, 0, 0),
385    (1, 1, 0),
386    (0, 1, 0),
387    (1,-1, 0),
388    (0, 0, 1),
389    (1, 0, 1),
390    (0, 1, 1),
391    (1, 1, 1),
392    (1,-1, 1),
393    (1, 0,-1),
394    (0, 1,-1),
395    (1, 1,-1),
396    (1,-1,-1) ]
397
398def cooccurence(f, direction, output=None, symmetric=True, distance=1):
399    '''
400    cooccurence_matrix = cooccurence(f,
401                            direction,
402                            output={new matrix},
403                            symmetric=True,
404                            distance=1)
405
406    Compute grey-level cooccurence matrix
407
408    Parameters
409    ----------
410    f : ndarray of integer type
411        The input image
412    direction : integer
413        Direction as index into (horizontal [default], diagonal
414        [nw-se], vertical, diagonal [ne-sw])
415    output : np.long 2 ndarray, optional
416        preallocated result.
417    symmetric : boolean, optional
418        whether return a symmetric matrix (default: True)
419    distance : integer, optional (default=1)
420        Distance to the central pixel to consider.
421
422    Returns
423    -------
424      cooccurence_matrix : cooccurence matrix
425    '''
426    _verify_is_integer_type(f, 'mahotas.cooccurence')
427    if len(f.shape) == 2 and not (0 <= direction < 4):
428        raise ValueError('mahotas.texture.cooccurence: `direction` {0} is not in range(4).'.format(direction))
429    elif len(f.shape) == 3 and not (0 <= direction < 13):
430        raise ValueError('mahotas.texture.cooccurence: `direction` {0} is not in range(13).'.format(direction))
431    elif len(f.shape) not in (2,3):
432        raise ValueError('mahotas.texture.cooccurence: cannot handle images of %s dimensions.' % len(f.shape))
433
434    if output is None:
435        mf = f.max()
436        output = np.zeros((mf+1, mf+1), np.int32)
437    else:
438        assert np.min(output.shape) >= f.max(), 'mahotas.texture.cooccurence: output is not large enough'
439        assert output.dtype == np.int32, 'mahotas.texture.cooccurence: output is not of type np.int32'
440        output.fill(0)
441
442    if len(f.shape) == 2:
443        mask_size = 2 * distance + 1
444        Bc = np.zeros((mask_size, mask_size), f.dtype)
445        y, x = tuple(distance * i for i in _2d_deltas[direction])
446        Bc[y + distance, x + distance] = 1
447    else:
448        mask_size = 2 * distance + 1
449        Bc = np.zeros((mask_size, mask_size, mask_size), f.dtype)
450        y, x, z = tuple(distance * i for i in _3d_deltas[direction])
451        Bc[y + distance, x + distance, z + distance] = 1
452    _texture.cooccurence(f, output, Bc, symmetric)
453    return output
454
455