1'''
2Basic algorithms and data handling code.
3'''
4
5from __future__ import absolute_import, division, print_function, unicode_literals
6
7import math
8from numbers import Integral
9import numpy as np
10import pickle
11
12import spectral as spy
13from ..io.spyfile import SpyFile, TransformedImage
14from ..utilities.errors import has_nan, NaNValueError
15from .spymath import matrix_sqrt
16from .transforms import LinearTransform
17
18class Iterator:
19    '''
20    Base class for iterators over pixels (spectra).
21    '''
22    def __init__(self):
23        pass
24
25    def __iter__(self):
26        raise NotImplementedError('Must override __iter__ in child class.')
27
28    def get_num_elements(self):
29        raise NotImplementedError(
30            'Must override get_num_elements in child class.')
31
32    def get_num_bands(self):
33        raise NotImplementedError(
34            'Must override get_num_bands in child class.')
35
36
37class ImageIterator(Iterator):
38    '''
39    An iterator over all pixels in an image.
40    '''
41    def __init__(self, im):
42        self.image = im
43        self.numElements = im.shape[0] * im.shape[1]
44
45    def get_num_elements(self):
46        return self.numElements
47
48    def get_num_bands(self):
49        return self.image.shape[2]
50
51    def __iter__(self):
52        (M, N) = self.image.shape[:2]
53        count = 0
54        for i in range(M):
55            self.row = i
56            for j in range(N):
57                self.col = j
58                yield self.image[i, j]
59
60
61class ImageMaskIterator(Iterator):
62    '''
63    An iterator over all pixels in an image corresponding to a specified mask.
64    '''
65    def __init__(self, image, mask, index=None):
66        if mask.shape != image.shape[:len(mask.shape)]:
67            raise ValueError('Mask shape does not match image.')
68        self.image = image
69        self.index = index
70        # Get the proper mask for the training set
71        if index:
72            self.mask = np.equal(mask, index)
73        else:
74            self.mask = np.not_equal(mask, 0)
75        self.n_elements = sum(self.mask.ravel())
76
77    def get_num_elements(self):
78        return self.n_elements
79
80    def get_num_bands(self):
81        return self.image.shape[2]
82
83    def __iter__(self):
84        coords = np.argwhere(self.mask)
85        for (i, j) in coords:
86            (self.row, self.col) = (i, j)
87            yield self.image[i, j].astype(self.image.dtype).squeeze()
88
89def iterator(image, mask=None, index=None):
90    '''
91    Returns an iterator over pixels in the image.
92
93    Arguments:
94
95        `image` (ndarray or :class:`spectral.Image`):
96
97            An image over whose pixels will be iterated.
98
99        `mask` (ndarray) [default None]:
100
101            An array of integers that specify over which pixels in `image`
102            iteration should be performed.
103
104        `index` (int) [default None]:
105
106            Specifies which value in `mask` should be used for iteration.
107
108    Returns (:class:`spectral.Iterator`):
109
110        An iterator over image pixels.
111
112    If neither `mask` nor `index` are defined, iteration is performed over all
113    pixels.  If `mask` (but not `index`) is defined, iteration is performed
114    over all pixels for which `mask` is nonzero.  If both `mask` and `index`
115    are defined, iteration is performed over all pixels `image[i,j]` for which
116    `mask[i,j] == index`.
117    '''
118
119    if isinstance(image, Iterator):
120        return image
121    elif mask is not None:
122        return ImageMaskIterator(image, mask, index)
123    else:
124        return ImageIterator(image)
125
126
127def iterator_ij(mask, index=None):
128    '''
129    Returns an iterator over image pixel coordinates for a given mask.
130
131    Arguments:
132
133        `mask` (ndarray) [default None]:
134
135            An array of integers that specify which coordinates should
136            be returned.
137
138        `index` (int) [default None]:
139
140            Specifies which value in `mask` should be used for iteration.
141
142    Returns:
143
144        An iterator over image pixel coordinates. Each returned item is a
145        2-tuple of the form (row, col).
146
147    If `index` is not defined, iteration is performed over all non-zero
148    elements.  If `index` is defined, iteration is performed over all
149    coordinates for whch `mask[i,j] == index`.
150    '''
151
152    if mask.ndim != 2:
153        raise ValueError('Invalid mask shape.')
154    if index is None:
155        mask = mask != 0
156    else:
157        mask = mask == index
158    for rc in np.argwhere(mask):
159        yield tuple(rc)
160
161
162def mean_cov(image, mask=None, index=None):
163    '''
164    Return the mean and covariance of the set of vectors.
165
166    Usage::
167
168        (mean, cov, S) = mean_cov(vectors [, mask=None [, index=None]])
169
170    Arguments:
171
172        `image` (ndarrray, :class:`~spectral.Image`, or :class:`spectral.Iterator`):
173
174            If an ndarray, it should have shape `MxNxB` and the mean &
175            covariance will be calculated for each band (third dimension).
176
177        `mask` (ndarray):
178
179            If `mask` is specified, mean & covariance will be calculated for
180            all pixels indicated in the mask array.  If `index` is specified,
181            all pixels in `image` for which `mask == index` will be used;
182            otherwise, all nonzero elements of `mask` will be used.
183
184        `index` (int):
185
186            Specifies which value in `mask` to use to select pixels from
187            `image`. If not specified but `mask` is, then all nonzero elements
188            of `mask` will be used.
189
190        If neither `mask` nor `index` are specified, all samples in `vectors`
191        will be used.
192
193    Returns a 3-tuple containing:
194
195        `mean` (ndarray):
196
197            The length-`B` mean vectors
198
199        `cov` (ndarray):
200
201            The `BxB` unbiased estimate (dividing by N-1) of the covariance
202            of the vectors.
203
204        `S` (int):
205
206            Number of samples used to calculate mean & cov
207
208    Calculate the mean and covariance of of the given vectors. The argument
209    can be an Iterator, a SpyFile object, or an `MxNxB` array.
210    '''
211    status = spy._status
212
213    if isinstance(image, np.ndarray):
214        X = image.astype(np.float64)
215        if X.ndim == 3:
216            X = image.reshape(-1, image.shape[-1]).T
217        if mask is not None:
218            mask = mask.ravel()
219            if index is not None:
220                ii = np.argwhere(mask == index)
221            else:
222                ii = np.argwhere(mask != 0)
223            X = np.take(X, ii.squeeze(), axis=1)
224        m = np.average(X, axis=1)
225        C = np.cov(X)
226        return (m, C, X.shape[1])
227
228    if not isinstance(image, Iterator):
229        it = iterator(image, mask, index)
230    else:
231        it = image
232
233    nSamples = it.get_num_elements()
234    B = it.get_num_bands()
235
236    sumX = np.zeros((B,), 'd')
237    sumX2 = np.zeros((B, B), 'd')
238    count = 0
239
240    statusInterval = max(1, nSamples / 100)
241    status.display_percentage('Covariance.....')
242    for x in it:
243        if not count % statusInterval:
244            status.update_percentage(float(count) / nSamples * 100.)
245        count += 1
246        sumX += x
247        x = x.astype(np.float64)[:, np.newaxis]
248        sumX2 += x.dot(x.T)
249    mean = (sumX / count)
250    sumX = sumX[:, np.newaxis]
251    cov = (sumX2 - sumX.dot(sumX.T) / count) / (count - 1)
252    status.end_percentage()
253    return (mean, cov, count)
254
255
256def cov_avg(image, mask, weighted=True):
257    '''Calculates the covariance averaged over a set of classes.
258
259    Arguments:
260
261        `image` (ndarrray, :class:`~spectral.Image`, or :class:`spectral.Iterator`):
262
263            If an ndarray, it should have shape `MxNxB` and the mean &
264            covariance will be calculated for each band (third dimension).
265
266        `mask` (integer-valued ndarray):
267
268            Elements specify the classes associated with pixels in `image`.
269            All pixels associeted with non-zero elements of `mask` will be
270            used in the covariance calculation.
271
272        `weighted` (bool, default True):
273
274            Specifies whether the individual class covariances should be
275            weighted when computing the average. If True, each class will
276            be weighted by the number of pixels provided for the class;
277            otherwise, a simple average of the class covariances is performed.
278
279    Returns a class-averaged covariance matrix. The number of covariances used
280    in the average is equal to the number of non-zero elements of `mask`.
281    '''
282    ids = set(mask.ravel()) - set((0,))
283    classes = [calc_stats(image, mask, i) for i in ids]
284    N = sum([c.nsamples for c in classes])
285    if weighted:
286        return np.sum([((c.nsamples - 1) / float(N - 1)) * c.cov
287                       for c in classes], axis=0, dtype=np.float64)
288    else:
289        return np.mean([c.cov for c in classes], axis=0, dtype=np.float64)
290
291def covariance(*args):
292    '''
293    Returns the covariance of the set of vectors.
294
295    Usage::
296
297        C = covariance(vectors [, mask=None [, index=None]])
298
299    Arguments:
300
301        `vectors` (ndarrray, :class:`~spectral.Image`, or :class:`spectral.Iterator`):
302
303            If an ndarray, it should have shape `MxNxB` and the mean &
304            covariance will be calculated for each band (third dimension).
305
306        `mask` (ndarray):
307
308            If `mask` is specified, mean & covariance will be calculated for
309            all pixels indicated in the mask array.  If `index` is specified,
310            all pixels in `image` for which `mask == index` will be used;
311            otherwise, all nonzero elements of `mask` will be used.
312
313        `index` (int):
314
315            Specifies which value in `mask` to use to select pixels from
316            `image`. If not specified but `mask` is, then all nonzero elements
317            of `mask` will be used.
318
319        If neither `mask` nor `index` are specified, all samples in `vectors`
320        will be used.
321
322    Returns:
323
324        `C` (ndarray):
325
326            The `BxB` unbiased estimate (dividing by N-1) of the covariance
327            of the vectors.
328
329
330    To also return the mean vector and number of samples, call
331    :func:`~spectral.algorithms.algorithms.mean_cov` instead.
332    '''
333    return mean_cov(*args)[1]
334
335
336class PrincipalComponents:
337    '''
338    An object for storing a data set's principal components.  The
339    object has the following members:
340
341        `eigenvalues`:
342
343            A length B array of eigenvalues sorted in descending order
344
345        `eigenvectors`:
346
347            A `BxB` array of normalized eigenvectors (in columns)
348
349        `stats` (:class:`GaussianStats`):
350
351            A statistics object containing `mean`, `cov`, and `nsamples`.
352
353        `transform`:
354
355            A callable function to transform data to the space of the
356            principal components.
357
358        `reduce`:
359
360            A method to return a reduced set of principal components based
361            on either a fixed number of components or a fraction of total
362            variance.
363
364        `denoise`:
365
366            A callable function to denoise data using a reduced set of
367            principal components.
368
369        `get_denoising_transform`:
370
371            A callable function that returns a function for denoising data.
372    '''
373    def __init__(self, vals, vecs, stats):
374        self.eigenvalues = vals
375        self.eigenvectors = vecs
376        self.stats = stats
377        self.transform = LinearTransform(self.eigenvectors.T, pre=-self.mean)
378
379    @property
380    def mean(self):
381        return self.stats.mean
382
383    @property
384    def cov(self):
385        return self.stats.cov
386
387    def reduce(self, N=0, **kwargs):
388        '''Reduces the number of principal components.
389
390        Keyword Arguments (one of the following must be specified):
391
392            `num` (integer):
393
394                Number of eigenvalues/eigenvectors to retain.  The top `num`
395                eigenvalues will be retained.
396
397            `eigs` (list):
398
399                A list of indices of eigenvalues/eigenvectors to be retained.
400
401            `fraction` (float):
402
403                The fraction of total image variance to retain.  Eigenvalues
404                will be retained (starting from greatest to smallest) until
405                `fraction` of total image variance is retained.
406        '''
407        status = spy._status
408
409        num = kwargs.get('num', None)
410        eigs = kwargs.get('eigs', None)
411        fraction = kwargs.get('fraction', None)
412        if num is not None:
413            return PrincipalComponents(self.eigenvalues[:num],
414                                       self.eigenvectors[:, :num],
415                                       self.stats)
416        elif eigs is not None:
417            vals = self.eigenvalues[eigs]
418            vecs = self.eigenvectors[:, eigs]
419            return PrincipalComponents(vals, vecs, self.stats)
420        elif fraction is not None:
421            if not 0 < fraction <= 1:
422                raise Exception('fraction must be in range (0,1].')
423            N = len(self.eigenvalues)
424            cumsum = np.cumsum(self.eigenvalues)
425            sum = cumsum[-1]
426            # Count how many values to retain.
427            for i in range(N):
428                if (cumsum[i] / sum) >= fraction:
429                    break
430            if i == (N - 1):
431                # No reduction
432                status.write('No reduction in eigenvectors achieved.')
433                return self
434
435            vals = self.eigenvalues[:i + 1]
436            vecs = self.eigenvectors[:, :i + 1]
437            return PrincipalComponents(vals, vecs, self.stats)
438        else:
439            raise Exception('Must specify one of the following keywords:'
440                            '`num`, `eigs`, `fraction`.')
441
442    def denoise(self, X, **kwargs):
443        '''Returns a de-noised version of `X`.
444
445        Arguments:
446
447            `X` (np.ndarray):
448
449                Data to be de-noised. Can be a single pixel or an image.
450
451        Keyword Arguments (one of the following must be specified):
452
453            `num` (integer):
454
455                Number of eigenvalues/eigenvectors to use.  The top `num`
456                eigenvalues will be used.
457
458            `eigs` (list):
459
460                A list of indices of eigenvalues/eigenvectors to be used.
461
462            `fraction` (float):
463
464                The fraction of total image variance to retain.  Eigenvalues
465                will be included (starting from greatest to smallest) until
466                `fraction` of total image variance is retained.
467
468        Returns denoised image data with same shape as `X`.
469
470        Note that calling this method is equivalent to calling the
471        `get_denoising_transform` method with same keyword and applying the
472        returned transform to `X`. If you only intend to denoise data with the
473        same parameters multiple times, then it is more efficient to get the
474        denoising transform and reuse it, rather than calling this method
475        multilple times.
476        '''
477        f = self.get_denoising_transform(**kwargs)
478        return f(X)
479
480    def get_denoising_transform(self, **kwargs):
481        '''Returns a function for denoising image data.
482
483        Keyword Arguments (one of the following must be specified):
484
485            `num` (integer):
486
487                Number of eigenvalues/eigenvectors to use.  The top `num`
488                eigenvalues will be used.
489
490            `eigs` (list):
491
492                A list of indices of eigenvalues/eigenvectors to be used.
493
494            `fraction` (float):
495
496                The fraction of total image variance to retain.  Eigenvalues
497                will be included (starting from greatest to smallest) until
498                `fraction` of total image variance is retained.
499
500        Returns a callable :class:`~spectral.algorithms.transforms.LinearTransform`
501        object for denoising image data.
502        '''
503        V = self.reduce(self, **kwargs).eigenvectors
504        f = LinearTransform(V.dot(V.T), pre=-self.mean,
505                            post=self.mean)
506        return f
507
508
509def principal_components(image):
510    '''
511    Calculate Principal Component eigenvalues & eigenvectors for an image.
512
513    Usage::
514
515        pc = principal_components(image)
516
517    Arguments:
518
519        `image` (ndarray, :class:`spectral.Image`, :class:`GaussianStats`):
520
521            An `MxNxB` image
522
523    Returns a :class:`~spectral.algorithms.algorithms.PrincipalComponents`
524    object with the following members:
525
526        `eigenvalues`:
527
528            A length B array of eigenvalues
529
530        `eigenvectors`:
531
532            A `BxB` array of normalized eigenvectors
533
534        `stats` (:class:`GaussianStats`):
535
536            A statistics object containing `mean`, `cov`, and `nsamples`.
537
538        `transform`:
539
540            A callable function to transform data to the space of the
541            principal components.
542
543        `reduce`:
544
545            A method to reduce the number of eigenvalues.
546
547        `denoise`:
548
549            A callable function to denoise data using a reduced set of
550            principal components.
551
552        `get_denoising_transform`:
553
554            A callable function that returns a function for denoising data.
555    '''
556    if isinstance(image, GaussianStats):
557        stats = image
558    else:
559        stats = calc_stats(image)
560
561    (L, V) = np.linalg.eig(stats.cov)
562
563    # numpy says eigenvalues may not be sorted so we'll sort them, if needed.
564    if not np.alltrue(np.diff(L) <= 0):
565        ii = list(reversed(np.argsort(L)))
566        L = L[ii]
567        V = V[:, ii]
568
569    return PrincipalComponents(L, V, stats)
570
571
572class FisherLinearDiscriminant:
573    '''
574    An object for storing a data set's linear discriminant data.  For `C`
575    classes with `B`-dimensional data, the object has the following members:
576
577        `eigenvalues`:
578
579            A length `C-1` array of eigenvalues
580
581        `eigenvectors`:
582
583            A `BxC` array of normalized eigenvectors
584
585        `mean`:
586
587            The length `B` mean vector of the image pixels (from all classes)
588
589        `cov_b`:
590
591            The `BxB` matrix of covariance *between* classes
592
593        `cov_w`:
594
595            The `BxB` matrix of average covariance *within* each class
596
597        `transform`:
598
599            A callable function to transform data to the space of the
600            linear discriminant.
601    '''
602    def __init__(self, vals, vecs, mean, cov_b, cov_w):
603        self.eigenvalues = vals
604        self.eigenvectors = vecs
605        self.mean = mean
606        self.cov_b = cov_b
607        self.cov_w = cov_w
608        self.transform = LinearTransform(self.eigenvectors.T, pre=-self.mean)
609
610
611def linear_discriminant(classes, whiten=True):
612    '''
613    Solve Fisher's linear discriminant for eigenvalues and eigenvectors.
614
615    Usage: (L, V, Cb, Cw) = linear_discriminant(classes)
616
617    Arguments:
618
619        `classes` (:class:`~spectral.algorithms.TrainingClassSet`):
620
621            The set of `C` classes to discriminate.
622
623    Returns a `FisherLinearDiscriminant` object containing the within/between-
624    class covariances, mean vector, and a callable transform to convert data to
625    the transform's space.
626
627    This function determines the solution to the generalized eigenvalue problem
628
629            Cb * x = lambda * Cw * x
630
631    Since cov_w is normally invertable, the reduces to
632
633            (inv(Cw) * Cb) * x = lambda * x
634
635    References:
636
637        Richards, J.A. & Jia, X. Remote Sensing Digital Image Analysis: An
638        Introduction. (Springer: Berlin, 1999).
639    '''
640    C = len(classes)            # Number of training sets
641    rank = len(classes) - 1
642
643    classes.calc_stats()
644
645    # Calculate total # of training pixels and total mean
646    N = 0
647    B = classes.nbands
648    K = len(classes)
649    mean = np.zeros(B, dtype=np.float64)
650    for s in classes:
651        N += s.size()
652        mean += s.size() * s.stats.mean
653    mean /= N
654
655    cov_b = np.zeros((B, B), np.float64)            # cov between classes
656    cov_w = np.zeros((B, B), np.float64)            # cov within classes
657    for s in classes:
658        cov_w += ((s.size() - 1) / float(N - 1)) * s.stats.cov
659        m = s.stats.mean - mean
660        cov_b += (s.size() / float(N) / (K - 1)) * np.outer(m, m)
661
662    inv_cov_w = np.linalg.inv(cov_w)
663    (vals, vecs) = np.linalg.eig(inv_cov_w.dot(cov_b))
664    vals = vals[:rank]
665    vecs = vecs[:, :rank]
666
667    if whiten:
668        # Diagonalize cov_within in the new space
669        v = vecs.T.dot(cov_w).dot(vecs)
670        d = np.sqrt(np.diag(v) * np.diag(v).conj())
671        for i in range(vecs.shape[1]):
672            vecs[:, i] /= math.sqrt(d[i].real)
673
674    return FisherLinearDiscriminant(vals.real, vecs.real, mean, cov_b, cov_w)
675
676# Alias for Linear Discriminant Analysis (LDA)
677lda = linear_discriminant
678
679
680def log_det(x):
681    return sum(np.log([eigv for eigv in np.linalg.eigvals(x)
682                          if eigv > 0]))
683
684
685class GaussianStats(object):
686    '''A class for storing Gaussian statistics for a data set.
687
688    Statistics stored include:
689
690        `mean`:
691
692            Mean vector
693
694        `cov`:
695
696            Covariance matrix
697
698        `nsamples`:
699
700            Number of samples used in computing the statistics
701
702    Several derived statistics are computed on-demand (and cached) and are
703    available as property attributes. These include:
704
705        `inv_cov`:
706
707            Inverse of the covariance
708
709        `sqrt_cov`:
710
711            Matrix square root of covariance: sqrt_cov.dot(sqrt_cov) == cov
712
713        `sqrt_inv_cov`:
714
715            Matrix square root of the inverse of covariance
716
717        `log_det_cov`:
718
719            The log of the determinant of the covariance matrix
720
721        `principal_components`:
722
723            The principal components of the data, based on mean and cov.
724    '''
725
726    def __init__(self, mean=None, cov=None, nsamples=None, inv_cov=None):
727        self.cov = cov
728        self._inv_cov = inv_cov
729        self.mean = mean
730        self.nsamples = nsamples
731
732    @property
733    def cov(self):
734        '''Property method returning the covariance matrix.'''
735        return self._cov
736
737    @cov.setter
738    def cov(self, C):
739        self.reset_derived_stats()
740        self._cov = C
741
742    @property
743    def inv_cov(self):
744        '''Property method returning the inverse of the covariance matrix.'''
745        if self._inv_cov is None:
746            self._inv_cov = np.linalg.inv(self._cov)
747        return self._inv_cov
748
749    def reset_derived_stats(self):
750        self._cov = self._inv_cov = None
751        self._sqrt_cov = self._sqrt_inv_cov = self._pcs = None
752        self._log_det_cov = None
753
754    @property
755    def sqrt_cov(self):
756        '''Property method returning the matrix square root of the covariance.
757        If `C` is the covariance, then the returned value is a matrix `S`
758        such that S.dot(S) == C.
759        '''
760        if self._sqrt_cov is None:
761            pcs = self.principal_components
762            self._sqrt_cov = matrix_sqrt(eigs=(pcs.eigenvalues,
763                                               pcs.eigenvectors),
764                                         symmetric=True)
765        return self._sqrt_cov
766
767    @property
768    def sqrt_inv_cov(self):
769        '''Property method returning matrix square root of inverse of cov.
770        If `C` is the covariance, then the returned value is a matrix `S`
771        such that S.dot(S) == inv(C).
772        '''
773        if self._sqrt_inv_cov is None:
774            pcs = self.principal_components
775            self._sqrt_inv_cov = matrix_sqrt(eigs=(pcs.eigenvalues,
776                                                   pcs.eigenvectors),
777                                             symmetric=True,
778                                             inverse=True)
779        return self._sqrt_inv_cov
780
781    @property
782    def principal_components(self):
783        if self._pcs is None:
784            (evals, evecs) = np.linalg.eigh(self._cov)
785            self._pcs = PrincipalComponents(evals, evecs, self)
786        return self._pcs
787
788    @property
789    def log_det_cov(self):
790        if self._log_det_cov is None:
791            evals = self.principal_components.eigenvalues
792            self._log_det_cov = np.sum(np.log([v for v in evals if v > 0]))
793        return self._log_det_cov
794
795    def transform(self, xform):
796        '''Returns a version of the stats transformed by a linear transform.'''
797        if not isinstance(xform, LinearTransform):
798            raise TypeError('Expected a LinearTransform object.')
799        m = xform(self.mean)
800        C = xform._A.dot(self.cov).dot(xform._A.T)
801        return GaussianStats(mean=m, cov=C, nsamples=self.nsamples)
802
803    def get_whitening_transform(self):
804        '''Returns transform that centers and whitens data for these stats.'''
805        C_1 = np.linalg.inv(self.cov)
806        return LinearTransform(matrix_sqrt(C_1, True), pre=-self.mean)
807
808
809def calc_stats(image, mask=None, index=None, allow_nan=False):
810    '''Computes Gaussian stats for image data..
811
812    Arguments:
813
814        `image` (ndarrray, :class:`~spectral.Image`, or :class:`spectral.Iterator`):
815
816            If an ndarray, it should have shape `MxNxB` and the mean &
817            covariance will be calculated for each band (third dimension).
818
819        `mask` (ndarray):
820
821            If `mask` is specified, mean & covariance will be calculated for
822            all pixels indicated in the mask array.  If `index` is specified,
823            all pixels in `image` for which `mask == index` will be used;
824            otherwise, all nonzero elements of `mask` will be used.
825
826        `index` (int):
827
828            Specifies which value in `mask` to use to select pixels from
829            `image`. If not specified but `mask` is, then all nonzero elements
830            of `mask` will be used.
831
832        `allow_nan` (bool, default False):
833
834            If True, statistics will be computed even if `np.nan` values are
835            present in the data; otherwise, `~spectral.algorithms.spymath.NaNValueError`
836            is raised.
837
838        If neither `mask` nor `index` are specified, all samples in `vectors`
839        will be used.
840
841    Returns:
842
843        `GaussianStats` object:
844
845            This object will have members `mean`, `cov`, and `nsamples`.
846    '''
847    (mean, cov, N) = mean_cov(image, mask, index)
848    if has_nan(mean) and not allow_nan:
849        raise NaNValueError('NaN values present in data.')
850    return GaussianStats(mean=mean, cov=cov, nsamples=N)
851
852
853class TrainingClass:
854    def __init__(self, image, mask, index=0, class_prob=1.0):
855        '''Creates a new training class defined by applying `mask` to `image`.
856
857        Arguments:
858
859            `image` (:class:`spectral.Image` or :class:`numpy.ndarray`):
860
861                The `MxNxB` image over which the training class is defined.
862
863            `mask` (:class:`numpy.ndarray`):
864
865                An `MxN` array of integers that specifies which pixels in
866                `image` are associated with the class.
867
868            `index` (int) [default 0]:
869
870                if `index` == 0, all nonzero elements of `mask` are associated
871                with the class.  If `index` is nonzero, all elements of `mask`
872                equal to `index` are associated with the class.
873
874            `class_prob` (float) [default 1.0]:
875
876                Defines the prior probability associated with the class, which
877                is used in maximum likelihood classification.  If `classProb`
878                is 1.0, prior probabilities are ignored by classifiers, giving
879                all class equal weighting.
880        '''
881        self.image = image
882        if image is not None:
883            self.nbands = image.shape[2]
884        self.nbands = None
885        self.mask = mask
886        self.index = index
887        self.class_prob = class_prob
888        self.stats = None
889
890        self._stats_valid = False
891
892    def __iter__(self):
893        '''Returns an iterator over all samples for the class.'''
894        it = ImageMaskIterator(self.image, self.mask, self.index)
895        for i in it:
896            yield i
897
898    def stats_valid(self, tf=None):
899        '''
900        Sets statistics for the TrainingClass to be valid or invalid.
901
902        Arguments:
903
904            `tf` (bool or None):
905
906                A value evaluating to False indicates that statistics should be
907                recalculated prior to being used. If the argument is `None`,
908                a value will be returned indicating whether stats need to be
909                recomputed.
910        '''
911        if tf is None:
912            return self._stats_valid
913        self._stats_valid = tf
914
915    def size(self):
916        '''Returns the number of pixels/samples in the training set.'''
917
918        # If the stats are invalid, the number of pixels in the
919        # training set may have changed.
920        if self._stats_valid:
921            return self.stats.nsamples
922
923        if self.index:
924            return np.sum(np.equal(self.mask, self.index).ravel())
925        else:
926            return np.sum(np.not_equal(self.mask, 0).ravel())
927
928
929    def calc_stats(self):
930        '''
931        Calculates statistics for the class.
932
933        This function causes the :attr:`stats` attribute of the class to be
934        updated, where `stats` will have the following attributes:
935
936        =============  ======================   ===================================
937        Attribute      Type                          Description
938        =============  ======================   ===================================
939        `mean`         :class:`numpy.ndarray`   length-`B` mean vector
940        `cov`          :class:`numpy.ndarray`   `BxB` covariance matrix
941        `inv_cov`      :class:`numpy.ndarray`   Inverse of `cov`
942        `log_det_cov`  float                    Natural log of determinant of `cov`
943        =============  ======================   ===================================
944        '''
945        self.stats = calc_stats(self.image, self.mask, self.index)
946        self.nbands = self.image.shape[-1]
947        self._stats_valid = True
948
949    def transform(self, transform):
950        '''
951        Perform a linear transformation on the statistics of the training set.
952
953        Arguments:
954
955            `transform` (:class:numpy.ndarray or LinearTransform):
956
957                The linear transform array.  If the class has `B` bands, then
958                `transform` must have shape `(C,B)`.
959
960        After `transform` is applied, the class statistics will have `C` bands.
961        '''
962        if isinstance(transform, np.ndarray):
963            transform = LinearTransform(transform)
964        self.stats.mean = transform(self.stats.mean)
965        self.stats.cov = np.dot(
966            transform._A, self.stats.cov).dot(transform._A.T)
967        self.nbands = transform.dim_out
968
969
970class SampleIterator:
971    '''Iterator over all classes and samples in a TrainingClassSet object.'''
972    def __init__(self, trainingData):
973        self.classes = trainingData
974
975    def __iter__(self):
976        for cl in self.classes:
977            for sample in cl:
978                yield sample
979
980
981class TrainingClassSet:
982    '''A class to manage a set of :class:`~spectral.TrainingClass` objects.'''
983    def __init__(self):
984        self.classes = {}
985        self.nbands = None
986
987    def __getitem__(self, i):
988        '''Returns the training class having ID i.'''
989        return self.classes[i]
990
991    def __len__(self):
992        '''Returns number of training classes in the set.'''
993        return len(self.classes)
994
995    def add_class(self, cl):
996        '''Adds a new class to the training set.
997
998        Arguments:
999
1000            `cl` (:class:`spectral.TrainingClass`):
1001
1002                `cl.index` must not duplicate a class already in the set.
1003        '''
1004        if cl.index in self.classes:
1005            raise Exception('Attempting to add class with duplicate index.')
1006        self.classes[cl.index] = cl
1007        if not self.nbands:
1008            self.nbands = cl.nbands
1009
1010    def transform(self, X):
1011        '''Applies linear transform, M, to all training classes.
1012
1013        Arguments:
1014
1015            `X` (:class:numpy.ndarray):
1016
1017                The linear transform array.  If the classes have `B` bands, then
1018                `X` must have shape `(C,B)`.
1019
1020        After the transform is applied, all classes will have `C` bands.
1021        '''
1022        for cl in list(self.classes.values()):
1023            cl.transform(X)
1024        self.nbands = list(self.classes.values())[0].nbands
1025
1026    def __iter__(self):
1027        '''An iterator over all training classes in the set.'''
1028        for cl in list(self.classes.values()):
1029            yield cl
1030
1031    def all_samples(self):
1032        '''An iterator over all samples in all classes.'''
1033        return SampleIterator(self)
1034
1035    def calc_stats(self):
1036        '''Computes statistics for each class, if not already computed.'''
1037        for c in list(self.classes.values()):
1038            if not c.stats_valid():
1039                c.calc_stats()
1040        self.nbands = list(self.classes.values())[0].nbands
1041
1042    def save(self, filename, calc_stats=False):
1043        for c in list(self.classes.values()):
1044            if c.stats is None:
1045                if calc_stats == False:
1046                    msg = 'Class statistics are missing from at least one ' \
1047                      'class and are required to save the training class ' \
1048                      'data. Call the `save` method with keyword ' \
1049                      '`calc_stats=True` if you want to compute them and ' \
1050                      'then save the class data.'
1051                    raise Exception (msg)
1052                else:
1053                    c.calc_stats()
1054        f = open(filename, 'wb')
1055        ids = sorted(self.classes.keys())
1056        pickle.dump(self.classes[ids[0]].mask, f)
1057        pickle.dump(len(self), f)
1058        for id in ids:
1059            c = self.classes[id]
1060            pickle.dump(c.index, f)
1061            pickle.dump(c.stats.cov, f)
1062            pickle.dump(c.stats.mean, f)
1063            pickle.dump(c.stats.nsamples, f)
1064            pickle.dump(c.class_prob, f)
1065        f.close()
1066
1067    def load(self, filename, image):
1068        f = open(filename, 'rb')
1069        mask = pickle.load(f)
1070        nclasses = pickle.load(f)
1071        for i in range(nclasses):
1072            index = pickle.load(f)
1073            cov = pickle.load(f)
1074            mean = pickle.load(f)
1075            nsamples = pickle.load(f)
1076            class_prob = pickle.load(f)
1077            c = TrainingClass(image, mask, index, class_prob)
1078            c.stats = GaussianStats(mean=mean, cov=cov, nsamples=nsamples)
1079            if not (cov is None or mean is None or nsamples is None):
1080                c.stats_valid(True)
1081                c.nbands = len(mean)
1082            self.add_class(c)
1083        f.close
1084
1085
1086def create_training_classes(image, class_mask, calc_stats=False, indices=None):
1087    '''
1088    Creates a :class:spectral.algorithms.TrainingClassSet: from an indexed array.
1089
1090    USAGE:  sets = createTrainingClasses(classMask)
1091
1092    Arguments:
1093
1094        `image` (:class:`spectral.Image` or :class:`numpy.ndarray`):
1095
1096            The image data for which the training classes will be defined.
1097            `image` has shape `MxNxB`.
1098
1099        `class_mask` (:class:`numpy.ndarray`):
1100
1101            A rank-2 array whose elements are indices of various spectral
1102            classes.  if `class_mask[i,j]` == `k`, then `image[i,j]` is
1103            assumed to belong to class `k`.
1104
1105        `calc_stats` (bool):
1106
1107            An optional parameter which, if True, causes statistics to be
1108            calculated for all training classes.
1109
1110    Returns:
1111
1112        A :class:`spectral.algorithms.TrainingClassSet` object.
1113
1114    The dimensions of classMask should be the same as the first two dimensions
1115    of the corresponding image. Values of zero in classMask are considered
1116    unlabeled and are not added to a training set.
1117    '''
1118
1119    if indices is not None:
1120        class_indices = set(indices) - set((0,))
1121    else:
1122        class_indices = set(class_mask.ravel()) - set((0,))
1123    classes = TrainingClassSet()
1124    classes.nbands = image.shape[-1]
1125    for i in class_indices:
1126        cl = TrainingClass(image, class_mask, i)
1127        if calc_stats:
1128            cl.calc_stats()
1129        classes.add_class(cl)
1130    return classes
1131
1132
1133def ndvi(data, red, nir):
1134    '''Calculates Normalized Difference Vegetation Index (NDVI).
1135
1136    Arguments:
1137
1138        `data` (ndarray or :class:`spectral.Image`):
1139
1140            The array or SpyFile for which to calculate the index.
1141
1142        `red` (int or int range):
1143
1144            Index of the red band or an index range for multiple bands.
1145
1146        `nir` (int or int range):
1147
1148            An integer index of the near infrared band or an index range for
1149            multiple bands.
1150
1151    Returns an ndarray:
1152
1153        An array containing NDVI values in the range [-1.0, 1.0] for each
1154        corresponding element of data.
1155    '''
1156
1157    r = data[:, :, red].astype(float)
1158    if len(r.shape) == 3 and r.shape[2] > 1:
1159        r = sum(r, 2) / r.shape[2]
1160    n = data[:, :, nir].astype(float)
1161    if len(n.shape) == 3 and n.shape[2] > 1:
1162        n = sum(n, 2) / n.shape[2]
1163
1164    return (n - r) / (n + r)
1165
1166
1167def bdist(class1, class2):
1168    '''
1169    Calulates the Bhattacharyya distance between two classes.
1170
1171    USAGE:  bd = bdist(class1, class2)
1172
1173    Arguments:
1174
1175        `class1`, `class2` (:class:`~spectral.algorithms.algorithms.TrainingClass`)
1176
1177    Returns:
1178
1179        A float value for the Bhattacharyya Distance between the classes.  This
1180        function is aliased to :func:`~spectral.algorithms.algorithms.bDistance`.
1181
1182    References:
1183
1184        Richards, J.A. & Jia, X. Remote Sensing Digital Image Analysis: An
1185        Introduction. (Springer: Berlin, 1999).
1186    '''
1187    terms = bdist_terms(class1, class2)
1188    return terms[0] + terms[1]
1189
1190bDistance = bdist
1191
1192
1193def bdist_terms(a, b):
1194    '''
1195    Calulate the linear and quadratic terms of the Bhattacharyya distance
1196    between two classes.
1197
1198    USAGE:  (linTerm, quadTerm) = bDistanceTerms(a, b)
1199
1200    ARGUMENTS:
1201        (a, b)              The classes for which to determine the
1202                            B-distance.
1203    RETURN VALUE:
1204                            A 2-tuple of the linear and quadratic terms
1205    '''
1206    m = a.stats.mean - b.stats.mean
1207    avgCov = (a.stats.cov + b.stats.cov) / 2
1208
1209    lin_term = (1 / 8.) * np.dot(np.transpose(m), np.dot(np.inv(avgCov), m))
1210
1211    quad_term = 0.5 * (log_det(avgCov)
1212                       - 0.5 * a.stats.log_det_cov
1213                       - 0.5 * b.stats.log_det_cov)
1214
1215    return (lin_term, float(quad_term))
1216
1217
1218def transform_image(matrix, image):
1219    '''
1220    Performs linear transformation on all pixels in an image.
1221
1222    Arguments:
1223
1224        matrix (:class:`numpy.ndarray`):
1225
1226            A `CxB` linear transform to apply.
1227
1228        image  (:class:`numpy.ndarray` or :class:`spectral.Image`):
1229
1230            Image data to transform
1231
1232    Returns:
1233
1234        If `image` is an `MxNxB` :class:`numpy.ndarray`, the return will be a
1235        transformed :class:`numpy.ndarray` with shape `MxNxC`.  If `image` is
1236        :class:`spectral.Image`, the returned object will be a
1237        :class:`spectral.TransformedImage` object and no transformation of data
1238        will occur until elements of the object are accessed.
1239    '''
1240    if isinstance(image, SpyFile):
1241        return TransformedImage(matrix, image)
1242    elif isinstance(image, np.ndarray):
1243        (M, N, B) = image.shape
1244        ximage = np.zeros((M, N, matrix.shape[0]), float)
1245
1246        for i in range(M):
1247            for j in range(N):
1248                ximage[i, j] = np.dot(matrix, image[i, j].astype(float))
1249        return ximage
1250    else:
1251        raise 'Unrecognized image type passed to transform_image.'
1252
1253
1254def orthogonalize(vecs, start=0):
1255    '''
1256    Performs Gram-Schmidt Orthogonalization on a set of vectors.
1257
1258    Arguments:
1259
1260        `vecs` (:class:`numpy.ndarray`):
1261
1262            The set of vectors for which an orthonormal basis will be created.
1263            If there are `C` vectors of length `B`, `vecs` should be `CxB`.
1264
1265        `start` (int) [default 0]:
1266
1267            If `start` > 0, then `vecs[start]` will be assumed to already be
1268            orthonormal.
1269
1270    Returns:
1271
1272        A new `CxB` containing an orthonormal basis for the given vectors.
1273    '''
1274    (M, N) = vecs.shape
1275    basis = np.array(np.transpose(vecs))
1276    eye = np.identity(N).astype(float)
1277    for i in range(start, M):
1278        if i == 0:
1279            basis[:, 0] /= np.linalg.norm(basis[:, 0])
1280            continue
1281        v = basis[:, i] / np.linalg.norm(basis[:, i])
1282        U = basis[:, :i]
1283        P = eye - U.dot(np.linalg.inv(U.T.dot(U)).dot(U.T))
1284        basis[:, i] = P.dot(v)
1285        basis[:, i] /= np.linalg.norm(basis[:, i])
1286    return np.transpose(basis)
1287
1288
1289def unmix(data, members):
1290    '''
1291    Perform linear unmixing on image data.
1292
1293    USAGE: mix = unmix(data, members)
1294
1295    ARGUMENTS:
1296        data                The MxNxB image data to be unmixed
1297        members             An CxB array of C endmembers
1298    RETURN VALUE:
1299        mix                 An MxNxC array of endmember fractions.
1300
1301    unmix performs linear unmixing on the image data.  After calling the
1302    function, mix[:,:,i] will then represent the fractional abundances
1303    for the i'th endmember. If the result of unmix is returned into 'mix',
1304    then an array of indices of greatest fractional endmembers is obtained
1305    by argmax(mix).
1306
1307    Note that depending on endmembers given, fractional abundances for
1308    endmembers may be negative.
1309    '''
1310    assert members.shape[1] == data.shape[2], \
1311        'Matrix dimensions are not aligned.'
1312
1313    members = members.astype(float)
1314    # Calculate the pseudo inverse
1315    pi = np.dot(members, np.transpose(members))
1316    pi = np.dot(np.inv(pi), members)
1317
1318    (M, N, B) = data.shape
1319    unmixed = np.zeros((M, N, members.shape[0]), float)
1320    for i in range(M):
1321        for j in range(N):
1322            unmixed[i, j] = np.dot(pi, data[i, j].astype(float))
1323    return unmixed
1324
1325
1326def spectral_angles(data, members):
1327    '''Calculates spectral angles with respect to given set of spectra.
1328
1329    Arguments:
1330
1331        `data` (:class:`numpy.ndarray` or :class:`spectral.Image`):
1332
1333            An `MxNxB` image for which spectral angles will be calculated.
1334
1335        `members` (:class:`numpy.ndarray`):
1336
1337            `CxB` array of spectral endmembers.
1338
1339    Returns:
1340
1341        `MxNxC` array of spectral angles.
1342
1343
1344    Calculates the spectral angles between each vector in data and each of the
1345    endmembers.  The output of this function (angles) can be used to classify
1346    the data by minimum spectral angle by calling argmin(angles).
1347    '''
1348    assert members.shape[1] == data.shape[2], \
1349        'Matrix dimensions are not aligned.'
1350
1351    m = np.array(members, np.float64)
1352    m /= np.sqrt(np.einsum('ij,ij->i', m, m))[:, np.newaxis]
1353
1354    norms = np.sqrt(np.einsum('ijk,ijk->ij', data, data))
1355    dots = np.einsum('ijk,mk->ijm', data, m)
1356    dots = np.clip(dots / norms[:, :, np.newaxis], -1, 1)
1357    return np.arccos(dots)
1358
1359def msam(data, members):
1360    '''Modified SAM scores according to Oshigami, et al [1]. Endmembers are
1361    mean-subtracted prior to spectral angle calculation. Results are
1362    normalized such that the maximum value of 1 corresponds to a perfect match
1363    (zero spectral angle).
1364
1365    Arguments:
1366
1367        `data` (:class:`numpy.ndarray` or :class:`spectral.Image`):
1368
1369            An `MxNxB` image for which spectral angles will be calculated.
1370
1371        `members` (:class:`numpy.ndarray`):
1372
1373            `CxB` array of spectral endmembers.
1374
1375    Returns:
1376
1377        `MxNxC` array of MSAM scores with maximum value of 1 corresponding
1378        to a perfect match (zero spectral angle).
1379
1380    Calculates the spectral angles between each vector in data and each of the
1381    endmembers.  The output of this function (angles) can be used to classify
1382    the data by minimum spectral angle by calling argmax(angles).
1383
1384    References:
1385
1386    [1] Shoko Oshigami, Yasushi Yamaguchi, Tatsumi Uezato, Atsushi Momose,
1387    Yessy Arvelyna, Yuu Kawakami, Taro Yajima, Shuichi Miyatake, and
1388    Anna Nguno. 2013. Mineralogical mapping of southern Namibia by application
1389    of continuum-removal MSAM method to the HyMap data. Int. J. Remote Sens.
1390    34, 15 (August 2013), 5282-5295.
1391    '''
1392    # The modifications to the `spectral_angles` function were contributed by
1393    # Christian Mielke.
1394
1395    assert members.shape[1] == data.shape[2], \
1396        'Matrix dimensions are not aligned.'
1397
1398    (M, N, B) = data.shape
1399    m = np.array(members, np.float64)
1400    C = m.shape[0]
1401
1402    # Normalize endmembers
1403    for i in range(C):
1404        # Fisher z trafo type operation
1405        m[i] -= np.mean(m[i])
1406        m[i] /= np.sqrt(m[i].dot(m[i]))
1407
1408    angles = np.zeros((M, N, C), np.float64)
1409
1410    for i in range(M):
1411        for j in range(N):
1412            #Fisher z trafo type operation
1413            v = data[i, j] - np.mean(data[i, j])
1414            v /= np.sqrt(v.dot(v))
1415            v = np.clip(v, -1, 1)
1416            for k in range(C):
1417                # Calculate Mineral Index according to Oshigami et al.
1418                # (Intnl. J. of Remote Sens. 2013)
1419                a = np.clip(v.dot(m[k]), -1, 1)
1420                angles[i,j,k]= 1.0 - np.arccos(a) / (math.pi / 2)
1421    return angles
1422
1423def noise_from_diffs(X, direction='lowerright'):
1424    '''Estimates noise statistcs by taking differences of adjacent pixels.
1425
1426    Arguments:
1427
1428        `X` (np.ndarray):
1429
1430            The data from which to estimage noise statistics. `X` should have
1431            shape `(nrows, ncols, nbands`).
1432
1433        `direction` (str, default "lowerright"):
1434
1435            The pixel direction along which to calculate pixel differences.
1436            Must be one of the following:
1437
1438                'lowerright':
1439                    Take difference with pixel diagonally to lower right
1440                'lowerleft':
1441                     Take difference with pixel diagonally to lower right
1442                'right':
1443                    Take difference with pixel to the right
1444                'lower':
1445                    Take differenece with pixel below
1446
1447    Returns a :class:`~spectral.algorithms.algorithms.GaussianStats` object.
1448    '''
1449    if direction.lower() not in ['lowerright', 'lowerleft', 'right', 'lower']:
1450        raise ValueError('Invalid `direction` value.')
1451    if direction == 'lowerright':
1452        deltas = X[:-1, :-1, :] - X[1:, 1:, :]
1453    elif direction == 'lowerleft':
1454        deltas = X[:-1, 1:, :] - X[1:, :-1, :]
1455    elif direction == 'right':
1456        deltas = X[:, :-1, :] - X[:, 1:, :]
1457    else:
1458        deltas = X[:-1, :, :] - X[1:, :, :]
1459
1460    stats = calc_stats(deltas)
1461    stats.cov /= 2.0
1462    return stats
1463
1464class MNFResult(object):
1465    '''Result object returned by :func:`~spectral.algorithms.algorithms.mnf`.
1466
1467    This object contains data associates with a Minimum Noise Fraction
1468    calculation, including signal and noise statistics, as well as the
1469    Noise-Adjusted Principal Components (NAPC). This object can be used to
1470    denoise image data or to reduce its dimensionality.
1471    '''
1472    def __init__(self, signal, noise, napc):
1473        '''
1474        Arguments:
1475
1476            `signal` (:class:`~spectral.GaussianStats`):
1477
1478                Signal statistics
1479
1480            `noise` (:class:`~spectral.GaussianStats`):
1481
1482                Noise statistics
1483
1484            `napc` (:class:`~spectral.PrincipalComponents`):
1485
1486                Noise-Adjusted Pricipal Components
1487        '''
1488        self.signal = signal
1489        self.noise = noise
1490        self.napc = napc
1491
1492    def _num_from_kwargs(self, **kwargs):
1493        '''Returns number of components to retain for the given kwargs.'''
1494        for key in kwargs:
1495            if key not in ('num', 'snr'):
1496                raise Exception('Keyword not recognized.')
1497        num = kwargs.get('num', None)
1498        snr = kwargs.get('snr', None)
1499        if num == snr == None:
1500            raise Exception('Must specify either `num` or `snr` keyword.')
1501        if None not in (num, snr):
1502            raise Exception('Can not specify both `num` and `snr` keywords.')
1503        if snr is not None:
1504            num = self.num_with_snr(snr)
1505        return num
1506
1507    def denoise(self, X, **kwargs):
1508        '''Returns a de-noised version of `X`.
1509
1510        Arguments:
1511
1512            `X` (np.ndarray):
1513
1514                Data to be de-noised. Can be a single pixel or an image.
1515
1516        One (and only one) of the following keywords must be specified:
1517
1518            `num` (int):
1519
1520                Number of Noise-Adjusted Principal Components to retain.
1521
1522            `snr` (float):
1523
1524                Threshold signal-to-noise ratio (SNR) to retain.
1525
1526        Returns denoised image data with same shape as `X`.
1527
1528        Note that calling this method is equivalent to calling the
1529        `get_denoising_transform` method with same keyword and applying the
1530        returned transform to `X`. If you only intend to denoise data with the
1531        same parameters multiple times, then it is more efficient to get the
1532        denoising transform and reuse it, rather than calling this method
1533        multilple times.
1534        '''
1535        f = self.get_denoising_transform(**kwargs)
1536        return f(X)
1537
1538    def get_denoising_transform(self, **kwargs):
1539        '''Returns a function for denoising image data.
1540
1541        One (and only one) of the following keywords must be specified:
1542
1543            `num` (int):
1544
1545                Number of Noise-Adjusted Principal Components to retain.
1546
1547            `snr` (float):
1548
1549                Threshold signal-to-noise ratio (SNR) to retain.
1550
1551        Returns a callable :class:`~spectral.algorithms.transforms.LinearTransform`
1552        object for denoising image data.
1553        '''
1554        N = self._num_from_kwargs(**kwargs)
1555        V = self.napc.eigenvectors
1556        Vr = np.array(V)
1557        Vr[:, N:] = 0.
1558        f = LinearTransform(self.noise.sqrt_cov.dot(Vr).dot(V.T) \
1559			    .dot(self.noise.sqrt_inv_cov),
1560                            pre=-self.signal.mean,
1561                            post=self.signal.mean)
1562        return f
1563
1564    def reduce(self, X, **kwargs):
1565        '''Reduces dimensionality of image data.
1566
1567        Arguments:
1568
1569            `X` (np.ndarray):
1570
1571                Data to be reduced. Can be a single pixel or an image.
1572
1573        One (and only one) of the following keywords must be specified:
1574
1575            `num` (int):
1576
1577                Number of Noise-Adjusted Principal Components to retain.
1578
1579            `snr` (float):
1580
1581                Threshold signal-to-noise ratio (SNR) to retain.
1582
1583        Returns a verions of `X` with reduced dimensionality.
1584
1585        Note that calling this method is equivalent to calling the
1586        `get_reduction_transform` method with same keyword and applying the
1587        returned transform to `X`. If you intend to denoise data with the
1588        same parameters multiple times, then it is more efficient to get the
1589        reduction transform and reuse it, rather than calling this method
1590        multilple times.
1591        '''
1592        f = self.get_reduction_transform(**kwargs)
1593        return f(X)
1594
1595    def get_reduction_transform(self, **kwargs):
1596        '''Reduces dimensionality of image data.
1597
1598        One (and only one) of the following keywords must be specified:
1599
1600            `num` (int):
1601
1602                Number of Noise-Adjusted Principal Components to retain.
1603
1604            `snr` (float):
1605
1606                Threshold signal-to-noise ratio (SNR) to retain.
1607
1608        Returns a callable :class:`~spectral.algorithms.transforms.LinearTransform`
1609        object for reducing the dimensionality of image data.
1610        '''
1611        N = self._num_from_kwargs(**kwargs)
1612        V = self.napc.eigenvectors
1613        f = LinearTransform(V[:, :N].T.dot(self.noise.sqrt_inv_cov),
1614                            pre=-self.signal.mean)
1615        return f
1616
1617    def num_with_snr(self, snr):
1618        '''Returns the number of components with SNR >= `snr`.'''
1619        return np.sum(self.napc.eigenvalues >= (snr + 1))
1620
1621def mnf(signal, noise):
1622    '''Computes Minimum Noise Fraction / Noise-Adjusted Principal Components.
1623
1624    Arguments:
1625
1626        `signal` (:class:`~spectral.algorithms.algorithms.GaussianStats`):
1627
1628            Estimated signal statistics
1629
1630        `noise` (:class:`~spectral.algorithms.algorithms.GaussianStats`):
1631
1632            Estimated noise statistics
1633
1634    Returns an :class:`~spectral.algorithms.algorithms.MNFResult` object,
1635    containing the Noise-Adjusted Principal Components (NAPC) and methods for
1636    denoising or reducing dimensionality of associated data.
1637
1638    The Minimum Noise Fraction (MNF) is similar to the Principal Components
1639    transformation with the difference that the Principal Components associated
1640    with the MNF are ordered by descending signal-to-noise ratio (SNR) rather
1641    than overall image variance. Note that the eigenvalues of the NAPC are
1642    equal to one plus the SNR in the transformed space (since noise has
1643    whitened unit variance in the NAPC coordinate space).
1644
1645    Example:
1646
1647        >>> data = open_image('92AV3C.lan').load()
1648        >>> signal = calc_stats(data)
1649        >>> noise = noise_from_diffs(data[117: 137, 85: 122, :])
1650        >>> mnfr = mnf(signal, noise)
1651
1652        >>> # De-noise the data by eliminating NAPC components where SNR < 10.
1653        >>> # The de-noised data will be in the original coordinate space (at
1654        >>> # full dimensionality).
1655        >>> denoised = mnfr.denoise(data, snr=10)
1656
1657        >>> # Reduce dimensionality, retaining NAPC components where SNR >= 10.
1658        >>> reduced = mnfr.reduce(data, snr=10)
1659
1660        >>> # Reduce dimensionality, retaining top 50 NAPC components.
1661        >>> reduced = mnfr.reduce(data, num=50)
1662
1663    References:
1664
1665        Lee, James B., A. Stephen Woodyatt, and Mark Berman. "Enhancement of
1666        high spectral resolution remote-sensing data by a noise-adjusted
1667        principal components transform." Geoscience and Remote Sensing, IEEE
1668        Transactions on 28.3 (1990): 295-304.
1669    '''
1670    C = noise.sqrt_inv_cov.dot(signal.cov).dot(noise.sqrt_inv_cov)
1671    (L, V) = np.linalg.eig(C)
1672    # numpy says eigenvalues may not be sorted so we'll sort them, if needed.
1673    if not np.alltrue(np.diff(L) <= 0):
1674        ii = list(reversed(np.argsort(L)))
1675        L = L[ii]
1676        V = V[:, ii]
1677    wstats = GaussianStats(mean=np.zeros_like(L), cov=C)
1678    napc = PrincipalComponents(L, V, wstats)
1679    return MNFResult(signal, noise, napc)
1680
1681def ppi(X, niters, threshold=0, centered=False, start=None, display=0,
1682        **imshow_kwargs):
1683    '''Returns pixel purity indices for an image.
1684
1685    Arguments:
1686
1687        `X` (ndarray):
1688
1689            Image data for which to calculate pixel purity indices
1690
1691        `niters` (int):
1692
1693            Number of iterations to perform. Each iteration corresponds to a
1694            projection of the image data onto a random unit vector.
1695
1696        `threshold` (numeric):
1697
1698            If this value is zero, only the two most extreme pixels will have
1699            their indices incremented for each random vector. If the value is
1700            greater than zero, then all pixels whose projections onto the
1701            random vector are with `threshold` data units of either of the two
1702            extreme pixels will also have their indices incremented.
1703
1704        `centered` (bool):
1705
1706            If True, then the pixels in X are assumed to have their mean
1707            already subtracted; otherwise, the mean of `X` will be computed
1708            and subtracted prior to computing the purity indices.
1709
1710        `start` (ndarray):
1711
1712            An optional array of initial purity indices. This can be used to
1713            continue computing PPI values after a previous call to `ppi` (i.e.,
1714            set `start` equal to the return value from a previou call to `ppi`.
1715            This should be an integer-valued array whose dimensions are equal
1716            to the first two dimensions of `X`.
1717
1718        `display` (integer):
1719
1720            If set to a postive integer, a :class:`~spectral.graphics.spypylab.ImageView`
1721            window will be opened and dynamically display PPI values as the
1722            function iterates. The value specifies the number of PPI iterations
1723            between display updates. It is recommended to use a value around
1724            100 or higher. If the `stretch` keyword (see :func:`~spectral.graphics.graphics.get_rgb`
1725            for meaning) is not provided, a default stretch of (0.99, 0.999)
1726            is used.
1727
1728    Return value:
1729
1730        An ndarray of integers that represent the pixel purity indices of the
1731        input image. The return array will have dimensions equal to the first
1732        two dimensions of the input image.
1733
1734    Keyword Arguments:
1735
1736        Any keyword accepted by :func:`~spectral.graphics.spypylab.imshow`.
1737        These keywords will be passed to the image display and only have an
1738        effect if the `display` argument is nonzero.
1739
1740    This function can be interruped with a KeyboardInterrupt (ctrl-C), in which
1741    case, the most recent value of the PPI array will be returned. This can be
1742    used in conjunction with the `display` argument to view the progression of
1743    the PPI values until they appear stable, then terminate iteration using
1744    ctrl-C.
1745
1746    References:
1747
1748    Boardman J.W., Kruse F.A, and Green R.O., "Mapping Target Signatures via
1749    Partial Unmixing of AVIRIS Data," Pasadena, California, USA, 23 Jan 1995,
1750    URI: http://hdl.handle.net/2014/33635
1751    '''
1752    if display is not None:
1753        if not isinstance(display, Integral) or isinstance(display, bool) or \
1754            display < 0:
1755            msg = '`display` argument must be a non-negative integer.'
1756            raise ValueError(msg)
1757
1758    if not centered:
1759        stats = calc_stats(X)
1760        X = X - stats.mean
1761
1762    shape = X.shape
1763    X = X.reshape(-1, X.shape[-1])
1764    nbands = X.shape[-1]
1765
1766    fig = None
1767    updating = False
1768
1769    if start is not None:
1770        counts = np.array(start.ravel())
1771    else:
1772        counts = np.zeros(X.shape[0], dtype=np.uint32)
1773
1774    if 'stretch' not in imshow_kwargs:
1775        imshow_kwargs['stretch'] = (0.99, 0.999)
1776
1777    msg = 'Running {0} pixel purity iterations...'.format(niters)
1778    spy._status.display_percentage(msg)
1779
1780    try:
1781        for i in range(niters):
1782            r = np.random.rand(nbands) - 0.5
1783            r /= np.sqrt(np.sum(r * r))
1784            s = X.dot(r)
1785            imin = np.argmin(s)
1786            imax = np.argmax(s)
1787
1788            updating = True
1789            if threshold == 0:
1790                # Only the two extreme pixels are incremented
1791                counts[imin] += 1
1792                counts[imax] += 1
1793            else:
1794                # All pixels within threshold distance from the two extremes
1795                counts[s >= (s[imax] - threshold)] += 1
1796                counts[s <= (s[imin] + threshold)] += 1
1797            updating = False
1798
1799            if display > 0 and (i + 1) % display == 0:
1800                if fig is not None:
1801                    fig.set_data(counts.reshape(shape[:2]), **imshow_kwargs)
1802                else:
1803                    fig = spy.imshow(counts.reshape(shape[:2]), **imshow_kwargs)
1804                fig.set_title('PPI ({} iterations)'.format(i + 1))
1805
1806            if not (i + 1) % 10:
1807                spy._status.update_percentage(100 * (i + 1) / niters)
1808
1809    except KeyboardInterrupt:
1810        spy._status.end_percentage('interrupted')
1811        if not updating:
1812            msg = 'KeyboardInterrupt received. Returning pixel purity ' \
1813              'values after {0} iterations.'.format(i)
1814            spy._status.write(msg)
1815            return counts.reshape(shape[:2])
1816        else:
1817            msg = 'KeyboardInterrupt received during array update. PPI ' \
1818              'values may be corrupt. Returning None'
1819            spy._status.write(msg)
1820            return None
1821
1822    spy._status.end_percentage()
1823
1824    return counts.reshape(shape[:2])
1825
1826
1827def smacc(spectra, min_endmembers=None, max_residual_norm=float('Inf')):
1828    '''Returns SMACC decomposition (H = F * S + R) matrices for an image or
1829    array of spectra.
1830
1831    Let `H` be matrix of shape NxB, where B is number of bands, and N number of
1832    spectra, then if `spectra` is of the same shape, `H` will be equal to `spectra`.
1833    Otherwise, `spectra` is assumed to be 3D spectral image, and it is reshaped
1834    to match shape of `H`.
1835
1836    Arguments:
1837
1838        `spectra` (ndarray):
1839
1840            Image data for which to calculate SMACC decomposition matrices.
1841
1842        `min_endmembers` (int):
1843
1844            Minimal number of endmembers to find. Defaults to rank of `H`,
1845            computed numerically with `numpy.linalg.matrix_rank`.
1846
1847        `max_residual_norm`:
1848
1849            Maximum value of residual vectors' norms. Algorithm will keep finding
1850            new endmembers until max value of residual norms is less than this
1851            argument. Defaults to float('Inf')
1852
1853    Returns:
1854        3 matrices, S, F and R, such that H = F * S + R (but it might not always hold).
1855        F is matrix of expansion coefficients of shape N x num_endmembers.
1856        All values of F are equal to, or greater than zero.
1857        S is matrix of endmember spectra, extreme vectors, of shape num_endmembers x B.
1858        R is matrix of residuals of same shape as H (N x B).
1859
1860        If values of H are large (few tousands), H = F * S + R, might not hold,
1861        because of numeric errors. It is advisable to scale numbers, by dividing
1862        by 10000, for example. Depending on how accurate you want it to be,
1863        you can check if H is really strictly equal to F * S + R,
1864        and adjust R: R = H - np.matmul(F, S).
1865
1866    References:
1867
1868        John H. Gruninger, Anthony J. Ratkowski, and Michael L. Hoke "The sequential
1869        maximum angle convex cone (SMACC) endmember model", Proc. SPIE 5425, Algorithms
1870        and Technologies for Multispectral, Hyperspectral, and Ultraspectral Imagery X,
1871        (12 August 2004); https://doi.org/10.1117/12.543794
1872    '''
1873    # Indices of vectors in S.
1874    q = []
1875
1876    H = spectra if len(spectra.shape) == 2 else spectra.reshape(
1877        (spectra.shape[0] * spectra.shape[1], spectra.shape[2]))
1878    R = H
1879    Fs = []
1880
1881    F = None
1882    S = None
1883
1884    if min_endmembers is None:
1885        min_endmembers = np.linalg.matrix_rank(H)
1886
1887    # Add the longest vector to q.
1888    residual_norms = np.sqrt(np.einsum('ij,ij->i', H, H))
1889    current_max_residual_norm = np.max(residual_norms)
1890
1891    if max_residual_norm is None:
1892        max_residual_norm = current_max_residual_norm / min_endmembers
1893
1894    while len(q) < min_endmembers or current_max_residual_norm > max_residual_norm:
1895        q.append(np.argmax(residual_norms))
1896        n = len(q) - 1
1897        # Current basis vector.
1898        w = R[q[n]]
1899        # Temporary to be used for projection calculation.
1900        wt = w / (np.dot(w, w))
1901        # Calculate projection coefficients.
1902        On = np.dot(R, wt)
1903        alpha = np.ones(On.shape, dtype=np.float64)
1904        # Make corrections to satisfy convex cone conditions.
1905        # First correct alphas for oblique projection when needed.
1906        for k in range(len(Fs)):
1907            t = On * Fs[k][q[n]]
1908            # This is not so important for the algorithm itself.
1909            # These values correpond to values where On == 0.0, and these
1910            # will be zeroed out below. But to avoid divide-by-zero warning
1911            # we set small values instead of zero.
1912            t[t == 0.0] = 1e-10
1913            np.minimum(Fs[k]/t, alpha, out=alpha)
1914        # Clip negative projection coefficients.
1915        alpha[On <= 0.0] = 0.0
1916        # Current extreme vector should always be removed completely.
1917        alpha[q[n]] = 1.0
1918        # Calculate oblique projection coefficients.
1919        Fn = alpha * On
1920        # Correction for numerical stability.
1921        Fn[Fn <= 0.0] = 0.0
1922        # Remove projection to current basis from R.
1923        R = R - np.outer(Fn, w)
1924        # Update projection coefficients.
1925        for k in range(len(Fs)):
1926            Fs[k] -= Fs[k][q[n]] * Fn
1927            # Correction because of numerical problems.
1928            Fs[k][Fs[k] <= 0.0] = 0.0
1929
1930        # Add new Fn.
1931        Fs.append(Fn)
1932
1933        residual_norms[:] = np.sqrt(np.einsum('ij,ij->i', R, R))
1934        current_max_residual_norm = np.max(residual_norms)
1935        print('Found {0} endmembers, current max residual norm is {1:.4f}\r'
1936            .format(len(q), current_max_residual_norm), end='')
1937
1938    # Correction as suggested in the SMACC paper.
1939    for k, s in enumerate(q):
1940        Fs[k][q] = 0.0
1941        Fs[k][s] = 1.0
1942
1943    F = np.array(Fs).T
1944    S = H[q]
1945
1946    # H = F * S + R
1947    return S, F, R
1948