1__docformat__ = "restructuredtext en"
2
3import sys as _sys
4import mdp
5from mdp import Node, NodeException, numx, numx_rand
6from mdp.nodes import WhiteningNode
7from mdp.utils import (DelayCovarianceMatrix, MultipleCovarianceMatrices,
8                       rotate, mult)
9
10
11# TODO: support floats of size different than 64-bit; will need to change SQRT_EPS_D
12
13# rename often used functions
14sum, cos, sin, PI = numx.sum, numx.cos, numx.sin, numx.pi
15SQRT_EPS_D = numx.sqrt(numx.finfo('d').eps)
16
17def _triu(m, k=0):
18    """ returns the elements on and above the k-th diagonal of m.  k=0 is the
19        main diagonal, k > 0 is above and k < 0 is below the main diagonal."""
20    N = m.shape[0]
21    M = m.shape[1]
22    x = numx.greater_equal(numx.subtract.outer(numx.arange(N),
23                                               numx.arange(M)),1-k)
24    out = (1-x)*m
25    return out
26
27#############
28class ISFANode(Node):
29    """
30    Perform Independent Slow Feature Analysis on the input data.
31
32    **Internal variables of interest**
33
34      ``self.RP``
35          The global rotation-permutation matrix. This is the filter
36          applied on input_data to get output_data
37
38      ``self.RPC``
39          The *complete* global rotation-permutation matrix. This
40          is a matrix of dimension input_dim x input_dim (the 'outer space'
41          is retained)
42
43      ``self.covs``
44          A `mdp.utils.MultipleCovarianceMatrices` instance containing
45          the current time-delayed covariance matrices of the input_data.
46          After convergence the uppermost ``output_dim`` x ``output_dim``
47          submatrices should be almost diagonal.
48
49          ``self.covs[n-1]`` is the covariance matrix relative to the ``n``-th
50          time-lag
51
52          Note: they are not cleared after convergence. If you need to free
53          some memory, you can safely delete them with::
54
55              >>> del self.covs
56
57      ``self.initial_contrast``
58          A dictionary with the starting contrast and the SFA and ICA parts of
59          it.
60
61      ``self.final_contrast``
62          Like the above but after convergence.
63
64    Note: If you intend to use this node for large datasets please have
65    a look at the ``stop_training`` method documentation for
66    speeding things up.
67
68    References:
69    Blaschke, T. , Zito, T., and Wiskott, L. (2007).
70    Independent Slow Feature Analysis and Nonlinear Blind Source Separation.
71    Neural Computation 19(4):994-1021 (2007)
72    http://itb.biologie.hu-berlin.de/~wiskott/Publications/BlasZitoWisk2007-ISFA-NeurComp.pdf
73    """
74    def __init__(self, lags=1, sfa_ica_coeff=(1., 1.), icaweights=None,
75                 sfaweights=None, whitened=False, white_comp = None,
76                 white_parm = None, eps_contrast=1e-6, max_iter=10000,
77                 RP=None, verbose=False, input_dim=None, output_dim=None,
78                 dtype=None):
79        """
80        Perform Independent Slow Feature Analysis.
81
82        The notation is the same used in the paper by Blaschke et al. Please
83        refer to the paper for more information.
84
85        :Parameters:
86          lags
87            list of time-lags to generate the time-delayed covariance
88            matrices (in the paper this is the set of \tau). If
89            lags is an integer, time-lags 1,2,...,'lags' are used.
90            Note that time-lag == 0 (instantaneous correlation) is
91            always implicitly used.
92
93          sfa_ica_coeff
94            a list of float with two entries, which defines the
95            weights of the SFA and ICA part of the objective
96            function. They are called b_{SFA} and b_{ICA} in the
97            paper.
98
99          sfaweights
100            weighting factors for the covariance matrices relative
101            to the SFA part of the objective function (called
102            \kappa_{SFA}^{\tau} in the paper). Default is
103            [1., 0., ..., 0.]
104            For possible values see the description of icaweights.
105
106          icaweights
107            weighting factors for the cov matrices relative
108            to the ICA part of the objective function (called
109            \kappa_{ICA}^{\tau} in the paper). Default is 1.
110            Possible values are:
111
112            - an integer ``n``: all matrices are weighted the same
113              (note that it does not make sense to have ``n != 1``)
114
115            - a list or array of floats of ``len == len(lags)``:
116              each element of the list is used for weighting the
117              corresponding matrix
118
119            - ``None``: use the default values.
120
121          whitened
122            ``True`` if input data is already white, ``False``
123            otherwise (the data will be whitened internally).
124
125          white_comp
126            If whitened is false, you can set ``white_comp`` to the
127            number of whitened components to keep during the
128            calculation (i.e., the input dimensions are reduced to
129            ``white_comp`` by keeping the components of largest variance).
130          white_parm
131            a dictionary with additional parameters for whitening.
132            It is passed directly to the WhiteningNode constructor.
133            Ex: white_parm = { 'svd' : True }
134
135          eps_contrast
136            Convergence is achieved when the relative
137            improvement in the contrast is below this threshold.
138            Values in the range [1E-4, 1E-10] are usually
139            reasonable.
140
141          max_iter
142            If the algorithms does not achieve convergence within
143            max_iter iterations raise an Exception. Should be
144            larger than 100.
145
146          RP
147            Starting rotation-permutation matrix. It is an
148            input_dim x input_dim matrix used to initially rotate the
149            input components. If not set, the identity matrix is used.
150            In the paper this is used to start the algorithm at the
151            SFA solution (which is often quite near to the optimum).
152
153          verbose
154            print progress information during convergence. This can
155            slow down the algorithm, but it's the only way to see
156            the rate of improvement and immediately spot if something
157            is going wrong.
158
159          output_dim
160            sets the number of independent components that have to
161            be extracted. Note that if this is not smaller than
162            input_dim, the problem is solved linearly and SFA
163            would give the same solution only much faster.
164        """
165        # check that the "lags" argument has some meaningful value
166        if isinstance(lags, (int, long)):
167            lags = range(1, lags+1)
168        elif isinstance(lags, (list, tuple)):
169            lags = numx.array(lags, "i")
170        elif isinstance(lags, numx.ndarray):
171            if not (lags.dtype.char in ['i', 'l']):
172                err_str = "lags must be integer!"
173                raise NodeException(err_str)
174            else:
175                pass
176        else:
177            err_str = ("Lags must be int, list or array. Found "
178                       "%s!" % (type(lags).__name__))
179            raise NodeException(err_str)
180        self.lags = lags
181
182        # sanity checks for weights
183        if icaweights is None:
184            self.icaweights = 1.
185        else:
186            if (len(icaweights) != len(lags)):
187                err = ("icaweights vector length is %d, "
188                       "should be %d" % (str(len(icaweights)), str(len(lags))))
189                raise NodeException(err)
190            self.icaweights = icaweights
191
192        if sfaweights is None:
193            self.sfaweights = [0]*len(lags)
194            self.sfaweights[0] = 1.
195        else:
196            if (len(sfaweights) != len(lags)):
197                err = ("sfaweights vector length is %d, "
198                       "should be %d" % (str(len(sfaweights)), str(len(lags))))
199                raise NodeException(err)
200            self.sfaweights = sfaweights
201
202        # store attributes
203        self.sfa_ica_coeff = sfa_ica_coeff
204        self.max_iter = max_iter
205        self.verbose = verbose
206        self.eps_contrast = eps_contrast
207
208        # if input is not white, insert a WhiteningNode
209        self.whitened = whitened
210        if not whitened:
211            if white_parm is None:
212                white_parm = {}
213            if output_dim is not None:
214                white_comp = output_dim
215            elif white_comp is not None:
216                output_dim = white_comp
217            self.white = WhiteningNode(input_dim=input_dim,
218                                       output_dim=white_comp,
219                                       dtype=dtype, **white_parm)
220
221        # initialize covariance matrices
222        self.covs = [ DelayCovarianceMatrix(dt, dtype=dtype) for dt in lags ]
223
224        # initialize the global rotation-permutation matrix
225        # if not set that we'll eventually be an identity matrix
226        self.RP = RP
227
228        # initialize verbose structure to print nice and useful progress info
229        if verbose:
230            info = { 'sweep' : max(len(str(self.max_iter)), 5),
231                     'perturbe': max(len(str(self.max_iter)), 5),
232                     'float' : 5+8,
233                     'fmt' : "%.5e",
234                     'sep' : " | "}
235            f1 = "Sweep".center(info['sweep'])
236            f1_2 = "Pertb". center(info['perturbe'])
237            f2 = "SFA part".center(info['float'])
238            f3 = "ICA part".center(info['float'])
239            f4 = "Contrast".center(info['float'])
240            header = info['sep'].join([f1, f1_2, f2, f3, f4])
241            info['header'] = header+'\n'
242            info['line'] = len(header)*"-"
243            self._info = info
244
245        # finally call base class constructor
246        super(ISFANode, self).__init__(input_dim, output_dim, dtype)
247
248    def _get_supported_dtypes(self):
249        """Return the list of dtypes supported by this node.
250
251        Support floating point types with size larger or equal than 64 bits.
252        """
253        return [t for t in mdp.utils.get_dtypes('Float') if t.itemsize>=8]
254
255    def _set_dtype(self, dtype):
256        # when typecode is set, we set the whitening node if needed and
257        # the SFA and ICA weights
258        self._dtype = dtype
259        if not self.whitened and self.white.dtype is None:
260            self.white.dtype = dtype
261        self.icaweights = numx.array(self.icaweights, dtype)
262        self.sfaweights = numx.array(self.sfaweights, dtype)
263
264    def _set_input_dim(self, n):
265        self._input_dim = n
266        if not self.whitened and self.white.output_dim is not None:
267            self._effective_input_dim = self.white.output_dim
268        else:
269            self._effective_input_dim = n
270
271    def _train(self, x):
272        # train the whitening node if needed
273        if not self.whitened:
274            self.white.train(x)
275        # update the covariance matrices
276        [self.covs[i].update(x) for i in range(len(self.lags))]
277
278    def _execute(self, x):
279        # filter through whitening node if needed
280        if not self.whitened:
281            x = self.white.execute(x)
282        # rotate input
283        return mult(x, self.RP)
284
285    def _inverse(self, y):
286        # counter-rotate input
287        x = mult(y, self.RP.T)
288        # invert whitening node if needed
289        if not self.whitened:
290            x = self.white.inverse(x)
291        return x
292
293    def _fmt_prog_info(self, sweep, pert, contrast, sfa = None, ica = None):
294        # for internal use only!
295        # format the progress information
296        # don't try to understand this code: it Just Works (TM)
297        fmt = self._info
298        sweep_str = str(sweep).rjust(fmt['sweep'])
299        pert_str = str(pert).rjust(fmt['perturbe'])
300        if sfa is None:
301            sfa_str = fmt['float']*' '
302        else:
303            sfa_str = (fmt['fmt']%(sfa)).rjust(fmt['float'])
304        if ica is None:
305            ica_str = fmt['float']*' '
306        else:
307            ica_str = (fmt['fmt'] % (ica)).rjust(fmt['float'])
308        contrast_str = (fmt['fmt'] % (contrast)).rjust(fmt['float'])
309        table_entry = fmt['sep'].join([sweep_str,
310                                       pert_str,
311                                       sfa_str,
312                                       ica_str,
313                                       contrast_str])
314        return table_entry
315
316    def _get_eye(self):
317        # return an identity matrix with the right dimensions and type
318        return numx.eye(self._effective_input_dim, dtype=self.dtype)
319
320    def _get_rnd_rotation(self, dim):
321        # return a random rot matrix with the right dimensions and type
322        return mdp.utils.random_rot(dim, self.dtype)
323
324    def _get_rnd_permutation(self, dim):
325        # return a random permut matrix with the right dimensions and type
326        zero = numx.zeros((dim, dim), dtype=self.dtype)
327        row = numx_rand.permutation(dim)
328        for col in range(dim):
329            zero[row[col], col] = 1.
330        return zero
331
332    def _givens_angle(self, i, j, covs, bica_bsfa=None, complete=0):
333        # Return the Givens rotation angle for which the contrast function
334        # is minimal
335        if bica_bsfa is None:
336            bica_bsfa = self._bica_bsfa
337        if j < self.output_dim:
338            return self._givens_angle_case1(i, j, covs,
339                                            bica_bsfa, complete=complete)
340        else:
341            return self._givens_angle_case2(i, j, covs,
342                                            bica_bsfa, complete=complete)
343
344
345    def _givens_angle_case2(self, m, n, covs, bica_bsfa, complete=0):
346        # This function makes use of the constants computed in the paper
347        #
348        # R -> R
349        # m -> \mu
350        # n -> \nu
351        #
352        # Note that the minus sign before the angle phi is there because
353        # in the paper the rotation convention is the opposite of ours.
354
355        ncovs = covs.ncovs
356        covs = covs.covs
357        icaweights = self.icaweights
358        sfaweights = self.sfaweights
359        R = self.output_dim
360        bica, bsfa = bica_bsfa
361
362        Cmm, Cmn, Cnn = covs[m, m, :], covs[m, n, :], covs[n, n, :]
363        d0 =   (sfaweights * Cmm*Cmm).sum()
364        d1 = 4*(sfaweights * Cmn*Cmm).sum()
365        d2 = 2*(sfaweights * (2*Cmn*Cmn + Cmm*Cnn)).sum()
366        d3 = 4*(sfaweights * Cmn*Cnn).sum()
367        d4 =   (sfaweights * Cnn*Cnn).sum()
368        e0 = 2*(icaweights * ((covs[:R, m, :]*covs[:R, m, :]).sum(axis=0)
369                              - Cmm*Cmm)).sum()
370        e1 = 4*(icaweights * ((covs[:R, m, :]*covs[:R, n, :]).sum(axis=0)
371                              - Cmm*Cmn)).sum()
372        e2 = 2*(icaweights * ((covs[:R, n, :]*covs[:R, n, :]).sum(axis=0)
373                              - Cmn*Cmn)).sum()
374
375        s22 = 0.25 * bsfa*(d1+d3)   + 0.5* bica*(e1)
376        c22 = 0.5  * bsfa*(d0-d4)   + 0.5* bica*(e0-e2)
377        s24 = 0.125* bsfa*(d1-d3)
378        c24 = 0.125* bsfa*(d0-d2+d4)
379
380        # Compute the contrast function in a grid of angles to find a
381        # first approximation for the minimum.  Repeat two times
382        # (effectively doubling the resolution). Note that we can do
383        # that because we know we have a single minimum.
384        #
385        # npoints should not be too large otherwise the contrast
386        # funtion appears to be constant. This is because we hit the
387        # maximum resolution for the cosine function (ca. 1e-15)
388        npoints = 100
389        left = -PI/2 - PI/(npoints+1)
390        right = PI/2 + PI/(npoints+1)
391        for iter in (1, 2):
392            phi = numx.linspace(left, right, npoints+3)
393            contrast = c22*cos(-2*phi)+s22*sin(-2*phi)+\
394                       c24*cos(-4*phi)+s24*sin(-4*phi)
395            minidx = contrast.argmin()
396            left = phi[max(minidx-1, 0)]
397            right = phi[min(minidx+1, len(phi)-1)]
398
399        # The contrast is almost a parabola around the minimum.
400        # To find the minimum we can therefore compute the derivative
401        # (which should be a line) and calculate its root.
402        # This step helps to overcome the resolution limit of the
403        # cosine function and clearly improve the final result.
404        der_left = 2*c22*sin(-2*left)- 2*s22*cos(-2*left)+\
405                   4*c24*sin(-4*left)- 4*s24*cos(-4*left)
406        der_right = 2*c22*sin(-2*right)-2*s22*cos(-2*right)+\
407                    4*c24*sin(-4*right)-4*s24*cos(-4*right)
408        if abs(der_left - der_right) < SQRT_EPS_D:
409            minimum = phi[minidx]
410        else:
411            minimum = right - der_right*(right-left)/(der_right-der_left)
412
413        dc = numx.zeros((ncovs,), dtype = self.dtype)
414        for t in range(ncovs):
415            dg = covs[:R, :R, t].diagonal()
416            dc[t] = (dg*dg).sum(axis=0)
417        dc = ((dc-Cmm*Cmm)*sfaweights).sum()
418
419        ec = numx.zeros((ncovs, ), dtype = self.dtype)
420        for t in range(ncovs):
421            ec[t] = sum([covs[i, j, t]*covs[i, j, t] for i in range(R-1)
422                         for j in range(i+1, R) if i != m and j != m])
423        ec = 2*(ec*icaweights).sum()
424        a20 = 0.125*bsfa*(3*d0+d2+3*d4+8*dc)+0.5*bica*(e0+e2+2*ec)
425        minimum_contrast = a20+c22*cos(-2*minimum)+s22*sin(-2*minimum)+\
426                           c24*cos(-4*minimum)+s24*sin(-4*minimum)
427        if complete:
428            # Compute the contrast between -pi/2 and pi/2
429            # (useful for testing purposes)
430            npoints = 1000
431            phi = numx.linspace(-PI/2, PI/2, npoints+1)
432            contrast = a20 + c22*cos(-2*phi) + s22*sin(-2*phi) +\
433                       c24*cos(-4*phi) + s24*sin(-4*phi)
434            return phi, contrast, minimum, minimum_contrast
435        else:
436            return minimum, minimum_contrast
437
438
439    def _givens_angle_case1(self, m, n, covs, bica_bsfa, complete=0):
440        # This function makes use of the constants computed in the paper
441        #
442        # R -> R
443        # m -> \mu
444        # n -> \nu
445        #
446        # Note that the minus sign before the angle phi is there because
447        # in the paper the rotation convention is the opposite of ours.
448        ncovs = covs.ncovs
449        covs = covs.covs
450        icaweights = self.icaweights
451        sfaweights = self.sfaweights
452        bica, bsfa = bica_bsfa
453
454        Cmm, Cmn, Cnn = covs[m, m, :], covs[m, n, :], covs[n, n, :]
455        d0 =   (sfaweights * (Cmm*Cmm+Cnn*Cnn)).sum()
456        d1 = 4*(sfaweights * (Cmm*Cmn-Cmn*Cnn)).sum()
457        d2 = 2*(sfaweights * (2*Cmn*Cmn+Cmm*Cnn)).sum()
458        e0 = 2*(icaweights * Cmn*Cmn).sum()
459        e1 = 4*(icaweights * (Cmn*Cnn-Cmm*Cmn)).sum()
460        e2 =   (icaweights * ((Cmm-Cnn)*(Cmm-Cnn)-2*Cmn*Cmn)).sum()
461
462        s24 = 0.25* (bsfa * d1    + bica * e1)
463        c24 = 0.25* (bsfa *(d0-d2)+ bica *(e0-e2))
464
465        # compute the exact minimum
466        # Note that 'arctan' finds always the first maximum
467        # because s24sin(4p)+c24cos(4p)=const*cos(4p-arctan)
468        # the minimum lies +pi/4 apart (period = pi/2).
469        # In other words we want that: abs(minimum) < pi/4
470        phi4 = numx.arctan2(s24, c24)
471        # use if-structure until bug in numx.sign is solved
472        if  phi4 >= 0:
473            minimum = -0.25*(phi4-PI)
474        else:
475            minimum = -0.25*(phi4+PI)
476
477        # compute all constants:
478        R = self.output_dim
479        dc = numx.zeros((ncovs, ), dtype = self.dtype)
480        for t in range(ncovs):
481            dg = covs[:R, :R, t].diagonal()
482            dc[t] = (dg*dg).sum(axis=0)
483        dc = ((dc-Cnn*Cnn-Cmm*Cmm)*sfaweights).sum()
484        ec = numx.zeros((ncovs, ), dtype = self.dtype)
485        for t in range(ncovs):
486            triu_covs = _triu(covs[:R, :R, t], 1).ravel()
487            ec[t] = ((triu_covs*triu_covs).sum() - covs[m, n, t]*covs[m, n, t])
488        ec = 2*(icaweights*ec).sum()
489        a20 = 0.25*(bsfa*(4*dc+d2+3*d0)+bica*(4*ec+e2+3*e0))
490        minimum_contrast = a20+c24*cos(-4*minimum)+s24*sin(-4*minimum)
491        npoints = 1000
492        if complete == 1:
493            # Compute the contrast between -pi/2 and pi/2
494            # (useful for testing purposes)
495            phi = numx.linspace(-PI/2, PI/2, npoints+1)
496            contrast = a20 + c24*cos(-4*phi) + s24*sin(-4*phi)
497            return phi, contrast, minimum, minimum_contrast
498        elif complete == 2:
499            phi = numx.linspace(-PI/4, PI/4, npoints+1)
500            contrast = a20 + c24*cos(-4*phi) + s24*sin(-4*phi)
501            return phi, contrast, minimum, minimum_contrast
502        else:
503            return minimum, minimum_contrast
504
505
506    def _get_contrast(self, covs, bica_bsfa = None):
507        if bica_bsfa is None:
508            bica_bsfa = self._bica_bsfa
509        # return current value of the contrast
510        R = self.output_dim
511        ncovs = covs.ncovs
512        covs = covs.covs
513        icaweights = self.icaweights
514        sfaweights = self.sfaweights
515        # unpack the bsfa and bica coefficients
516        bica, bsfa = bica_bsfa
517        sfa = numx.zeros((ncovs, ), dtype=self.dtype)
518        ica = numx.zeros((ncovs, ), dtype=self.dtype)
519        for t in range(ncovs):
520            sq_corr =  covs[:R, :R, t]*covs[:R, :R, t]
521            sfa[t] = sq_corr.trace()
522            ica[t] = 2*_triu(sq_corr, 1).ravel().sum()
523        return (bsfa*sfaweights*sfa).sum(), (bica*icaweights*ica).sum()
524
525    def _adjust_ica_sfa_coeff(self):
526        # adjust sfa/ica ratio. ica and sfa term are scaled
527        # differently because sfa accounts for the diagonal terms
528        # whereas ica accounts for the off-diagonal terms
529        ncomp = self.output_dim
530        if ncomp > 1:
531            bica =  self.sfa_ica_coeff[1]/(ncomp*(ncomp-1))
532            bsfa = -self.sfa_ica_coeff[0]/ncomp
533        else:
534            bica =  0.#self.sfa_ica_coeff[1]
535            bsfa = -self.sfa_ica_coeff[0]
536        self._bica_bsfa = [bica, bsfa]
537
538    def _fix_covs(self, covs=None):
539        # fiv covariance matrices
540        if covs is None:
541            covs = self.covs
542            if not self.whitened:
543                white = self.white
544                white.stop_training()
545                proj = white.get_projmatrix(transposed=0)
546            else:
547                proj = None
548            # fix and whiten the covariance matrices
549            for i in range(len(self.lags)):
550                covs[i], avg, avg_dt, tlen = covs[i].fix(proj)
551
552            # send the matrices to the container class
553            covs = MultipleCovarianceMatrices(covs)
554            # symmetrize the cov matrices
555            covs.symmetrize()
556        self.covs = covs
557
558    def _optimize(self):
559        # optimize contrast function
560
561        # save initial contrast
562        sfa, ica = self._get_contrast(self.covs)
563        self.initial_contrast = {'SFA': sfa,
564                                 'ICA': ica,
565                                 'TOT': sfa + ica}
566        # info headers
567        if self.verbose:
568            print self._info['header']+self._info['line']
569
570        # initialize control variables
571        # contrast
572        contrast = sfa+ica
573        # local rotation matrix
574        Q = self._get_eye()
575        # local copy of correlation matrices
576        covs = self.covs.copy()
577        # maximum improvement in the contrast function
578        max_increase = self.eps_contrast
579        # Number of sweeps
580        sweep = 0
581        # flag for stopping sweeping
582        sweeping = True
583        # flag to check if we already perturbed the outer space
584        # - negative means that we exit from this routine
585        #   because we hit numerical precision or because
586        #   there's no outer space to be perturbed (input_dim == outpu_dim)
587        # - positive means the number of perturbations done
588        #   before finding no further improvement
589        perturbed = 0
590
591        # size of the perturbation matrix
592        psize = self._effective_input_dim-self.output_dim
593
594        # if there is no outer space don't perturbe
595        if self._effective_input_dim == self.output_dim:
596            perturbed = -1
597
598        # local eye matrix
599        eye = self._get_eye()
600
601        # main loop
602        # we'll keep on sweeping until the contrast has improved less
603        # then self.eps_contrast
604        part_sweep = 0
605        while sweeping:
606            # update number of sweeps
607            sweep += 1
608
609            # perform a single sweep
610            max_increase, covs, Q, contrast = self._do_sweep(covs, Q, contrast)
611
612            if max_increase < 0 or contrast == 0:
613                # we hit numerical precision, exit!
614                sweeping = False
615                if perturbed == 0:
616                    perturbed = -1
617                else:
618                    perturbed = -perturbed
619
620            if (max_increase < self.eps_contrast) and (max_increase) >= 0 :
621                # rate of change is small for all pairs in a sweep
622                if perturbed == 0:
623                    # perturbe the outer space one time with a random rotation
624                    perturbed = 1
625                elif perturbed >= 1 and part_sweep == sweep-1:
626                    # after the last pertubation no useful step has
627                    # been done. exit!
628                    sweeping = False
629                elif perturbed < 0:
630                    # we can't perturbe anymore
631                    sweeping = False
632                # keep track of the last sweep we perturbed
633                part_sweep = sweep
634
635            # perform perturbation if needed
636            if perturbed >= 1 and sweeping is True:
637                # generate a random rotation matrix for the external subspace
638                PRT = eye.copy()
639                rot = self._get_rnd_rotation(psize)
640                # generate a random permutation matrix for the ext. subspace
641                perm = self._get_rnd_permutation(psize)
642                # combine rotation and permutation
643                rot_perm = mult(rot, perm)
644                # apply rotation+permutation
645                PRT[self.output_dim:, self.output_dim:] = rot_perm
646                covs.transform(PRT)
647                Q = mult(Q, PRT)
648                # increment perturbation counter
649                perturbed += 1
650
651            # verbose progress information
652            if self.verbose:
653                table_entry = self._fmt_prog_info(sweep,
654                                                  perturbed,
655                                                  contrast)
656                _sys.stdout.write(table_entry+len(table_entry)*'\b')
657                _sys.stdout.flush()
658
659            # if we made too many sweeps exit with error!
660            if sweep == self.max_iter:
661                err_str = ("Failed to converge, maximum increase= "
662                           "%.5e" % (max_increase))
663                raise NodeException(err_str)
664
665        # if we land here, we have converged!
666        # calculate output contrast
667
668        sfa, ica =  self._get_contrast(covs)
669        contrast = sfa+ica
670        # print final information
671        if self.verbose:
672            print self._fmt_prog_info(sweep, perturbed,
673                                      contrast, sfa, ica)
674            print self._info['line']
675
676        self.final_contrast = {'SFA': sfa,
677                               'ICA': ica,
678                               'TOT': sfa + ica}
679
680        # finally return optimal rotation matrix
681        return Q
682
683    def _do_sweep(self, covs, Q, prev_contrast):
684        # perform a single sweep
685
686        # initialize maximal improvement in a single sweep
687        max_increase = -1
688        # shuffle rotation order
689        numx_rand.shuffle(self.rot_axis)
690        # sweep through all axes combinations
691        for (i, j) in self.rot_axis:
692            # get the angle that minimizes the contrast
693            # and the contrast value
694            angle, contrast = self._givens_angle(i, j, covs)
695            if contrast == 0:
696                # we hit numerical precision in case when b_sfa == 0
697                # we can only break things from here on, better quit!
698                max_increase = -1
699                break
700
701            # relative improvement in the contrast function
702            relative_diff = (prev_contrast-contrast)/abs(prev_contrast)
703            if relative_diff < 0:
704                # if rate of change is negative we hit numerical precision
705                # or we already sit on the optimum for this pair of axis.
706                # don't rotate anymore and go to the next pair
707                continue
708
709            # update the rotation matrix
710            rotate(Q, angle, [i, j])
711            # rotate the covariance matrices
712            covs.rotate(angle, [i, j])
713
714            # store maximum and previous rate of change
715            max_increase = max(max_increase, relative_diff)
716            prev_contrast = contrast
717
718        return max_increase, covs, Q, contrast
719
720    def _stop_training(self, covs=None):
721        """Stop the training phase.
722
723        If the node is used on large datasets it may be wise to first
724        learn the covariance matrices, and then tune the parameters
725        until a suitable parameter set has been found (learning the
726        covariance matrices is the slowest part in this case).  This
727        could be done for example in the following way (assuming the
728        data is already white):
729
730        >>> covs=[mdp.utils.DelayCovarianceMatrix(dt, dtype=dtype)
731        ...       for dt in lags]
732        >>> for block in data:
733        ...     [covs[i].update(block) for i in range(len(lags))]
734
735        You can then initialize the ISFANode with the desired parameters,
736        do a fake training with some random data to set the internal
737        node structure and then call stop_training with the stored covariance
738        matrices. For example:
739
740        >>> isfa = ISFANode(lags, .....)
741        >>> x = mdp.numx_rand.random((100, input_dim)).astype(dtype)
742        >>> isfa.train(x)
743        >>> isfa.stop_training(covs=covs)
744
745        This trick has been used in the paper to apply ISFA to surrogate
746        matrices, i.e. covariance matrices that were not learnt on a
747        real dataset.
748        """
749        # fix, whiten, symmetrize and weight the covariance matrices
750        # the functions sets also the number of input components self.ncomp
751        self._fix_covs(covs)
752        # if output_dim were not set, set it to be the number of input comps
753        if self.output_dim is None:
754            self.output_dim = self._effective_input_dim
755        # adjust b_sfa and b_ica
756        self._adjust_ica_sfa_coeff()
757        # initialize all possible rotation axes
758        self.rot_axis = [(i, j) for i in range(0, self.output_dim)
759                         for j in range(i+1, self._effective_input_dim)]
760
761        # initialize the global rotation-permutation matrix (RP):
762        RP = self.RP
763        if RP is None:
764            RP = self._get_eye()
765        else:
766            # apply the global rotation matrix
767            self.covs.transform(RP)
768
769        # find optimal rotation
770        Q = self._optimize()
771        RP = mult(RP, Q)
772        # rotate and permute the covariance matrices
773        # we do it here in one step, to avoid the cumulative errors
774        # of multiple rotations in _optimize
775        self.covs.transform(Q)
776
777        # keep the complete rotation-permutation matrix
778        self.RPC = RP.copy()
779
780        # Reduce dimension to match output_dim#
781        RP = RP[:, :self.output_dim]
782        # the variance for the derivative of a whitened signal is
783        # 0 <= v <= 4, therefore the diagonal elements of the delayed
784        # covariance matrice with time lag = 1 (covs[:,:,0]) are
785        # -1 <= v' <= +1
786        # reorder the components to have them ordered by slowness
787        d = (self.covs.covs[:self.output_dim, :self.output_dim, 0]).diagonal()
788        idx = d.argsort()[::-1]
789        self.RP = RP.take(idx, axis=1)
790
791        # we could in principle clean up self.covs, as we do in SFANode or
792        # PCANode, but this algorithm is not stable enough to rule out
793        # possible problems. When these occcurs examining the covariance
794        # matrices is often the only way to debug.
795        #del self.covs
796