1__docformat__ = "restructuredtext en" 2 3import sys as _sys 4import mdp 5from mdp import Node, NodeException, numx, numx_rand 6from mdp.nodes import WhiteningNode 7from mdp.utils import (DelayCovarianceMatrix, MultipleCovarianceMatrices, 8 rotate, mult) 9 10 11# TODO: support floats of size different than 64-bit; will need to change SQRT_EPS_D 12 13# rename often used functions 14sum, cos, sin, PI = numx.sum, numx.cos, numx.sin, numx.pi 15SQRT_EPS_D = numx.sqrt(numx.finfo('d').eps) 16 17def _triu(m, k=0): 18 """ returns the elements on and above the k-th diagonal of m. k=0 is the 19 main diagonal, k > 0 is above and k < 0 is below the main diagonal.""" 20 N = m.shape[0] 21 M = m.shape[1] 22 x = numx.greater_equal(numx.subtract.outer(numx.arange(N), 23 numx.arange(M)),1-k) 24 out = (1-x)*m 25 return out 26 27############# 28class ISFANode(Node): 29 """ 30 Perform Independent Slow Feature Analysis on the input data. 31 32 **Internal variables of interest** 33 34 ``self.RP`` 35 The global rotation-permutation matrix. This is the filter 36 applied on input_data to get output_data 37 38 ``self.RPC`` 39 The *complete* global rotation-permutation matrix. This 40 is a matrix of dimension input_dim x input_dim (the 'outer space' 41 is retained) 42 43 ``self.covs`` 44 A `mdp.utils.MultipleCovarianceMatrices` instance containing 45 the current time-delayed covariance matrices of the input_data. 46 After convergence the uppermost ``output_dim`` x ``output_dim`` 47 submatrices should be almost diagonal. 48 49 ``self.covs[n-1]`` is the covariance matrix relative to the ``n``-th 50 time-lag 51 52 Note: they are not cleared after convergence. If you need to free 53 some memory, you can safely delete them with:: 54 55 >>> del self.covs 56 57 ``self.initial_contrast`` 58 A dictionary with the starting contrast and the SFA and ICA parts of 59 it. 60 61 ``self.final_contrast`` 62 Like the above but after convergence. 63 64 Note: If you intend to use this node for large datasets please have 65 a look at the ``stop_training`` method documentation for 66 speeding things up. 67 68 References: 69 Blaschke, T. , Zito, T., and Wiskott, L. (2007). 70 Independent Slow Feature Analysis and Nonlinear Blind Source Separation. 71 Neural Computation 19(4):994-1021 (2007) 72 http://itb.biologie.hu-berlin.de/~wiskott/Publications/BlasZitoWisk2007-ISFA-NeurComp.pdf 73 """ 74 def __init__(self, lags=1, sfa_ica_coeff=(1., 1.), icaweights=None, 75 sfaweights=None, whitened=False, white_comp = None, 76 white_parm = None, eps_contrast=1e-6, max_iter=10000, 77 RP=None, verbose=False, input_dim=None, output_dim=None, 78 dtype=None): 79 """ 80 Perform Independent Slow Feature Analysis. 81 82 The notation is the same used in the paper by Blaschke et al. Please 83 refer to the paper for more information. 84 85 :Parameters: 86 lags 87 list of time-lags to generate the time-delayed covariance 88 matrices (in the paper this is the set of \tau). If 89 lags is an integer, time-lags 1,2,...,'lags' are used. 90 Note that time-lag == 0 (instantaneous correlation) is 91 always implicitly used. 92 93 sfa_ica_coeff 94 a list of float with two entries, which defines the 95 weights of the SFA and ICA part of the objective 96 function. They are called b_{SFA} and b_{ICA} in the 97 paper. 98 99 sfaweights 100 weighting factors for the covariance matrices relative 101 to the SFA part of the objective function (called 102 \kappa_{SFA}^{\tau} in the paper). Default is 103 [1., 0., ..., 0.] 104 For possible values see the description of icaweights. 105 106 icaweights 107 weighting factors for the cov matrices relative 108 to the ICA part of the objective function (called 109 \kappa_{ICA}^{\tau} in the paper). Default is 1. 110 Possible values are: 111 112 - an integer ``n``: all matrices are weighted the same 113 (note that it does not make sense to have ``n != 1``) 114 115 - a list or array of floats of ``len == len(lags)``: 116 each element of the list is used for weighting the 117 corresponding matrix 118 119 - ``None``: use the default values. 120 121 whitened 122 ``True`` if input data is already white, ``False`` 123 otherwise (the data will be whitened internally). 124 125 white_comp 126 If whitened is false, you can set ``white_comp`` to the 127 number of whitened components to keep during the 128 calculation (i.e., the input dimensions are reduced to 129 ``white_comp`` by keeping the components of largest variance). 130 white_parm 131 a dictionary with additional parameters for whitening. 132 It is passed directly to the WhiteningNode constructor. 133 Ex: white_parm = { 'svd' : True } 134 135 eps_contrast 136 Convergence is achieved when the relative 137 improvement in the contrast is below this threshold. 138 Values in the range [1E-4, 1E-10] are usually 139 reasonable. 140 141 max_iter 142 If the algorithms does not achieve convergence within 143 max_iter iterations raise an Exception. Should be 144 larger than 100. 145 146 RP 147 Starting rotation-permutation matrix. It is an 148 input_dim x input_dim matrix used to initially rotate the 149 input components. If not set, the identity matrix is used. 150 In the paper this is used to start the algorithm at the 151 SFA solution (which is often quite near to the optimum). 152 153 verbose 154 print progress information during convergence. This can 155 slow down the algorithm, but it's the only way to see 156 the rate of improvement and immediately spot if something 157 is going wrong. 158 159 output_dim 160 sets the number of independent components that have to 161 be extracted. Note that if this is not smaller than 162 input_dim, the problem is solved linearly and SFA 163 would give the same solution only much faster. 164 """ 165 # check that the "lags" argument has some meaningful value 166 if isinstance(lags, (int, long)): 167 lags = range(1, lags+1) 168 elif isinstance(lags, (list, tuple)): 169 lags = numx.array(lags, "i") 170 elif isinstance(lags, numx.ndarray): 171 if not (lags.dtype.char in ['i', 'l']): 172 err_str = "lags must be integer!" 173 raise NodeException(err_str) 174 else: 175 pass 176 else: 177 err_str = ("Lags must be int, list or array. Found " 178 "%s!" % (type(lags).__name__)) 179 raise NodeException(err_str) 180 self.lags = lags 181 182 # sanity checks for weights 183 if icaweights is None: 184 self.icaweights = 1. 185 else: 186 if (len(icaweights) != len(lags)): 187 err = ("icaweights vector length is %d, " 188 "should be %d" % (str(len(icaweights)), str(len(lags)))) 189 raise NodeException(err) 190 self.icaweights = icaweights 191 192 if sfaweights is None: 193 self.sfaweights = [0]*len(lags) 194 self.sfaweights[0] = 1. 195 else: 196 if (len(sfaweights) != len(lags)): 197 err = ("sfaweights vector length is %d, " 198 "should be %d" % (str(len(sfaweights)), str(len(lags)))) 199 raise NodeException(err) 200 self.sfaweights = sfaweights 201 202 # store attributes 203 self.sfa_ica_coeff = sfa_ica_coeff 204 self.max_iter = max_iter 205 self.verbose = verbose 206 self.eps_contrast = eps_contrast 207 208 # if input is not white, insert a WhiteningNode 209 self.whitened = whitened 210 if not whitened: 211 if white_parm is None: 212 white_parm = {} 213 if output_dim is not None: 214 white_comp = output_dim 215 elif white_comp is not None: 216 output_dim = white_comp 217 self.white = WhiteningNode(input_dim=input_dim, 218 output_dim=white_comp, 219 dtype=dtype, **white_parm) 220 221 # initialize covariance matrices 222 self.covs = [ DelayCovarianceMatrix(dt, dtype=dtype) for dt in lags ] 223 224 # initialize the global rotation-permutation matrix 225 # if not set that we'll eventually be an identity matrix 226 self.RP = RP 227 228 # initialize verbose structure to print nice and useful progress info 229 if verbose: 230 info = { 'sweep' : max(len(str(self.max_iter)), 5), 231 'perturbe': max(len(str(self.max_iter)), 5), 232 'float' : 5+8, 233 'fmt' : "%.5e", 234 'sep' : " | "} 235 f1 = "Sweep".center(info['sweep']) 236 f1_2 = "Pertb". center(info['perturbe']) 237 f2 = "SFA part".center(info['float']) 238 f3 = "ICA part".center(info['float']) 239 f4 = "Contrast".center(info['float']) 240 header = info['sep'].join([f1, f1_2, f2, f3, f4]) 241 info['header'] = header+'\n' 242 info['line'] = len(header)*"-" 243 self._info = info 244 245 # finally call base class constructor 246 super(ISFANode, self).__init__(input_dim, output_dim, dtype) 247 248 def _get_supported_dtypes(self): 249 """Return the list of dtypes supported by this node. 250 251 Support floating point types with size larger or equal than 64 bits. 252 """ 253 return [t for t in mdp.utils.get_dtypes('Float') if t.itemsize>=8] 254 255 def _set_dtype(self, dtype): 256 # when typecode is set, we set the whitening node if needed and 257 # the SFA and ICA weights 258 self._dtype = dtype 259 if not self.whitened and self.white.dtype is None: 260 self.white.dtype = dtype 261 self.icaweights = numx.array(self.icaweights, dtype) 262 self.sfaweights = numx.array(self.sfaweights, dtype) 263 264 def _set_input_dim(self, n): 265 self._input_dim = n 266 if not self.whitened and self.white.output_dim is not None: 267 self._effective_input_dim = self.white.output_dim 268 else: 269 self._effective_input_dim = n 270 271 def _train(self, x): 272 # train the whitening node if needed 273 if not self.whitened: 274 self.white.train(x) 275 # update the covariance matrices 276 [self.covs[i].update(x) for i in range(len(self.lags))] 277 278 def _execute(self, x): 279 # filter through whitening node if needed 280 if not self.whitened: 281 x = self.white.execute(x) 282 # rotate input 283 return mult(x, self.RP) 284 285 def _inverse(self, y): 286 # counter-rotate input 287 x = mult(y, self.RP.T) 288 # invert whitening node if needed 289 if not self.whitened: 290 x = self.white.inverse(x) 291 return x 292 293 def _fmt_prog_info(self, sweep, pert, contrast, sfa = None, ica = None): 294 # for internal use only! 295 # format the progress information 296 # don't try to understand this code: it Just Works (TM) 297 fmt = self._info 298 sweep_str = str(sweep).rjust(fmt['sweep']) 299 pert_str = str(pert).rjust(fmt['perturbe']) 300 if sfa is None: 301 sfa_str = fmt['float']*' ' 302 else: 303 sfa_str = (fmt['fmt']%(sfa)).rjust(fmt['float']) 304 if ica is None: 305 ica_str = fmt['float']*' ' 306 else: 307 ica_str = (fmt['fmt'] % (ica)).rjust(fmt['float']) 308 contrast_str = (fmt['fmt'] % (contrast)).rjust(fmt['float']) 309 table_entry = fmt['sep'].join([sweep_str, 310 pert_str, 311 sfa_str, 312 ica_str, 313 contrast_str]) 314 return table_entry 315 316 def _get_eye(self): 317 # return an identity matrix with the right dimensions and type 318 return numx.eye(self._effective_input_dim, dtype=self.dtype) 319 320 def _get_rnd_rotation(self, dim): 321 # return a random rot matrix with the right dimensions and type 322 return mdp.utils.random_rot(dim, self.dtype) 323 324 def _get_rnd_permutation(self, dim): 325 # return a random permut matrix with the right dimensions and type 326 zero = numx.zeros((dim, dim), dtype=self.dtype) 327 row = numx_rand.permutation(dim) 328 for col in range(dim): 329 zero[row[col], col] = 1. 330 return zero 331 332 def _givens_angle(self, i, j, covs, bica_bsfa=None, complete=0): 333 # Return the Givens rotation angle for which the contrast function 334 # is minimal 335 if bica_bsfa is None: 336 bica_bsfa = self._bica_bsfa 337 if j < self.output_dim: 338 return self._givens_angle_case1(i, j, covs, 339 bica_bsfa, complete=complete) 340 else: 341 return self._givens_angle_case2(i, j, covs, 342 bica_bsfa, complete=complete) 343 344 345 def _givens_angle_case2(self, m, n, covs, bica_bsfa, complete=0): 346 # This function makes use of the constants computed in the paper 347 # 348 # R -> R 349 # m -> \mu 350 # n -> \nu 351 # 352 # Note that the minus sign before the angle phi is there because 353 # in the paper the rotation convention is the opposite of ours. 354 355 ncovs = covs.ncovs 356 covs = covs.covs 357 icaweights = self.icaweights 358 sfaweights = self.sfaweights 359 R = self.output_dim 360 bica, bsfa = bica_bsfa 361 362 Cmm, Cmn, Cnn = covs[m, m, :], covs[m, n, :], covs[n, n, :] 363 d0 = (sfaweights * Cmm*Cmm).sum() 364 d1 = 4*(sfaweights * Cmn*Cmm).sum() 365 d2 = 2*(sfaweights * (2*Cmn*Cmn + Cmm*Cnn)).sum() 366 d3 = 4*(sfaweights * Cmn*Cnn).sum() 367 d4 = (sfaweights * Cnn*Cnn).sum() 368 e0 = 2*(icaweights * ((covs[:R, m, :]*covs[:R, m, :]).sum(axis=0) 369 - Cmm*Cmm)).sum() 370 e1 = 4*(icaweights * ((covs[:R, m, :]*covs[:R, n, :]).sum(axis=0) 371 - Cmm*Cmn)).sum() 372 e2 = 2*(icaweights * ((covs[:R, n, :]*covs[:R, n, :]).sum(axis=0) 373 - Cmn*Cmn)).sum() 374 375 s22 = 0.25 * bsfa*(d1+d3) + 0.5* bica*(e1) 376 c22 = 0.5 * bsfa*(d0-d4) + 0.5* bica*(e0-e2) 377 s24 = 0.125* bsfa*(d1-d3) 378 c24 = 0.125* bsfa*(d0-d2+d4) 379 380 # Compute the contrast function in a grid of angles to find a 381 # first approximation for the minimum. Repeat two times 382 # (effectively doubling the resolution). Note that we can do 383 # that because we know we have a single minimum. 384 # 385 # npoints should not be too large otherwise the contrast 386 # funtion appears to be constant. This is because we hit the 387 # maximum resolution for the cosine function (ca. 1e-15) 388 npoints = 100 389 left = -PI/2 - PI/(npoints+1) 390 right = PI/2 + PI/(npoints+1) 391 for iter in (1, 2): 392 phi = numx.linspace(left, right, npoints+3) 393 contrast = c22*cos(-2*phi)+s22*sin(-2*phi)+\ 394 c24*cos(-4*phi)+s24*sin(-4*phi) 395 minidx = contrast.argmin() 396 left = phi[max(minidx-1, 0)] 397 right = phi[min(minidx+1, len(phi)-1)] 398 399 # The contrast is almost a parabola around the minimum. 400 # To find the minimum we can therefore compute the derivative 401 # (which should be a line) and calculate its root. 402 # This step helps to overcome the resolution limit of the 403 # cosine function and clearly improve the final result. 404 der_left = 2*c22*sin(-2*left)- 2*s22*cos(-2*left)+\ 405 4*c24*sin(-4*left)- 4*s24*cos(-4*left) 406 der_right = 2*c22*sin(-2*right)-2*s22*cos(-2*right)+\ 407 4*c24*sin(-4*right)-4*s24*cos(-4*right) 408 if abs(der_left - der_right) < SQRT_EPS_D: 409 minimum = phi[minidx] 410 else: 411 minimum = right - der_right*(right-left)/(der_right-der_left) 412 413 dc = numx.zeros((ncovs,), dtype = self.dtype) 414 for t in range(ncovs): 415 dg = covs[:R, :R, t].diagonal() 416 dc[t] = (dg*dg).sum(axis=0) 417 dc = ((dc-Cmm*Cmm)*sfaweights).sum() 418 419 ec = numx.zeros((ncovs, ), dtype = self.dtype) 420 for t in range(ncovs): 421 ec[t] = sum([covs[i, j, t]*covs[i, j, t] for i in range(R-1) 422 for j in range(i+1, R) if i != m and j != m]) 423 ec = 2*(ec*icaweights).sum() 424 a20 = 0.125*bsfa*(3*d0+d2+3*d4+8*dc)+0.5*bica*(e0+e2+2*ec) 425 minimum_contrast = a20+c22*cos(-2*minimum)+s22*sin(-2*minimum)+\ 426 c24*cos(-4*minimum)+s24*sin(-4*minimum) 427 if complete: 428 # Compute the contrast between -pi/2 and pi/2 429 # (useful for testing purposes) 430 npoints = 1000 431 phi = numx.linspace(-PI/2, PI/2, npoints+1) 432 contrast = a20 + c22*cos(-2*phi) + s22*sin(-2*phi) +\ 433 c24*cos(-4*phi) + s24*sin(-4*phi) 434 return phi, contrast, minimum, minimum_contrast 435 else: 436 return minimum, minimum_contrast 437 438 439 def _givens_angle_case1(self, m, n, covs, bica_bsfa, complete=0): 440 # This function makes use of the constants computed in the paper 441 # 442 # R -> R 443 # m -> \mu 444 # n -> \nu 445 # 446 # Note that the minus sign before the angle phi is there because 447 # in the paper the rotation convention is the opposite of ours. 448 ncovs = covs.ncovs 449 covs = covs.covs 450 icaweights = self.icaweights 451 sfaweights = self.sfaweights 452 bica, bsfa = bica_bsfa 453 454 Cmm, Cmn, Cnn = covs[m, m, :], covs[m, n, :], covs[n, n, :] 455 d0 = (sfaweights * (Cmm*Cmm+Cnn*Cnn)).sum() 456 d1 = 4*(sfaweights * (Cmm*Cmn-Cmn*Cnn)).sum() 457 d2 = 2*(sfaweights * (2*Cmn*Cmn+Cmm*Cnn)).sum() 458 e0 = 2*(icaweights * Cmn*Cmn).sum() 459 e1 = 4*(icaweights * (Cmn*Cnn-Cmm*Cmn)).sum() 460 e2 = (icaweights * ((Cmm-Cnn)*(Cmm-Cnn)-2*Cmn*Cmn)).sum() 461 462 s24 = 0.25* (bsfa * d1 + bica * e1) 463 c24 = 0.25* (bsfa *(d0-d2)+ bica *(e0-e2)) 464 465 # compute the exact minimum 466 # Note that 'arctan' finds always the first maximum 467 # because s24sin(4p)+c24cos(4p)=const*cos(4p-arctan) 468 # the minimum lies +pi/4 apart (period = pi/2). 469 # In other words we want that: abs(minimum) < pi/4 470 phi4 = numx.arctan2(s24, c24) 471 # use if-structure until bug in numx.sign is solved 472 if phi4 >= 0: 473 minimum = -0.25*(phi4-PI) 474 else: 475 minimum = -0.25*(phi4+PI) 476 477 # compute all constants: 478 R = self.output_dim 479 dc = numx.zeros((ncovs, ), dtype = self.dtype) 480 for t in range(ncovs): 481 dg = covs[:R, :R, t].diagonal() 482 dc[t] = (dg*dg).sum(axis=0) 483 dc = ((dc-Cnn*Cnn-Cmm*Cmm)*sfaweights).sum() 484 ec = numx.zeros((ncovs, ), dtype = self.dtype) 485 for t in range(ncovs): 486 triu_covs = _triu(covs[:R, :R, t], 1).ravel() 487 ec[t] = ((triu_covs*triu_covs).sum() - covs[m, n, t]*covs[m, n, t]) 488 ec = 2*(icaweights*ec).sum() 489 a20 = 0.25*(bsfa*(4*dc+d2+3*d0)+bica*(4*ec+e2+3*e0)) 490 minimum_contrast = a20+c24*cos(-4*minimum)+s24*sin(-4*minimum) 491 npoints = 1000 492 if complete == 1: 493 # Compute the contrast between -pi/2 and pi/2 494 # (useful for testing purposes) 495 phi = numx.linspace(-PI/2, PI/2, npoints+1) 496 contrast = a20 + c24*cos(-4*phi) + s24*sin(-4*phi) 497 return phi, contrast, minimum, minimum_contrast 498 elif complete == 2: 499 phi = numx.linspace(-PI/4, PI/4, npoints+1) 500 contrast = a20 + c24*cos(-4*phi) + s24*sin(-4*phi) 501 return phi, contrast, minimum, minimum_contrast 502 else: 503 return minimum, minimum_contrast 504 505 506 def _get_contrast(self, covs, bica_bsfa = None): 507 if bica_bsfa is None: 508 bica_bsfa = self._bica_bsfa 509 # return current value of the contrast 510 R = self.output_dim 511 ncovs = covs.ncovs 512 covs = covs.covs 513 icaweights = self.icaweights 514 sfaweights = self.sfaweights 515 # unpack the bsfa and bica coefficients 516 bica, bsfa = bica_bsfa 517 sfa = numx.zeros((ncovs, ), dtype=self.dtype) 518 ica = numx.zeros((ncovs, ), dtype=self.dtype) 519 for t in range(ncovs): 520 sq_corr = covs[:R, :R, t]*covs[:R, :R, t] 521 sfa[t] = sq_corr.trace() 522 ica[t] = 2*_triu(sq_corr, 1).ravel().sum() 523 return (bsfa*sfaweights*sfa).sum(), (bica*icaweights*ica).sum() 524 525 def _adjust_ica_sfa_coeff(self): 526 # adjust sfa/ica ratio. ica and sfa term are scaled 527 # differently because sfa accounts for the diagonal terms 528 # whereas ica accounts for the off-diagonal terms 529 ncomp = self.output_dim 530 if ncomp > 1: 531 bica = self.sfa_ica_coeff[1]/(ncomp*(ncomp-1)) 532 bsfa = -self.sfa_ica_coeff[0]/ncomp 533 else: 534 bica = 0.#self.sfa_ica_coeff[1] 535 bsfa = -self.sfa_ica_coeff[0] 536 self._bica_bsfa = [bica, bsfa] 537 538 def _fix_covs(self, covs=None): 539 # fiv covariance matrices 540 if covs is None: 541 covs = self.covs 542 if not self.whitened: 543 white = self.white 544 white.stop_training() 545 proj = white.get_projmatrix(transposed=0) 546 else: 547 proj = None 548 # fix and whiten the covariance matrices 549 for i in range(len(self.lags)): 550 covs[i], avg, avg_dt, tlen = covs[i].fix(proj) 551 552 # send the matrices to the container class 553 covs = MultipleCovarianceMatrices(covs) 554 # symmetrize the cov matrices 555 covs.symmetrize() 556 self.covs = covs 557 558 def _optimize(self): 559 # optimize contrast function 560 561 # save initial contrast 562 sfa, ica = self._get_contrast(self.covs) 563 self.initial_contrast = {'SFA': sfa, 564 'ICA': ica, 565 'TOT': sfa + ica} 566 # info headers 567 if self.verbose: 568 print self._info['header']+self._info['line'] 569 570 # initialize control variables 571 # contrast 572 contrast = sfa+ica 573 # local rotation matrix 574 Q = self._get_eye() 575 # local copy of correlation matrices 576 covs = self.covs.copy() 577 # maximum improvement in the contrast function 578 max_increase = self.eps_contrast 579 # Number of sweeps 580 sweep = 0 581 # flag for stopping sweeping 582 sweeping = True 583 # flag to check if we already perturbed the outer space 584 # - negative means that we exit from this routine 585 # because we hit numerical precision or because 586 # there's no outer space to be perturbed (input_dim == outpu_dim) 587 # - positive means the number of perturbations done 588 # before finding no further improvement 589 perturbed = 0 590 591 # size of the perturbation matrix 592 psize = self._effective_input_dim-self.output_dim 593 594 # if there is no outer space don't perturbe 595 if self._effective_input_dim == self.output_dim: 596 perturbed = -1 597 598 # local eye matrix 599 eye = self._get_eye() 600 601 # main loop 602 # we'll keep on sweeping until the contrast has improved less 603 # then self.eps_contrast 604 part_sweep = 0 605 while sweeping: 606 # update number of sweeps 607 sweep += 1 608 609 # perform a single sweep 610 max_increase, covs, Q, contrast = self._do_sweep(covs, Q, contrast) 611 612 if max_increase < 0 or contrast == 0: 613 # we hit numerical precision, exit! 614 sweeping = False 615 if perturbed == 0: 616 perturbed = -1 617 else: 618 perturbed = -perturbed 619 620 if (max_increase < self.eps_contrast) and (max_increase) >= 0 : 621 # rate of change is small for all pairs in a sweep 622 if perturbed == 0: 623 # perturbe the outer space one time with a random rotation 624 perturbed = 1 625 elif perturbed >= 1 and part_sweep == sweep-1: 626 # after the last pertubation no useful step has 627 # been done. exit! 628 sweeping = False 629 elif perturbed < 0: 630 # we can't perturbe anymore 631 sweeping = False 632 # keep track of the last sweep we perturbed 633 part_sweep = sweep 634 635 # perform perturbation if needed 636 if perturbed >= 1 and sweeping is True: 637 # generate a random rotation matrix for the external subspace 638 PRT = eye.copy() 639 rot = self._get_rnd_rotation(psize) 640 # generate a random permutation matrix for the ext. subspace 641 perm = self._get_rnd_permutation(psize) 642 # combine rotation and permutation 643 rot_perm = mult(rot, perm) 644 # apply rotation+permutation 645 PRT[self.output_dim:, self.output_dim:] = rot_perm 646 covs.transform(PRT) 647 Q = mult(Q, PRT) 648 # increment perturbation counter 649 perturbed += 1 650 651 # verbose progress information 652 if self.verbose: 653 table_entry = self._fmt_prog_info(sweep, 654 perturbed, 655 contrast) 656 _sys.stdout.write(table_entry+len(table_entry)*'\b') 657 _sys.stdout.flush() 658 659 # if we made too many sweeps exit with error! 660 if sweep == self.max_iter: 661 err_str = ("Failed to converge, maximum increase= " 662 "%.5e" % (max_increase)) 663 raise NodeException(err_str) 664 665 # if we land here, we have converged! 666 # calculate output contrast 667 668 sfa, ica = self._get_contrast(covs) 669 contrast = sfa+ica 670 # print final information 671 if self.verbose: 672 print self._fmt_prog_info(sweep, perturbed, 673 contrast, sfa, ica) 674 print self._info['line'] 675 676 self.final_contrast = {'SFA': sfa, 677 'ICA': ica, 678 'TOT': sfa + ica} 679 680 # finally return optimal rotation matrix 681 return Q 682 683 def _do_sweep(self, covs, Q, prev_contrast): 684 # perform a single sweep 685 686 # initialize maximal improvement in a single sweep 687 max_increase = -1 688 # shuffle rotation order 689 numx_rand.shuffle(self.rot_axis) 690 # sweep through all axes combinations 691 for (i, j) in self.rot_axis: 692 # get the angle that minimizes the contrast 693 # and the contrast value 694 angle, contrast = self._givens_angle(i, j, covs) 695 if contrast == 0: 696 # we hit numerical precision in case when b_sfa == 0 697 # we can only break things from here on, better quit! 698 max_increase = -1 699 break 700 701 # relative improvement in the contrast function 702 relative_diff = (prev_contrast-contrast)/abs(prev_contrast) 703 if relative_diff < 0: 704 # if rate of change is negative we hit numerical precision 705 # or we already sit on the optimum for this pair of axis. 706 # don't rotate anymore and go to the next pair 707 continue 708 709 # update the rotation matrix 710 rotate(Q, angle, [i, j]) 711 # rotate the covariance matrices 712 covs.rotate(angle, [i, j]) 713 714 # store maximum and previous rate of change 715 max_increase = max(max_increase, relative_diff) 716 prev_contrast = contrast 717 718 return max_increase, covs, Q, contrast 719 720 def _stop_training(self, covs=None): 721 """Stop the training phase. 722 723 If the node is used on large datasets it may be wise to first 724 learn the covariance matrices, and then tune the parameters 725 until a suitable parameter set has been found (learning the 726 covariance matrices is the slowest part in this case). This 727 could be done for example in the following way (assuming the 728 data is already white): 729 730 >>> covs=[mdp.utils.DelayCovarianceMatrix(dt, dtype=dtype) 731 ... for dt in lags] 732 >>> for block in data: 733 ... [covs[i].update(block) for i in range(len(lags))] 734 735 You can then initialize the ISFANode with the desired parameters, 736 do a fake training with some random data to set the internal 737 node structure and then call stop_training with the stored covariance 738 matrices. For example: 739 740 >>> isfa = ISFANode(lags, .....) 741 >>> x = mdp.numx_rand.random((100, input_dim)).astype(dtype) 742 >>> isfa.train(x) 743 >>> isfa.stop_training(covs=covs) 744 745 This trick has been used in the paper to apply ISFA to surrogate 746 matrices, i.e. covariance matrices that were not learnt on a 747 real dataset. 748 """ 749 # fix, whiten, symmetrize and weight the covariance matrices 750 # the functions sets also the number of input components self.ncomp 751 self._fix_covs(covs) 752 # if output_dim were not set, set it to be the number of input comps 753 if self.output_dim is None: 754 self.output_dim = self._effective_input_dim 755 # adjust b_sfa and b_ica 756 self._adjust_ica_sfa_coeff() 757 # initialize all possible rotation axes 758 self.rot_axis = [(i, j) for i in range(0, self.output_dim) 759 for j in range(i+1, self._effective_input_dim)] 760 761 # initialize the global rotation-permutation matrix (RP): 762 RP = self.RP 763 if RP is None: 764 RP = self._get_eye() 765 else: 766 # apply the global rotation matrix 767 self.covs.transform(RP) 768 769 # find optimal rotation 770 Q = self._optimize() 771 RP = mult(RP, Q) 772 # rotate and permute the covariance matrices 773 # we do it here in one step, to avoid the cumulative errors 774 # of multiple rotations in _optimize 775 self.covs.transform(Q) 776 777 # keep the complete rotation-permutation matrix 778 self.RPC = RP.copy() 779 780 # Reduce dimension to match output_dim# 781 RP = RP[:, :self.output_dim] 782 # the variance for the derivative of a whitened signal is 783 # 0 <= v <= 4, therefore the diagonal elements of the delayed 784 # covariance matrice with time lag = 1 (covs[:,:,0]) are 785 # -1 <= v' <= +1 786 # reorder the components to have them ordered by slowness 787 d = (self.covs.covs[:self.output_dim, :self.output_dim, 0]).diagonal() 788 idx = d.argsort()[::-1] 789 self.RP = RP.take(idx, axis=1) 790 791 # we could in principle clean up self.covs, as we do in SFANode or 792 # PCANode, but this algorithm is not stable enough to rule out 793 # possible problems. When these occcurs examining the covariance 794 # matrices is often the only way to debug. 795 #del self.covs 796