1""" 2Implementation of algorithm for sparse multi-subjects learning of Gaussian 3graphical models. 4""" 5# Authors: Philippe Gervais 6# License: simplified BSD 7 8import warnings 9import collections.abc 10import operator 11import itertools 12 13import numpy as np 14import scipy.linalg 15 16from joblib import Memory, delayed, Parallel 17from sklearn.base import BaseEstimator 18from sklearn.covariance import empirical_covariance 19from sklearn.model_selection import check_cv 20from sklearn.utils.extmath import fast_logdet 21 22from .._utils import CacheMixin 23from .._utils import logger 24from .._utils.extmath import is_spd 25 26 27def compute_alpha_max(emp_covs, n_samples): 28 """Compute the critical value of the regularization parameter. 29 30 Above this value, the precisions matrices computed by 31 group_sparse_covariance are diagonal (complete sparsity) 32 33 This function also returns the value below which the precision 34 matrices are fully dense (i.e. minimal number of zero coefficients). 35 36 The formula used in this function was derived using the same method 37 as in :footcite:`Duchi2012`. 38 39 Parameters 40 ---------- 41 emp_covs : array-like, shape (n_features, n_features, n_subjects) 42 covariance matrix for each subject. 43 44 n_samples : array-like, shape (n_subjects,) 45 number of samples used in the computation of every covariance matrix. 46 n_samples.sum() can be arbitrary. 47 48 Returns 49 ------- 50 alpha_max : float 51 minimal value for the regularization parameter that gives a 52 fully sparse matrix. 53 54 alpha_min : float 55 minimal value for the regularization parameter that gives a fully 56 dense matrix. 57 58 References 59 --------- 60 .. footbibliography:: 61 62 """ 63 A = np.copy(emp_covs) 64 n_samples = np.asarray(n_samples).copy() 65 n_samples /= n_samples.sum() 66 67 for k in range(emp_covs.shape[-1]): 68 # Set diagonal to zero 69 A[..., k].flat[::A.shape[0] + 1] = 0 70 A[..., k] *= n_samples[k] 71 72 norms = np.sqrt((A ** 2).sum(axis=-1)) 73 74 return np.max(norms), np.min(norms[norms > 0]) 75 76 77def _update_submatrix(full, sub, sub_inv, p, h, v): 78 """Update submatrix and its inverse. 79 80 sub_inv is the inverse of the submatrix of "full" obtained by removing 81 the p-th row and column. 82 83 sub_inv is modified in-place. After execution of this function, it contains 84 the inverse of the submatrix of "full" obtained by removing the n+1-th row 85 and column. 86 87 This computation is based on the Sherman-Woodbury-Morrison identity. 88 89 """ 90 n = p - 1 91 v[:n + 1] = full[:n + 1, n] 92 v[n + 1:] = full[n + 2:, n] 93 h[:n + 1] = full[n, :n + 1] 94 h[n + 1:] = full[n, n + 2:] 95 96 # change row: first usage of SWM identity 97 coln = sub_inv[:, n:n + 1] # 2d array, useful for sub_inv below 98 V = h - sub[n, :] 99 coln = coln / (1. + np.dot(V, coln)) 100 # The following line is equivalent to 101 # sub_inv -= np.outer(coln, np.dot(V, sub_inv)) 102 sub_inv -= np.dot(coln, np.dot(V, sub_inv)[np.newaxis, :]) 103 sub[n, :] = h 104 105 # change column: second usage of SWM identity 106 rown = sub_inv[n:n + 1, :] # 2d array, useful for sub_inv below 107 U = v - sub[:, n] 108 rown = rown / (1. + np.dot(rown, U)) 109 # The following line is equivalent to (but faster) 110 # sub_inv -= np.outer(np.dot(sub_inv, U), rown) 111 sub_inv -= np.dot(np.dot(sub_inv, U)[:, np.newaxis], rown) 112 sub[:, n] = v # equivalent to sub[n, :] += U 113 114 # Make sub_inv symmetric (overcome some numerical limitations) 115 sub_inv += sub_inv.T.copy() 116 sub_inv /= 2. 117 118 119def _assert_submatrix(full, sub, n): 120 """Check that "sub" is the matrix obtained by removing the p-th col and row 121 in "full". Used only for debugging. 122 123 """ 124 true_sub = np.empty_like(sub) 125 true_sub[:n, :n] = full[:n, :n] 126 true_sub[n:, n:] = full[n + 1:, n + 1:] 127 true_sub[:n, n:] = full[:n, n + 1:] 128 true_sub[n:, :n] = full[n + 1:, :n] 129 130 np.testing.assert_almost_equal(true_sub, sub) 131 132 133def group_sparse_covariance(subjects, alpha, max_iter=50, tol=1e-3, verbose=0, 134 probe_function=None, precisions_init=None, 135 debug=False): 136 """Compute sparse precision matrices and covariance matrices. 137 138 The precision matrices returned by this function are sparse, and share a 139 common sparsity pattern: all have zeros at the same location. This is 140 achieved by simultaneous computation of all precision matrices at the 141 same time. 142 143 Running time is linear on max_iter, and number of subjects (len(subjects)), 144 but cubic on number of features (subjects[0].shape[1]). 145 146 The present algorithm is based on :footcite:`Honorio2015`. 147 148 Parameters 149 ---------- 150 subjects : list of numpy.ndarray 151 input subjects. Each subject is a 2D array, whose columns contain 152 signals. Each array shape must be (sample number, feature number). 153 The sample number can vary from subject to subject, but all subjects 154 must have the same number of features (i.e. of columns). 155 156 alpha : float 157 regularization parameter. With normalized covariances matrices and 158 number of samples, sensible values lie in the [0, 1] range(zero is 159 no regularization: output is not sparse) 160 161 max_iter : int, optional 162 maximum number of iterations. Default=50. 163 164 tol : positive float or None, optional 165 The tolerance to declare convergence: if the duality gap goes below 166 this value, optimization is stopped. If None, no check is performed. 167 Default=1e-3. 168 169 verbose : int, optional 170 verbosity level. Zero means "no message". Default=0. 171 172 probe_function : callable or None, optional 173 This value is called before the first iteration and after each 174 iteration. If it returns True, then optimization is stopped 175 prematurely. 176 The function is given as arguments (in that order): 177 178 - empirical covariances (ndarray), 179 - number of samples for each subject (ndarray), 180 - regularization parameter (float) 181 - maximum iteration number (integer) 182 - tolerance (float) 183 - current iteration number (integer). -1 means "before first iteration" 184 - current value of precisions (ndarray). 185 - previous value of precisions (ndarray). None before first iteration. 186 187 precisions_init : numpy.ndarray, optional 188 initial value of the precision matrices. If not provided, a diagonal 189 matrix with the variances of each input signal is used. 190 191 debug : bool, optional 192 if True, perform checks during computation. It can help find 193 numerical problems, but increases computation time a lot. 194 Default=False. 195 196 Returns 197 ------- 198 emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects) 199 empirical covariances matrices 200 201 precisions : numpy.ndarray, shape (n_features, n_features, n_subjects) 202 estimated precision matrices 203 204 References 205 ---------- 206 .. footbibliography:: 207 208 """ 209 210 emp_covs, n_samples = empirical_covariances( 211 subjects, assume_centered=False) 212 213 precisions = _group_sparse_covariance( 214 emp_covs, n_samples, alpha, max_iter=max_iter, tol=tol, 215 verbose=verbose, precisions_init=precisions_init, 216 probe_function=probe_function, debug=debug) 217 218 return emp_covs, precisions 219 220 221def _group_sparse_covariance(emp_covs, n_samples, alpha, max_iter=10, tol=1e-3, 222 precisions_init=None, probe_function=None, 223 verbose=0, debug=False): 224 """Internal version of group_sparse_covariance. 225 See its docstring for details. 226 227 """ 228 if tol == -1: 229 tol = None 230 if not isinstance(alpha, (int, float)) or alpha < 0: 231 raise ValueError("Regularization parameter alpha must be a " 232 "positive number.\n" 233 "You provided: {0}".format(str(alpha))) 234 235 n_subjects = emp_covs.shape[-1] 236 n_features = emp_covs[0].shape[0] 237 n_samples = np.asarray(n_samples) 238 n_samples /= n_samples.sum() # essential for numerical stability 239 240 # Check diagonal normalization. 241 ones = np.ones(emp_covs.shape[0]) 242 for k in range(n_subjects): 243 if (abs(emp_covs[..., k].flat[::emp_covs.shape[0] + 1] - ones) 244 > 0.1).any(): 245 warnings.warn("input signals do not all have unit variance. This " 246 "can lead to numerical instability.") 247 break 248 249 if precisions_init is None: 250 # Fortran order make omega[..., k] contiguous, which is often useful. 251 omega = np.ndarray(shape=emp_covs.shape, dtype=np.float64, 252 order="F") 253 for k in range(n_subjects): 254 # Values on main diagonals are far from zero, because they 255 # are timeseries energy. 256 omega[..., k] = np.diag(1. / np.diag(emp_covs[..., k])) 257 else: 258 omega = precisions_init.copy() 259 260 # Preallocate arrays 261 y = np.ndarray(shape=(n_subjects, n_features - 1), dtype=np.float64) 262 u = np.ndarray(shape=(n_subjects, n_features - 1), dtype=np.float64) 263 y_1 = np.ndarray(shape=(n_subjects, n_features - 2), dtype=np.float64) 264 h_12 = np.ndarray(shape=(n_subjects, n_features - 2), dtype=np.float64) 265 q = np.ndarray(shape=(n_subjects,), dtype=np.float64) 266 aq = np.ndarray(shape=(n_subjects,), dtype=np.float64) # temp. array 267 c = np.ndarray(shape=(n_subjects,), dtype=np.float64) 268 W = np.ndarray(shape=(omega.shape[0] - 1, omega.shape[1] - 1, 269 omega.shape[2]), 270 dtype=np.float64, order="F") 271 W_inv = np.ndarray(shape=W.shape, dtype=np.float64, order="F") 272 273 # Auxiliary arrays. 274 v = np.ndarray((omega.shape[0] - 1,), dtype=np.float64) 275 h = np.ndarray((omega.shape[1] - 1,), dtype=np.float64) 276 277 # Optional. 278 tolerance_reached = False 279 max_norm = None 280 281 omega_old = np.empty_like(omega) 282 if probe_function is not None: 283 # iteration number -1 means called before iteration loop. 284 probe_function(emp_covs, n_samples, alpha, max_iter, tol, 285 -1, omega, None) 286 probe_interrupted = False 287 288 # Start optimization loop. Variables are named following (mostly) the 289 # Honorio-Samaras paper notations. 290 291 # Used in the innermost loop. Computed here to save some computation. 292 alpha2 = alpha ** 2 293 294 for n in range(max_iter): 295 if max_norm is not None: 296 suffix = (" variation (max norm): {max_norm:.3e} ".format( 297 max_norm=max_norm)) 298 else: 299 suffix = "" 300 if verbose > 1: 301 logger.log("* iteration {iter_n:d} ({percentage:.0f} %){suffix}" 302 " ...".format(iter_n=n, percentage=100. * n / max_iter, 303 suffix=suffix), verbose=verbose) 304 305 omega_old[...] = omega 306 for p in range(n_features): 307 308 if p == 0: 309 # Initial state: remove first col/row 310 W = omega[1:, 1:, :].copy() # stack of W(k) 311 W_inv = np.ndarray(shape=W.shape, dtype=np.float64) 312 for k in range(W.shape[2]): 313 # stack of W^-1(k) 314 W_inv[..., k] = scipy.linalg.inv(W[..., k]) 315 if debug: 316 np.testing.assert_almost_equal( 317 np.dot(W_inv[..., k], W[..., k]), 318 np.eye(W_inv[..., k].shape[0]), decimal=10) 319 _assert_submatrix(omega[..., k], W[..., k], p) 320 assert(is_spd(W_inv[..., k])) 321 else: 322 # Update W and W_inv 323 if debug: 324 omega_orig = omega.copy() 325 326 for k in range(n_subjects): 327 _update_submatrix(omega[..., k], 328 W[..., k], W_inv[..., k], p, h, v) 329 330 if debug: 331 _assert_submatrix(omega[..., k], W[..., k], p) 332 assert(is_spd(W_inv[..., k], decimal=14)) 333 np.testing.assert_almost_equal( 334 np.dot(W[..., k], W_inv[..., k]), 335 np.eye(W_inv[..., k].shape[0]), decimal=10) 336 if debug: 337 # Check that omega has not been modified. 338 np.testing.assert_almost_equal(omega_orig, omega) 339 340 # In the following lines, implicit loop on k (subjects) 341 # Extract y and u 342 y[:, :p] = omega[:p, p, :].T 343 y[:, p:] = omega[p + 1:, p, :].T 344 345 u[:, :p] = emp_covs[:p, p, :].T 346 u[:, p:] = emp_covs[p + 1:, p, :].T 347 348 for m in range(n_features - 1): 349 # Coordinate descent on y 350 351 # T(k) -> n_samples[k] 352 # v(k) -> emp_covs[p, p, k] 353 # h_22(k) -> W_inv[m, m, k] 354 # h_12(k) -> W_inv[:m, m, k], W_inv[m+1:, m, k] 355 # y_1(k) -> y[k, :m], y[k, m+1:] 356 # u_2(k) -> u[k, m] 357 h_12[:, :m] = W_inv[:m, m, :].T 358 h_12[:, m:] = W_inv[m + 1:, m, :].T 359 y_1[:, :m] = y[:, :m] 360 y_1[:, m:] = y[:, m + 1:] 361 362 c[:] = - n_samples * ( 363 emp_covs[p, p, :] * (h_12 * y_1).sum(axis=1) + u[:, m] 364 ) 365 c2 = np.sqrt(np.dot(c, c)) 366 367 # x -> y[:][m] 368 if c2 <= alpha: 369 y[:, m] = 0 # x* = 0 370 else: 371 # q(k) -> T(k) * v(k) * h_22(k) 372 # \lambda -> gamma (lambda is a Python keyword) 373 q[:] = n_samples * emp_covs[p, p, :] * W_inv[m, m, :] 374 if debug: 375 assert(np.all(q > 0)) 376 # x* = \lambda* diag(1 + \lambda q)^{-1} c 377 378 # Newton-Raphson loop. Loosely based on Scipy's. 379 # Tolerance does not seem to be important for numerical 380 # stability (tolerance of 1e-2 works) but has an effect on 381 # overall convergence rate (the tighter the better.) 382 383 gamma = 0. # initial value 384 # Precompute some quantities 385 cc = c * c 386 two_ccq = 2. * cc * q 387 for _ in itertools.repeat(None, 100): 388 # Function whose zero must be determined (fval) and 389 # its derivative (fder). 390 # Written inplace to save some function calls. 391 aq = 1. + gamma * q 392 aq2 = aq * aq 393 fder = (two_ccq / (aq2 * aq)).sum() 394 395 if fder == 0: 396 msg = "derivative was zero." 397 warnings.warn(msg, RuntimeWarning) 398 break 399 fval = - (alpha2 - (cc / aq2).sum()) / fder 400 gamma = fval + gamma 401 if abs(fval) < 1.5e-8: 402 break 403 404 if abs(fval) > 0.1: 405 warnings.warn("Newton-Raphson step did not converge.\n" 406 "This may indicate a badly conditioned " 407 "system.") 408 409 if debug: 410 assert gamma >= 0., gamma 411 y[:, m] = (gamma * c) / aq # x* 412 413 # Copy back y in omega (column and row) 414 omega[:p, p, :] = y[:, :p].T 415 omega[p + 1:, p, :] = y[:, p:].T 416 omega[p, :p, :] = y[:, :p].T 417 omega[p, p + 1:, :] = y[:, p:].T 418 419 for k in range(n_subjects): 420 omega[p, p, k] = 1. / emp_covs[p, p, k] + np.dot( 421 np.dot(y[k, :], W_inv[..., k]), y[k, :]) 422 423 if debug: 424 assert(is_spd(omega[..., k])) 425 426 if probe_function is not None: 427 if probe_function(emp_covs, n_samples, alpha, max_iter, tol, 428 n, omega, omega_old) is True: 429 probe_interrupted = True 430 logger.log("probe_function interrupted loop", verbose=verbose, 431 msg_level=2) 432 break 433 434 # Compute max of variation 435 omega_old -= omega 436 omega_old = abs(omega_old) 437 max_norm = omega_old.max() 438 439 if tol is not None and max_norm < tol: 440 logger.log("tolerance reached at iteration number {0:d}: {1:.3e}" 441 "".format(n + 1, max_norm), verbose=verbose) 442 tolerance_reached = True 443 break 444 445 if tol is not None and not tolerance_reached and not probe_interrupted: 446 warnings.warn("Maximum number of iterations reached without getting " 447 "to the requested tolerance level.") 448 449 return omega 450 451 452class GroupSparseCovariance(BaseEstimator, CacheMixin): 453 """Covariance and precision matrix estimator. 454 455 The model used has been introduced in :footcite:`Varoquaux2010a`, and the 456 algorithm used is based on what is described in :footcite:`Honorio2015`. 457 458 Parameters 459 ---------- 460 alpha : float, optional 461 regularization parameter. With normalized covariances matrices and 462 number of samples, sensible values lie in the [0, 1] range(zero is 463 no regularization: output is not sparse). 464 Default=0.1. 465 466 tol : positive float, optional 467 The tolerance to declare convergence: if the dual gap goes below 468 this value, iterations are stopped. Default=1e-3. 469 470 max_iter : int, optional 471 maximum number of iterations. The default value is rather 472 conservative. Default=10. 473 474 verbose : int, optional 475 verbosity level. Zero means "no message". Default=0. 476 477 memory : instance of joblib.Memory or string, optional 478 Used to cache the masking process. 479 By default, no caching is done. If a string is given, it is the 480 path to the caching directory. 481 482 memory_level : int, optional 483 Caching aggressiveness. Higher values mean more caching. 484 Default=0. 485 486 Attributes 487 ---------- 488 `covariances_` : numpy.ndarray, shape (n_features, n_features, n_subjects) 489 empirical covariance matrices. 490 491 `precisions_` : numpy.ndarraye, shape (n_features, n_features, n_subjects) 492 precisions matrices estimated using the group-sparse algorithm. 493 494 References 495 ---------- 496 .. footbibliography:: 497 498 """ 499 500 def __init__(self, alpha=0.1, tol=1e-3, max_iter=10, verbose=0, 501 memory=Memory(location=None), memory_level=0): 502 self.alpha = alpha 503 self.tol = tol 504 self.max_iter = max_iter 505 506 self.memory = memory 507 self.memory_level = memory_level 508 self.verbose = verbose 509 510 def fit(self, subjects, y=None): 511 """Fits the group sparse precision model according to the given 512 training data and parameters. 513 514 Parameters 515 ---------- 516 subjects : list of numpy.ndarray with shapes (n_samples, n_features) 517 input subjects. Each subject is a 2D array, whose columns contain 518 signals. Sample number can vary from subject to subject, but all 519 subjects must have the same number of features (i.e. of columns). 520 521 Returns 522 ------- 523 self : GroupSparseCovariance instance 524 the object itself. Useful for chaining operations. 525 526 """ 527 logger.log("Computing covariance matrices", verbose=self.verbose) 528 self.covariances_, n_samples = empirical_covariances( 529 subjects, assume_centered=False) 530 531 logger.log("Computing precision matrices", verbose=self.verbose) 532 ret = self._cache(_group_sparse_covariance)( 533 self.covariances_, n_samples, self.alpha, 534 tol=self.tol, max_iter=self.max_iter, 535 verbose=max(0, self.verbose - 1), debug=False) 536 537 self.precisions_ = ret 538 return self 539 540 541def empirical_covariances(subjects, assume_centered=False, standardize=False): 542 """Compute empirical covariances for several signals. 543 544 Parameters 545 ---------- 546 subjects : list of numpy.ndarray, shape for each (n_samples, n_features) 547 input subjects. Each subject is a 2D array, whose columns contain 548 signals. Sample number can vary from subject to subject, but all 549 subjects must have the same number of features (i.e. of columns). 550 551 assume_centered : bool, optional 552 if True, assume that all input signals are centered. This slightly 553 decreases computation time by avoiding useless computation. 554 Default=False. 555 556 standardize : bool, optional 557 if True, set every signal variance to one before computing their 558 covariance matrix (i.e. compute a correlation matrix). 559 Default=False. 560 561 Returns 562 ------- 563 emp_covs : numpy.ndarray, shape : (feature number, feature number, subject number) 564 empirical covariances. 565 566 n_samples : numpy.ndarray, shape: (subject number,) 567 number of samples for each subject. dtype is np.float64. 568 569 """ 570 if not hasattr(subjects, "__iter__"): 571 raise ValueError("'subjects' input argument must be an iterable. " 572 "You provided {0}".format(subjects.__class__)) 573 574 n_subjects = [s.shape[1] for s in subjects] 575 if len(set(n_subjects)) > 1: 576 raise ValueError("All subjects must have the same number of " 577 "features.\nYou provided: {0}".format(str(n_subjects)) 578 ) 579 n_subjects = len(subjects) 580 n_features = subjects[0].shape[1] 581 582 # Enable to change dtype here because depending on user, conversion from 583 # single precision to double will be required or not. 584 emp_covs = np.empty((n_features, n_features, n_subjects), order="F") 585 for k, s in enumerate(subjects): 586 if standardize: 587 s = s / s.std(axis=0) # copy on purpose 588 M = empirical_covariance(s, assume_centered=assume_centered) 589 590 # Force matrix symmetry, for numerical stability 591 # of _group_sparse_covariance 592 emp_covs[..., k] = M + M.T 593 emp_covs /= 2 594 595 n_samples = np.asarray([s.shape[0] for s in subjects], dtype=np.float64) 596 597 return emp_covs, n_samples 598 599 600def group_sparse_scores(precisions, n_samples, emp_covs, alpha, 601 duality_gap=False, debug=False): 602 """Compute scores used by group_sparse_covariance. 603 604 The log-likelihood of a given list of empirical covariances / 605 precisions. 606 607 Parameters 608 ---------- 609 precisions : numpy.ndarray, shape (n_features, n_features, n_subjects) 610 estimated precisions. 611 612 n_samples : array-like, shape (n_subjects,) 613 number of samples used in estimating each subject in "precisions". 614 n_samples.sum() must be equal to 1. 615 616 emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects) 617 empirical covariance matrix 618 619 alpha : float 620 regularization parameter 621 622 duality_gap : bool, optional 623 if True, also returns a duality gap upper bound. Default=False. 624 625 debug : bool, optional 626 if True, some consistency checks are performed to help solving 627 numerical problems. Default=False. 628 629 Returns 630 ------- 631 log_lik : float 632 log-likelihood of precisions on the given covariances. This is the 633 opposite of the loss function, without the regularization term 634 635 objective : float 636 value of objective function. This is the value minimized by 637 group_sparse_covariance() 638 639 duality_gap : float 640 duality gap upper bound. The returned bound is tight: it vanishes for 641 the optimal precision matrices 642 643 """ 644 n_features, _, n_subjects = emp_covs.shape 645 646 log_lik = 0 647 for k in range(n_subjects): 648 log_lik_k = - np.sum(emp_covs[..., k] * precisions[..., k]) 649 log_lik_k += fast_logdet(precisions[..., k]) 650 log_lik += n_samples[k] * log_lik_k 651 652 l2 = np.sqrt((precisions ** 2).sum(axis=-1)) 653 l12 = l2.sum() - np.diag(l2).sum() # Do not count diagonal terms 654 objective = alpha * l12 - log_lik 655 ret = (log_lik, objective) 656 657 # Compute duality gap if requested 658 if duality_gap is True: 659 A = np.empty(precisions.shape, dtype=np.float64, order="F") 660 for k in range(n_subjects): 661 # TODO: can be computed more efficiently using W_inv. See 662 # Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. 663 # 'Sparse Inverse Covariance Estimation with the Graphical Lasso'. 664 # Biostatistics 9, no. 3 (1 July 2008): 432-441. 665 precisions_inv = scipy.linalg.inv(precisions[..., k]) 666 if debug: 667 assert is_spd(precisions_inv) 668 669 A[..., k] = n_samples[k] * (precisions_inv - emp_covs[..., k]) 670 671 if debug: 672 np.testing.assert_almost_equal(A[..., k], A[..., k].T) 673 674 # Project A on the set of feasible points 675 alpha_max = np.sqrt((A ** 2).sum(axis=-1)) 676 mask = alpha_max > alpha 677 for k in range(A.shape[-1]): 678 A[mask, k] *= alpha / alpha_max[mask] 679 # Set zeros on diagonals. Essential to get an always positive 680 # duality gap. 681 A[..., k].flat[::A.shape[0] + 1] = 0 682 683 dual_obj = 0 # dual objective 684 for k in range(n_subjects): 685 B = emp_covs[..., k] + A[..., k] / n_samples[k] 686 dual_obj += n_samples[k] * (n_features + fast_logdet(B)) 687 688 # The previous computation can lead to a non-feasible point, because 689 # one of the Bs may not be positive definite. 690 # Use another value in this case, that ensure positive definiteness 691 # of B. The upper bound on the duality gap is not tight in the 692 # following, but is smaller than infinity, which is better in any case. 693 if not np.isfinite(dual_obj): 694 for k in range(n_subjects): 695 A[..., k] = - n_samples[k] * emp_covs[..., k] 696 A[..., k].flat[::A.shape[0] + 1] = 0 697 alpha_max = np.sqrt((A ** 2).sum(axis=-1)).max() 698 # the second value (0.05 is arbitrary: positive in ]0,1[) 699 gamma = min((alpha / alpha_max, 0.05)) 700 dual_obj = 0 701 for k in range(n_subjects): 702 # add gamma on the diagonal 703 B = ((1. - gamma) * emp_covs[..., k] 704 + gamma * np.eye(emp_covs.shape[0])) 705 dual_obj += n_samples[k] * (n_features + fast_logdet(B)) 706 707 gap = objective - dual_obj 708 ret = ret + (gap,) 709 return ret 710 711 712def group_sparse_covariance_path(train_subjs, alphas, test_subjs=None, 713 tol=1e-3, max_iter=10, precisions_init=None, 714 verbose=0, debug=False, probe_function=None): 715 """Get estimated precision matrices for different values of alpha. 716 717 Calling this function is faster than calling group_sparse_covariance() 718 repeatedly, because it makes use of the first result to initialize the 719 next computation. 720 721 Parameters 722 ---------- 723 train_subjs : list of numpy.ndarray 724 list of signals. 725 726 alphas : list of float 727 values of alpha to use. Best results for sorted values (decreasing) 728 729 test_subjs : list of numpy.ndarray, optional 730 list of signals, independent from those in train_subjs, on which to 731 compute a score. If None, no score is computed. 732 733 verbose : int, optional 734 verbosity level. Default=0. 735 736 tol, max_iter, debug, precisions_init : 737 Passed to group_sparse_covariance(). See the corresponding docstring 738 for details. 739 740 probe_function : callable, optional 741 This value is called before the first iteration and after each 742 iteration. If it returns True, then optimization is stopped 743 prematurely. 744 The function is given as arguments (in that order): 745 746 - empirical covariances (ndarray), 747 - number of samples for each subject (ndarray), 748 - regularization parameter (float) 749 - maximum iteration number (integer) 750 - tolerance (float) 751 - current iteration number (integer). -1 means "before first iteration" 752 - current value of precisions (ndarray). 753 - previous value of precisions (ndarray). None before first iteration. 754 755 Returns 756 ------- 757 precisions_list : list of numpy.ndarray 758 estimated precisions for each value of alpha provided. The length of 759 this list is the same as that of parameter "alphas". 760 761 scores : list of float 762 for each estimated precision, score obtained on the test set. Output 763 only if test_subjs is not None. 764 765 """ 766 train_covs, train_n_samples = empirical_covariances( 767 train_subjs, assume_centered=False, standardize=True) 768 769 scores = [] 770 precisions_list = [] 771 for alpha in alphas: 772 precisions = _group_sparse_covariance( 773 train_covs, train_n_samples, alpha, tol=tol, max_iter=max_iter, 774 precisions_init=precisions_init, verbose=max(0, verbose - 1), 775 debug=debug, probe_function=probe_function) 776 777 # Compute log-likelihood 778 if test_subjs is not None: 779 test_covs, _ = empirical_covariances( 780 test_subjs, assume_centered=False, standardize=True) 781 scores.append(group_sparse_scores(precisions, train_n_samples, 782 test_covs, 0)[0]) 783 precisions_list.append(precisions) 784 precisions_init = precisions 785 786 if test_subjs is not None: 787 return precisions_list, scores 788 else: 789 return precisions_list 790 791 792class EarlyStopProbe(object): 793 """Callable probe for early stopping in GroupSparseCovarianceCV. 794 795 Stop optimizing as soon as the score on the test set starts decreasing. 796 An instance of this class is supposed to be passed in the probe_function 797 argument of group_sparse_covariance(). 798 799 """ 800 def __init__(self, test_subjs, verbose=0): 801 self.test_emp_covs, _ = empirical_covariances(test_subjs) 802 self.verbose = verbose 803 804 def __call__(self, emp_covs, n_samples, alpha, max_iter, tol, 805 iter_n, omega, prev_omega): 806 log_lik, _ = group_sparse_scores( 807 omega, n_samples, self.test_emp_covs, alpha) 808 if iter_n > -1 and self.last_log_lik > log_lik: 809 logger.log("Log-likelihood on test set is decreasing. " 810 "Stopping at iteration %d" 811 % iter_n, verbose=self.verbose) 812 return True 813 self.last_log_lik = log_lik 814 815 816class GroupSparseCovarianceCV(BaseEstimator, CacheMixin): 817 """Sparse inverse covariance w/ cross-validated choice of the parameter. 818 819 A cross-validated value for the regularization parameter is first 820 determined using several calls to group_sparse_covariance. Then a final 821 optimization is run to get a value for the precision matrices, using the 822 selected value of the parameter. Different values of tolerance and of 823 maximum iteration number can be used in these two phases (see the tol 824 and tol_cv keyword below for example). 825 826 Parameters 827 ---------- 828 alphas : integer, optional 829 initial number of points in the grid of regularization parameter 830 values. Each step of grid refinement adds that many points as well. 831 Default=4. 832 833 n_refinements : integer, optional 834 number of times the initial grid should be refined. Default=4. 835 836 cv : integer, optional 837 number of folds in a K-fold cross-validation scheme. If None is passed, 838 defaults to 3. 839 840 tol_cv : float, optional 841 tolerance used to get the optimal alpha value. It has the same meaning 842 as the `tol` parameter in :func:`group_sparse_covariance`. 843 Default=1e-2. 844 845 max_iter_cv : integer, optional 846 maximum number of iterations for each optimization, during the alpha- 847 selection phase. Default=50. 848 849 tol : float, optional 850 tolerance used during the final optimization for determining precision 851 matrices value. Default=1e-3. 852 853 max_iter : integer, optional 854 maximum number of iterations in the final optimization. Default=100. 855 856 verbose : integer, optional 857 verbosity level. 0 means nothing is printed to the user. Default=0. 858 859 n_jobs : integer, optional 860 maximum number of cpu cores to use. The number of cores actually used 861 at the same time cannot exceed the number of folds in folding strategy 862 (that is, the value of cv). Default=1. 863 864 debug : bool, optional 865 if True, activates some internal checks for consistency. Only useful 866 for nilearn developers, not users. Default=False. 867 868 early_stopping : bool, optional 869 if True, reduce computation time by using a heuristic to reduce the 870 number of iterations required to get the optimal value for alpha. Be 871 aware that this can lead to slightly different values for the optimal 872 alpha compared to early_stopping=False. Default=True. 873 874 Attributes 875 ---------- 876 `covariances_` : numpy.ndarray, shape (n_features, n_features, n_subjects) 877 covariance matrices, one per subject. 878 879 `precisions_` : numpy.ndarray, shape (n_features, n_features, n_subjects) 880 precision matrices, one per subject. All matrices have the same 881 sparsity pattern (if a coefficient is zero for a given matrix, it 882 is also zero for every other.) 883 884 `alpha_` : float 885 penalization parameter value selected. 886 887 `cv_alphas_` : list of floats 888 all values of the penalization parameter explored. 889 890 `cv_scores_` : numpy.ndarray, shape (n_alphas, n_folds) 891 scores obtained on test set for each value of the penalization 892 parameter explored. 893 894 See also 895 -------- 896 GroupSparseCovariance, 897 sklearn.covariance.GraphicalLassoCV 898 899 Notes 900 ----- 901 The search for the optimal penalization parameter (alpha) is done on an 902 iteratively refined grid: first the cross-validated scores on a grid are 903 computed, then a new refined grid is centered around the maximum, and so 904 on. 905 906 """ 907 def __init__(self, alphas=4, n_refinements=4, cv=None, 908 tol_cv=1e-2, max_iter_cv=50, 909 tol=1e-3, max_iter=100, verbose=0, 910 n_jobs=1, debug=False, early_stopping=True): 911 self.alphas = alphas 912 self.n_refinements = n_refinements 913 self.tol_cv = tol_cv 914 self.max_iter_cv = max_iter_cv 915 self.cv = cv 916 self.tol = tol 917 self.max_iter = max_iter 918 919 self.verbose = verbose 920 self.n_jobs = n_jobs 921 self.debug = debug 922 self.early_stopping = early_stopping 923 924 def fit(self, subjects, y=None): 925 """Compute cross-validated group-sparse precisions. 926 927 Parameters 928 ---------- 929 subjects : list of numpy.ndarray with shapes (n_samples, n_features) 930 input subjects. Each subject is a 2D array, whose columns contain 931 signals. Sample number can vary from subject to subject, but all 932 subjects must have the same number of features (i.e. of columns.) 933 934 Returns 935 ------- 936 self : GroupSparseCovarianceCV 937 the object instance itself. 938 939 """ 940 # Empirical covariances 941 emp_covs, n_samples = \ 942 empirical_covariances(subjects, assume_centered=False) 943 n_subjects = emp_covs.shape[2] 944 945 # One cv generator per subject must be created, because each subject 946 # can have a different number of samples from the others. 947 cv = [] 948 for k in range(n_subjects): 949 cv.append(check_cv( 950 self.cv, np.ones(subjects[k].shape[0]), 951 classifier=False 952 ).split(subjects[k]) 953 ) 954 path = list() # List of (alpha, scores, covs) 955 n_alphas = self.alphas 956 957 if isinstance(n_alphas, collections.abc.Sequence): 958 alphas = list(self.alphas) 959 n_refinements = 1 960 else: 961 n_refinements = self.n_refinements 962 alpha_1, _ = compute_alpha_max(emp_covs, n_samples) 963 alpha_0 = 1e-2 * alpha_1 964 alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), 965 n_alphas)[::-1] 966 967 covs_init = itertools.repeat(None) 968 969 # Copying the cv generators to use them n_refinements times. 970 cv_ = zip(*cv) 971 972 for i, (this_cv) in enumerate(itertools.tee(cv_, n_refinements)): 973 # Compute the cross-validated loss on the current grid 974 train_test_subjs = [] 975 for train_test in this_cv: 976 assert(len(train_test) == n_subjects) 977 train_test_subjs.append(list(zip(*[(subject[train, :], 978 subject[test, :]) 979 for subject, (train, test) 980 in zip(subjects, train_test)]))) 981 if self.early_stopping: 982 probes = [EarlyStopProbe(test_subjs, 983 verbose=max(0, self.verbose - 1)) 984 for _, test_subjs in train_test_subjs] 985 else: 986 probes = itertools.repeat(None) 987 988 this_path = Parallel(n_jobs=self.n_jobs, 989 verbose=self.verbose)( 990 delayed(group_sparse_covariance_path)( 991 train_subjs, alphas, test_subjs=test_subjs, 992 max_iter=self.max_iter_cv, tol=self.tol_cv, 993 verbose=max(0, self.verbose - 1), debug=self.debug, 994 # Warm restart is useless with early stopping. 995 precisions_init=None if self.early_stopping else prec_init, 996 probe_function=probe) 997 for (train_subjs, test_subjs), prec_init, probe 998 in zip(train_test_subjs, covs_init, probes)) 999 1000 # this_path[i] is a tuple (precisions_list, scores) 1001 # - scores: scores obtained with the i-th folding, for each value 1002 # of alpha. 1003 # - precisions_list: corresponding precisions matrices, for each 1004 # value of alpha. 1005 precisions_list, scores = list(zip(*this_path)) 1006 # now scores[i][j] is the score for the i-th folding, j-th value of 1007 # alpha (analogous for precisions_list) 1008 precisions_list = list(zip(*precisions_list)) 1009 scores = [np.mean(sc) for sc in zip(*scores)] 1010 # scores[i] is the mean score obtained for the i-th value of alpha. 1011 1012 path.extend(list(zip(alphas, scores, precisions_list))) 1013 path = sorted(path, key=operator.itemgetter(0), reverse=True) 1014 1015 # Find the maximum score (avoid using the built-in 'max' function 1016 # to have a fully-reproducible selection of the smallest alpha in 1017 # case of equality) 1018 best_score = -np.inf 1019 last_finite_idx = 0 1020 for index, (alpha, this_score, _) in enumerate(path): 1021 if this_score >= .1 / np.finfo(np.float64).eps: 1022 this_score = np.nan 1023 if np.isfinite(this_score): 1024 last_finite_idx = index 1025 if this_score >= best_score: 1026 best_score = this_score 1027 best_index = index 1028 1029 # Refine the grid 1030 if best_index == 0: 1031 # We do not need to go back: we have chosen 1032 # the highest value of alpha for which there are 1033 # non-zero coefficients 1034 alpha_1 = path[0][0] 1035 alpha_0 = path[1][0] 1036 covs_init = path[0][2] 1037 elif (best_index == last_finite_idx 1038 and not best_index == len(path) - 1): 1039 # We have non-converged models on the upper bound of the 1040 # grid, we need to refine the grid there 1041 alpha_1 = path[best_index][0] 1042 alpha_0 = path[best_index + 1][0] 1043 covs_init = path[best_index][2] 1044 elif best_index == len(path) - 1: 1045 alpha_1 = path[best_index][0] 1046 alpha_0 = 0.01 * path[best_index][0] 1047 covs_init = path[best_index][2] 1048 else: 1049 alpha_1 = path[best_index - 1][0] 1050 alpha_0 = path[best_index + 1][0] 1051 covs_init = path[best_index - 1][2] 1052 alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 1053 len(alphas) + 2) 1054 alphas = alphas[1:-1] 1055 if n_refinements > 1: 1056 logger.log("[GroupSparseCovarianceCV] Done refinement " 1057 "% 2i out of %i" % (i + 1, n_refinements), 1058 verbose=self.verbose) 1059 1060 path = list(zip(*path)) 1061 cv_scores_ = list(path[1]) 1062 alphas = list(path[0]) 1063 1064 self.cv_scores_ = np.array(cv_scores_) 1065 self.alpha_ = alphas[best_index] 1066 self.cv_alphas_ = alphas 1067 1068 # Finally, fit the model with the selected alpha 1069 logger.log("Final optimization", verbose=self.verbose) 1070 self.covariances_ = emp_covs 1071 self.precisions_ = _group_sparse_covariance( 1072 emp_covs, n_samples, self.alpha_, tol=self.tol, 1073 max_iter=self.max_iter, 1074 verbose=max(0, self.verbose - 1), debug=self.debug) 1075 return self 1076