1"""Sparse block 1-norm estimator. 2""" 3 4import numpy as np 5from scipy.sparse.linalg import aslinearoperator 6 7 8__all__ = ['onenormest'] 9 10 11def onenormest(A, t=2, itmax=5, compute_v=False, compute_w=False): 12 """ 13 Compute a lower bound of the 1-norm of a sparse matrix. 14 15 Parameters 16 ---------- 17 A : ndarray or other linear operator 18 A linear operator that can be transposed and that can 19 produce matrix products. 20 t : int, optional 21 A positive parameter controlling the tradeoff between 22 accuracy versus time and memory usage. 23 Larger values take longer and use more memory 24 but give more accurate output. 25 itmax : int, optional 26 Use at most this many iterations. 27 compute_v : bool, optional 28 Request a norm-maximizing linear operator input vector if True. 29 compute_w : bool, optional 30 Request a norm-maximizing linear operator output vector if True. 31 32 Returns 33 ------- 34 est : float 35 An underestimate of the 1-norm of the sparse matrix. 36 v : ndarray, optional 37 The vector such that ||Av||_1 == est*||v||_1. 38 It can be thought of as an input to the linear operator 39 that gives an output with particularly large norm. 40 w : ndarray, optional 41 The vector Av which has relatively large 1-norm. 42 It can be thought of as an output of the linear operator 43 that is relatively large in norm compared to the input. 44 45 Notes 46 ----- 47 This is algorithm 2.4 of [1]. 48 49 In [2] it is described as follows. 50 "This algorithm typically requires the evaluation of 51 about 4t matrix-vector products and almost invariably 52 produces a norm estimate (which is, in fact, a lower 53 bound on the norm) correct to within a factor 3." 54 55 .. versionadded:: 0.13.0 56 57 References 58 ---------- 59 .. [1] Nicholas J. Higham and Francoise Tisseur (2000), 60 "A Block Algorithm for Matrix 1-Norm Estimation, 61 with an Application to 1-Norm Pseudospectra." 62 SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201. 63 64 .. [2] Awad H. Al-Mohy and Nicholas J. Higham (2009), 65 "A new scaling and squaring algorithm for the matrix exponential." 66 SIAM J. Matrix Anal. Appl. Vol. 31, No. 3, pp. 970-989. 67 68 Examples 69 -------- 70 >>> from scipy.sparse import csc_matrix 71 >>> from scipy.sparse.linalg import onenormest 72 >>> A = csc_matrix([[1., 0., 0.], [5., 8., 2.], [0., -1., 0.]], dtype=float) 73 >>> A.todense() 74 matrix([[ 1., 0., 0.], 75 [ 5., 8., 2.], 76 [ 0., -1., 0.]]) 77 >>> onenormest(A) 78 9.0 79 >>> np.linalg.norm(A.todense(), ord=1) 80 9.0 81 """ 82 83 # Check the input. 84 A = aslinearoperator(A) 85 if A.shape[0] != A.shape[1]: 86 raise ValueError('expected the operator to act like a square matrix') 87 88 # If the operator size is small compared to t, 89 # then it is easier to compute the exact norm. 90 # Otherwise estimate the norm. 91 n = A.shape[1] 92 if t >= n: 93 A_explicit = np.asarray(aslinearoperator(A).matmat(np.identity(n))) 94 if A_explicit.shape != (n, n): 95 raise Exception('internal error: ', 96 'unexpected shape ' + str(A_explicit.shape)) 97 col_abs_sums = abs(A_explicit).sum(axis=0) 98 if col_abs_sums.shape != (n, ): 99 raise Exception('internal error: ', 100 'unexpected shape ' + str(col_abs_sums.shape)) 101 argmax_j = np.argmax(col_abs_sums) 102 v = elementary_vector(n, argmax_j) 103 w = A_explicit[:, argmax_j] 104 est = col_abs_sums[argmax_j] 105 else: 106 est, v, w, nmults, nresamples = _onenormest_core(A, A.H, t, itmax) 107 108 # Report the norm estimate along with some certificates of the estimate. 109 if compute_v or compute_w: 110 result = (est,) 111 if compute_v: 112 result += (v,) 113 if compute_w: 114 result += (w,) 115 return result 116 else: 117 return est 118 119 120def _blocked_elementwise(func): 121 """ 122 Decorator for an elementwise function, to apply it blockwise along 123 first dimension, to avoid excessive memory usage in temporaries. 124 """ 125 block_size = 2**20 126 127 def wrapper(x): 128 if x.shape[0] < block_size: 129 return func(x) 130 else: 131 y0 = func(x[:block_size]) 132 y = np.zeros((x.shape[0],) + y0.shape[1:], dtype=y0.dtype) 133 y[:block_size] = y0 134 del y0 135 for j in range(block_size, x.shape[0], block_size): 136 y[j:j+block_size] = func(x[j:j+block_size]) 137 return y 138 return wrapper 139 140 141@_blocked_elementwise 142def sign_round_up(X): 143 """ 144 This should do the right thing for both real and complex matrices. 145 146 From Higham and Tisseur: 147 "Everything in this section remains valid for complex matrices 148 provided that sign(A) is redefined as the matrix (aij / |aij|) 149 (and sign(0) = 1) transposes are replaced by conjugate transposes." 150 151 """ 152 Y = X.copy() 153 Y[Y == 0] = 1 154 Y /= np.abs(Y) 155 return Y 156 157 158@_blocked_elementwise 159def _max_abs_axis1(X): 160 return np.max(np.abs(X), axis=1) 161 162 163def _sum_abs_axis0(X): 164 block_size = 2**20 165 r = None 166 for j in range(0, X.shape[0], block_size): 167 y = np.sum(np.abs(X[j:j+block_size]), axis=0) 168 if r is None: 169 r = y 170 else: 171 r += y 172 return r 173 174 175def elementary_vector(n, i): 176 v = np.zeros(n, dtype=float) 177 v[i] = 1 178 return v 179 180 181def vectors_are_parallel(v, w): 182 # Columns are considered parallel when they are equal or negative. 183 # Entries are required to be in {-1, 1}, 184 # which guarantees that the magnitudes of the vectors are identical. 185 if v.ndim != 1 or v.shape != w.shape: 186 raise ValueError('expected conformant vectors with entries in {-1,1}') 187 n = v.shape[0] 188 return np.dot(v, w) == n 189 190 191def every_col_of_X_is_parallel_to_a_col_of_Y(X, Y): 192 for v in X.T: 193 if not any(vectors_are_parallel(v, w) for w in Y.T): 194 return False 195 return True 196 197 198def column_needs_resampling(i, X, Y=None): 199 # column i of X needs resampling if either 200 # it is parallel to a previous column of X or 201 # it is parallel to a column of Y 202 n, t = X.shape 203 v = X[:, i] 204 if any(vectors_are_parallel(v, X[:, j]) for j in range(i)): 205 return True 206 if Y is not None: 207 if any(vectors_are_parallel(v, w) for w in Y.T): 208 return True 209 return False 210 211 212def resample_column(i, X): 213 X[:, i] = np.random.randint(0, 2, size=X.shape[0])*2 - 1 214 215 216def less_than_or_close(a, b): 217 return np.allclose(a, b) or (a < b) 218 219 220def _algorithm_2_2(A, AT, t): 221 """ 222 This is Algorithm 2.2. 223 224 Parameters 225 ---------- 226 A : ndarray or other linear operator 227 A linear operator that can produce matrix products. 228 AT : ndarray or other linear operator 229 The transpose of A. 230 t : int, optional 231 A positive parameter controlling the tradeoff between 232 accuracy versus time and memory usage. 233 234 Returns 235 ------- 236 g : sequence 237 A non-negative decreasing vector 238 such that g[j] is a lower bound for the 1-norm 239 of the column of A of jth largest 1-norm. 240 The first entry of this vector is therefore a lower bound 241 on the 1-norm of the linear operator A. 242 This sequence has length t. 243 ind : sequence 244 The ith entry of ind is the index of the column A whose 1-norm 245 is given by g[i]. 246 This sequence of indices has length t, and its entries are 247 chosen from range(n), possibly with repetition, 248 where n is the order of the operator A. 249 250 Notes 251 ----- 252 This algorithm is mainly for testing. 253 It uses the 'ind' array in a way that is similar to 254 its usage in algorithm 2.4. This algorithm 2.2 may be easier to test, 255 so it gives a chance of uncovering bugs related to indexing 256 which could have propagated less noticeably to algorithm 2.4. 257 258 """ 259 A_linear_operator = aslinearoperator(A) 260 AT_linear_operator = aslinearoperator(AT) 261 n = A_linear_operator.shape[0] 262 263 # Initialize the X block with columns of unit 1-norm. 264 X = np.ones((n, t)) 265 if t > 1: 266 X[:, 1:] = np.random.randint(0, 2, size=(n, t-1))*2 - 1 267 X /= float(n) 268 269 # Iteratively improve the lower bounds. 270 # Track extra things, to assert invariants for debugging. 271 g_prev = None 272 h_prev = None 273 k = 1 274 ind = range(t) 275 while True: 276 Y = np.asarray(A_linear_operator.matmat(X)) 277 g = _sum_abs_axis0(Y) 278 best_j = np.argmax(g) 279 g.sort() 280 g = g[::-1] 281 S = sign_round_up(Y) 282 Z = np.asarray(AT_linear_operator.matmat(S)) 283 h = _max_abs_axis1(Z) 284 285 # If this algorithm runs for fewer than two iterations, 286 # then its return values do not have the properties indicated 287 # in the description of the algorithm. 288 # In particular, the entries of g are not 1-norms of any 289 # column of A until the second iteration. 290 # Therefore we will require the algorithm to run for at least 291 # two iterations, even though this requirement is not stated 292 # in the description of the algorithm. 293 if k >= 2: 294 if less_than_or_close(max(h), np.dot(Z[:, best_j], X[:, best_j])): 295 break 296 ind = np.argsort(h)[::-1][:t] 297 h = h[ind] 298 for j in range(t): 299 X[:, j] = elementary_vector(n, ind[j]) 300 301 # Check invariant (2.2). 302 if k >= 2: 303 if not less_than_or_close(g_prev[0], h_prev[0]): 304 raise Exception('invariant (2.2) is violated') 305 if not less_than_or_close(h_prev[0], g[0]): 306 raise Exception('invariant (2.2) is violated') 307 308 # Check invariant (2.3). 309 if k >= 3: 310 for j in range(t): 311 if not less_than_or_close(g[j], g_prev[j]): 312 raise Exception('invariant (2.3) is violated') 313 314 # Update for the next iteration. 315 g_prev = g 316 h_prev = h 317 k += 1 318 319 # Return the lower bounds and the corresponding column indices. 320 return g, ind 321 322 323def _onenormest_core(A, AT, t, itmax): 324 """ 325 Compute a lower bound of the 1-norm of a sparse matrix. 326 327 Parameters 328 ---------- 329 A : ndarray or other linear operator 330 A linear operator that can produce matrix products. 331 AT : ndarray or other linear operator 332 The transpose of A. 333 t : int, optional 334 A positive parameter controlling the tradeoff between 335 accuracy versus time and memory usage. 336 itmax : int, optional 337 Use at most this many iterations. 338 339 Returns 340 ------- 341 est : float 342 An underestimate of the 1-norm of the sparse matrix. 343 v : ndarray, optional 344 The vector such that ||Av||_1 == est*||v||_1. 345 It can be thought of as an input to the linear operator 346 that gives an output with particularly large norm. 347 w : ndarray, optional 348 The vector Av which has relatively large 1-norm. 349 It can be thought of as an output of the linear operator 350 that is relatively large in norm compared to the input. 351 nmults : int, optional 352 The number of matrix products that were computed. 353 nresamples : int, optional 354 The number of times a parallel column was observed, 355 necessitating a re-randomization of the column. 356 357 Notes 358 ----- 359 This is algorithm 2.4. 360 361 """ 362 # This function is a more or less direct translation 363 # of Algorithm 2.4 from the Higham and Tisseur (2000) paper. 364 A_linear_operator = aslinearoperator(A) 365 AT_linear_operator = aslinearoperator(AT) 366 if itmax < 2: 367 raise ValueError('at least two iterations are required') 368 if t < 1: 369 raise ValueError('at least one column is required') 370 n = A.shape[0] 371 if t >= n: 372 raise ValueError('t should be smaller than the order of A') 373 # Track the number of big*small matrix multiplications 374 # and the number of resamplings. 375 nmults = 0 376 nresamples = 0 377 # "We now explain our choice of starting matrix. We take the first 378 # column of X to be the vector of 1s [...] This has the advantage that 379 # for a matrix with nonnegative elements the algorithm converges 380 # with an exact estimate on the second iteration, and such matrices 381 # arise in applications [...]" 382 X = np.ones((n, t), dtype=float) 383 # "The remaining columns are chosen as rand{-1,1}, 384 # with a check for and correction of parallel columns, 385 # exactly as for S in the body of the algorithm." 386 if t > 1: 387 for i in range(1, t): 388 # These are technically initial samples, not resamples, 389 # so the resampling count is not incremented. 390 resample_column(i, X) 391 for i in range(t): 392 while column_needs_resampling(i, X): 393 resample_column(i, X) 394 nresamples += 1 395 # "Choose starting matrix X with columns of unit 1-norm." 396 X /= float(n) 397 # "indices of used unit vectors e_j" 398 ind_hist = np.zeros(0, dtype=np.intp) 399 est_old = 0 400 S = np.zeros((n, t), dtype=float) 401 k = 1 402 ind = None 403 while True: 404 Y = np.asarray(A_linear_operator.matmat(X)) 405 nmults += 1 406 mags = _sum_abs_axis0(Y) 407 est = np.max(mags) 408 best_j = np.argmax(mags) 409 if est > est_old or k == 2: 410 if k >= 2: 411 ind_best = ind[best_j] 412 w = Y[:, best_j] 413 # (1) 414 if k >= 2 and est <= est_old: 415 est = est_old 416 break 417 est_old = est 418 S_old = S 419 if k > itmax: 420 break 421 S = sign_round_up(Y) 422 del Y 423 # (2) 424 if every_col_of_X_is_parallel_to_a_col_of_Y(S, S_old): 425 break 426 if t > 1: 427 # "Ensure that no column of S is parallel to another column of S 428 # or to a column of S_old by replacing columns of S by rand{-1,1}." 429 for i in range(t): 430 while column_needs_resampling(i, S, S_old): 431 resample_column(i, S) 432 nresamples += 1 433 del S_old 434 # (3) 435 Z = np.asarray(AT_linear_operator.matmat(S)) 436 nmults += 1 437 h = _max_abs_axis1(Z) 438 del Z 439 # (4) 440 if k >= 2 and max(h) == h[ind_best]: 441 break 442 # "Sort h so that h_first >= ... >= h_last 443 # and re-order ind correspondingly." 444 # 445 # Later on, we will need at most t+len(ind_hist) largest 446 # entries, so drop the rest 447 ind = np.argsort(h)[::-1][:t+len(ind_hist)].copy() 448 del h 449 if t > 1: 450 # (5) 451 # Break if the most promising t vectors have been visited already. 452 if np.in1d(ind[:t], ind_hist).all(): 453 break 454 # Put the most promising unvisited vectors at the front of the list 455 # and put the visited vectors at the end of the list. 456 # Preserve the order of the indices induced by the ordering of h. 457 seen = np.in1d(ind, ind_hist) 458 ind = np.concatenate((ind[~seen], ind[seen])) 459 for j in range(t): 460 X[:, j] = elementary_vector(n, ind[j]) 461 462 new_ind = ind[:t][~np.in1d(ind[:t], ind_hist)] 463 ind_hist = np.concatenate((ind_hist, new_ind)) 464 k += 1 465 v = elementary_vector(n, ind_best) 466 return est, v, w, nmults, nresamples 467