1"""Sparse DIAgonal format""" 2 3__docformat__ = "restructuredtext en" 4 5__all__ = ['dia_matrix', 'isspmatrix_dia'] 6 7import numpy as np 8 9from .base import isspmatrix, _formats, spmatrix 10from .data import _data_matrix 11from .sputils import (isshape, upcast_char, getdtype, get_index_dtype, 12 get_sum_dtype, validateaxis, check_shape, matrix) 13from ._sparsetools import dia_matvec 14 15 16class dia_matrix(_data_matrix): 17 """Sparse matrix with DIAgonal storage 18 19 This can be instantiated in several ways: 20 dia_matrix(D) 21 with a dense matrix 22 23 dia_matrix(S) 24 with another sparse matrix S (equivalent to S.todia()) 25 26 dia_matrix((M, N), [dtype]) 27 to construct an empty matrix with shape (M, N), 28 dtype is optional, defaulting to dtype='d'. 29 30 dia_matrix((data, offsets), shape=(M, N)) 31 where the ``data[k,:]`` stores the diagonal entries for 32 diagonal ``offsets[k]`` (See example below) 33 34 Attributes 35 ---------- 36 dtype : dtype 37 Data type of the matrix 38 shape : 2-tuple 39 Shape of the matrix 40 ndim : int 41 Number of dimensions (this is always 2) 42 nnz 43 Number of stored values, including explicit zeros 44 data 45 DIA format data array of the matrix 46 offsets 47 DIA format offset array of the matrix 48 49 Notes 50 ----- 51 52 Sparse matrices can be used in arithmetic operations: they support 53 addition, subtraction, multiplication, division, and matrix power. 54 55 Examples 56 -------- 57 58 >>> import numpy as np 59 >>> from scipy.sparse import dia_matrix 60 >>> dia_matrix((3, 4), dtype=np.int8).toarray() 61 array([[0, 0, 0, 0], 62 [0, 0, 0, 0], 63 [0, 0, 0, 0]], dtype=int8) 64 65 >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) 66 >>> offsets = np.array([0, -1, 2]) 67 >>> dia_matrix((data, offsets), shape=(4, 4)).toarray() 68 array([[1, 0, 3, 0], 69 [1, 2, 0, 4], 70 [0, 2, 3, 0], 71 [0, 0, 3, 4]]) 72 73 >>> from scipy.sparse import dia_matrix 74 >>> n = 10 75 >>> ex = np.ones(n) 76 >>> data = np.array([ex, 2 * ex, ex]) 77 >>> offsets = np.array([-1, 0, 1]) 78 >>> dia_matrix((data, offsets), shape=(n, n)).toarray() 79 array([[2., 1., 0., ..., 0., 0., 0.], 80 [1., 2., 1., ..., 0., 0., 0.], 81 [0., 1., 2., ..., 0., 0., 0.], 82 ..., 83 [0., 0., 0., ..., 2., 1., 0.], 84 [0., 0., 0., ..., 1., 2., 1.], 85 [0., 0., 0., ..., 0., 1., 2.]]) 86 """ 87 format = 'dia' 88 89 def __init__(self, arg1, shape=None, dtype=None, copy=False): 90 _data_matrix.__init__(self) 91 92 if isspmatrix_dia(arg1): 93 if copy: 94 arg1 = arg1.copy() 95 self.data = arg1.data 96 self.offsets = arg1.offsets 97 self._shape = check_shape(arg1.shape) 98 elif isspmatrix(arg1): 99 if isspmatrix_dia(arg1) and copy: 100 A = arg1.copy() 101 else: 102 A = arg1.todia() 103 self.data = A.data 104 self.offsets = A.offsets 105 self._shape = check_shape(A.shape) 106 elif isinstance(arg1, tuple): 107 if isshape(arg1): 108 # It's a tuple of matrix dimensions (M, N) 109 # create empty matrix 110 self._shape = check_shape(arg1) 111 self.data = np.zeros((0,0), getdtype(dtype, default=float)) 112 idx_dtype = get_index_dtype(maxval=max(self.shape)) 113 self.offsets = np.zeros((0), dtype=idx_dtype) 114 else: 115 try: 116 # Try interpreting it as (data, offsets) 117 data, offsets = arg1 118 except Exception as e: 119 raise ValueError('unrecognized form for dia_matrix constructor') from e 120 else: 121 if shape is None: 122 raise ValueError('expected a shape argument') 123 self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy)) 124 self.offsets = np.atleast_1d(np.array(arg1[1], 125 dtype=get_index_dtype(maxval=max(shape)), 126 copy=copy)) 127 self._shape = check_shape(shape) 128 else: 129 #must be dense, convert to COO first, then to DIA 130 try: 131 arg1 = np.asarray(arg1) 132 except Exception as e: 133 raise ValueError("unrecognized form for" 134 " %s_matrix constructor" % self.format) from e 135 from .coo import coo_matrix 136 A = coo_matrix(arg1, dtype=dtype, shape=shape).todia() 137 self.data = A.data 138 self.offsets = A.offsets 139 self._shape = check_shape(A.shape) 140 141 if dtype is not None: 142 self.data = self.data.astype(dtype) 143 144 #check format 145 if self.offsets.ndim != 1: 146 raise ValueError('offsets array must have rank 1') 147 148 if self.data.ndim != 2: 149 raise ValueError('data array must have rank 2') 150 151 if self.data.shape[0] != len(self.offsets): 152 raise ValueError('number of diagonals (%d) ' 153 'does not match the number of offsets (%d)' 154 % (self.data.shape[0], len(self.offsets))) 155 156 if len(np.unique(self.offsets)) != len(self.offsets): 157 raise ValueError('offset array contains duplicate values') 158 159 def __repr__(self): 160 format = _formats[self.getformat()][1] 161 return "<%dx%d sparse matrix of type '%s'\n" \ 162 "\twith %d stored elements (%d diagonals) in %s format>" % \ 163 (self.shape + (self.dtype.type, self.nnz, self.data.shape[0], 164 format)) 165 166 def _data_mask(self): 167 """Returns a mask of the same shape as self.data, where 168 mask[i,j] is True when data[i,j] corresponds to a stored element.""" 169 num_rows, num_cols = self.shape 170 offset_inds = np.arange(self.data.shape[1]) 171 row = offset_inds - self.offsets[:,None] 172 mask = (row >= 0) 173 mask &= (row < num_rows) 174 mask &= (offset_inds < num_cols) 175 return mask 176 177 def count_nonzero(self): 178 mask = self._data_mask() 179 return np.count_nonzero(self.data[mask]) 180 181 def getnnz(self, axis=None): 182 if axis is not None: 183 raise NotImplementedError("getnnz over an axis is not implemented " 184 "for DIA format") 185 M,N = self.shape 186 nnz = 0 187 for k in self.offsets: 188 if k > 0: 189 nnz += min(M,N-k) 190 else: 191 nnz += min(M+k,N) 192 return int(nnz) 193 194 getnnz.__doc__ = spmatrix.getnnz.__doc__ 195 count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__ 196 197 def sum(self, axis=None, dtype=None, out=None): 198 validateaxis(axis) 199 200 if axis is not None and axis < 0: 201 axis += 2 202 203 res_dtype = get_sum_dtype(self.dtype) 204 num_rows, num_cols = self.shape 205 ret = None 206 207 if axis == 0: 208 mask = self._data_mask() 209 x = (self.data * mask).sum(axis=0) 210 if x.shape[0] == num_cols: 211 res = x 212 else: 213 res = np.zeros(num_cols, dtype=x.dtype) 214 res[:x.shape[0]] = x 215 ret = matrix(res, dtype=res_dtype) 216 217 else: 218 row_sums = np.zeros(num_rows, dtype=res_dtype) 219 one = np.ones(num_cols, dtype=res_dtype) 220 dia_matvec(num_rows, num_cols, len(self.offsets), 221 self.data.shape[1], self.offsets, self.data, one, row_sums) 222 223 row_sums = matrix(row_sums) 224 225 if axis is None: 226 return row_sums.sum(dtype=dtype, out=out) 227 228 if axis is not None: 229 row_sums = row_sums.T 230 231 ret = matrix(row_sums.sum(axis=axis)) 232 233 if out is not None and out.shape != ret.shape: 234 raise ValueError("dimensions do not match") 235 236 return ret.sum(axis=(), dtype=dtype, out=out) 237 238 sum.__doc__ = spmatrix.sum.__doc__ 239 240 def _add_sparse(self, other): 241 242 # Check if other is also of type dia_matrix 243 if not isinstance(other, type(self)): 244 # If other is not of type dia_matrix, default to 245 # converting to csr_matrix, as is done in the _add_sparse 246 # method of parent class spmatrix 247 return self.tocsr()._add_sparse(other) 248 249 # The task is to compute m = self + other 250 # Start by making a copy of self, of the datatype 251 # that should result from adding self and other 252 dtype = np.promote_types(self.dtype, other.dtype) 253 m = self.astype(dtype, copy=True) 254 255 # Then, add all the stored diagonals of other. 256 for d in other.offsets: 257 # Check if the diagonal has already been added. 258 if d in m.offsets: 259 # If the diagonal is already there, we need to take 260 # the sum of the existing and the new 261 m.setdiag(m.diagonal(d) + other.diagonal(d), d) 262 else: 263 m.setdiag(other.diagonal(d), d) 264 return m 265 266 def _mul_vector(self, other): 267 x = other 268 269 y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, 270 x.dtype.char)) 271 272 L = self.data.shape[1] 273 274 M,N = self.shape 275 276 dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel()) 277 278 return y 279 280 def _mul_multimatrix(self, other): 281 return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T]) 282 283 def _setdiag(self, values, k=0): 284 M, N = self.shape 285 286 if values.ndim == 0: 287 # broadcast 288 values_n = np.inf 289 else: 290 values_n = len(values) 291 292 if k < 0: 293 n = min(M + k, N, values_n) 294 min_index = 0 295 max_index = n 296 else: 297 n = min(M, N - k, values_n) 298 min_index = k 299 max_index = k + n 300 301 if values.ndim != 0: 302 # allow also longer sequences 303 values = values[:n] 304 305 data_rows, data_cols = self.data.shape 306 if k in self.offsets: 307 if max_index > data_cols: 308 data = np.zeros((data_rows, max_index), dtype=self.data.dtype) 309 data[:, :data_cols] = self.data 310 self.data = data 311 self.data[self.offsets == k, min_index:max_index] = values 312 else: 313 self.offsets = np.append(self.offsets, self.offsets.dtype.type(k)) 314 m = max(max_index, data_cols) 315 data = np.zeros((data_rows + 1, m), dtype=self.data.dtype) 316 data[:-1, :data_cols] = self.data 317 data[-1, min_index:max_index] = values 318 self.data = data 319 320 def todia(self, copy=False): 321 if copy: 322 return self.copy() 323 else: 324 return self 325 326 todia.__doc__ = spmatrix.todia.__doc__ 327 328 def transpose(self, axes=None, copy=False): 329 if axes is not None: 330 raise ValueError(("Sparse matrices do not support " 331 "an 'axes' parameter because swapping " 332 "dimensions is the only logical permutation.")) 333 334 num_rows, num_cols = self.shape 335 max_dim = max(self.shape) 336 337 # flip diagonal offsets 338 offsets = -self.offsets 339 340 # re-align the data matrix 341 r = np.arange(len(offsets), dtype=np.intc)[:, None] 342 c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None] 343 pad_amount = max(0, max_dim-self.data.shape[1]) 344 data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount), 345 dtype=self.data.dtype))) 346 data = data[r, c] 347 return dia_matrix((data, offsets), shape=( 348 num_cols, num_rows), copy=copy) 349 350 transpose.__doc__ = spmatrix.transpose.__doc__ 351 352 def diagonal(self, k=0): 353 rows, cols = self.shape 354 if k <= -rows or k >= cols: 355 return np.empty(0, dtype=self.data.dtype) 356 idx, = np.nonzero(self.offsets == k) 357 first_col = max(0, k) 358 last_col = min(rows + k, cols) 359 result_size = last_col - first_col 360 if idx.size == 0: 361 return np.zeros(result_size, dtype=self.data.dtype) 362 result = self.data[idx[0], first_col:last_col] 363 padding = result_size - len(result) 364 if padding > 0: 365 result = np.pad(result, (0, padding), mode='constant') 366 return result 367 368 diagonal.__doc__ = spmatrix.diagonal.__doc__ 369 370 def tocsc(self, copy=False): 371 from .csc import csc_matrix 372 if self.nnz == 0: 373 return csc_matrix(self.shape, dtype=self.dtype) 374 375 num_rows, num_cols = self.shape 376 num_offsets, offset_len = self.data.shape 377 offset_inds = np.arange(offset_len) 378 379 row = offset_inds - self.offsets[:,None] 380 mask = (row >= 0) 381 mask &= (row < num_rows) 382 mask &= (offset_inds < num_cols) 383 mask &= (self.data != 0) 384 385 idx_dtype = get_index_dtype(maxval=max(self.shape)) 386 indptr = np.zeros(num_cols + 1, dtype=idx_dtype) 387 indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)) 388 indptr[offset_len+1:] = indptr[offset_len] 389 indices = row.T[mask.T].astype(idx_dtype, copy=False) 390 data = self.data.T[mask.T] 391 return csc_matrix((data, indices, indptr), shape=self.shape, 392 dtype=self.dtype) 393 394 tocsc.__doc__ = spmatrix.tocsc.__doc__ 395 396 def tocoo(self, copy=False): 397 num_rows, num_cols = self.shape 398 num_offsets, offset_len = self.data.shape 399 offset_inds = np.arange(offset_len) 400 401 row = offset_inds - self.offsets[:,None] 402 mask = (row >= 0) 403 mask &= (row < num_rows) 404 mask &= (offset_inds < num_cols) 405 mask &= (self.data != 0) 406 row = row[mask] 407 col = np.tile(offset_inds, num_offsets)[mask.ravel()] 408 data = self.data[mask] 409 410 from .coo import coo_matrix 411 A = coo_matrix((data,(row,col)), shape=self.shape, dtype=self.dtype) 412 A.has_canonical_format = True 413 return A 414 415 tocoo.__doc__ = spmatrix.tocoo.__doc__ 416 417 # needed by _data_matrix 418 def _with_data(self, data, copy=True): 419 """Returns a matrix with the same sparsity structure as self, 420 but with different data. By default the structure arrays are copied. 421 """ 422 if copy: 423 return dia_matrix((data, self.offsets.copy()), shape=self.shape) 424 else: 425 return dia_matrix((data,self.offsets), shape=self.shape) 426 427 def resize(self, *shape): 428 shape = check_shape(shape) 429 M, N = shape 430 # we do not need to handle the case of expanding N 431 self.data = self.data[:, :N] 432 433 if (M > self.shape[0] and 434 np.any(self.offsets + self.shape[0] < self.data.shape[1])): 435 # explicitly clear values that were previously hidden 436 mask = (self.offsets[:, None] + self.shape[0] <= 437 np.arange(self.data.shape[1])) 438 self.data[mask] = 0 439 440 self._shape = shape 441 442 resize.__doc__ = spmatrix.resize.__doc__ 443 444 445def isspmatrix_dia(x): 446 """Is x of dia_matrix type? 447 448 Parameters 449 ---------- 450 x 451 object to check for being a dia matrix 452 453 Returns 454 ------- 455 bool 456 True if x is a dia matrix, False otherwise 457 458 Examples 459 -------- 460 >>> from scipy.sparse import dia_matrix, isspmatrix_dia 461 >>> isspmatrix_dia(dia_matrix([[5]])) 462 True 463 464 >>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia 465 >>> isspmatrix_dia(csr_matrix([[5]])) 466 False 467 """ 468 return isinstance(x, dia_matrix) 469