1"""Sparse DIAgonal format"""
2
3__docformat__ = "restructuredtext en"
4
5__all__ = ['dia_matrix', 'isspmatrix_dia']
6
7import numpy as np
8
9from .base import isspmatrix, _formats, spmatrix
10from .data import _data_matrix
11from .sputils import (isshape, upcast_char, getdtype, get_index_dtype,
12                      get_sum_dtype, validateaxis, check_shape, matrix)
13from ._sparsetools import dia_matvec
14
15
16class dia_matrix(_data_matrix):
17    """Sparse matrix with DIAgonal storage
18
19    This can be instantiated in several ways:
20        dia_matrix(D)
21            with a dense matrix
22
23        dia_matrix(S)
24            with another sparse matrix S (equivalent to S.todia())
25
26        dia_matrix((M, N), [dtype])
27            to construct an empty matrix with shape (M, N),
28            dtype is optional, defaulting to dtype='d'.
29
30        dia_matrix((data, offsets), shape=(M, N))
31            where the ``data[k,:]`` stores the diagonal entries for
32            diagonal ``offsets[k]`` (See example below)
33
34    Attributes
35    ----------
36    dtype : dtype
37        Data type of the matrix
38    shape : 2-tuple
39        Shape of the matrix
40    ndim : int
41        Number of dimensions (this is always 2)
42    nnz
43        Number of stored values, including explicit zeros
44    data
45        DIA format data array of the matrix
46    offsets
47        DIA format offset array of the matrix
48
49    Notes
50    -----
51
52    Sparse matrices can be used in arithmetic operations: they support
53    addition, subtraction, multiplication, division, and matrix power.
54
55    Examples
56    --------
57
58    >>> import numpy as np
59    >>> from scipy.sparse import dia_matrix
60    >>> dia_matrix((3, 4), dtype=np.int8).toarray()
61    array([[0, 0, 0, 0],
62           [0, 0, 0, 0],
63           [0, 0, 0, 0]], dtype=int8)
64
65    >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
66    >>> offsets = np.array([0, -1, 2])
67    >>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
68    array([[1, 0, 3, 0],
69           [1, 2, 0, 4],
70           [0, 2, 3, 0],
71           [0, 0, 3, 4]])
72
73    >>> from scipy.sparse import dia_matrix
74    >>> n = 10
75    >>> ex = np.ones(n)
76    >>> data = np.array([ex, 2 * ex, ex])
77    >>> offsets = np.array([-1, 0, 1])
78    >>> dia_matrix((data, offsets), shape=(n, n)).toarray()
79    array([[2., 1., 0., ..., 0., 0., 0.],
80           [1., 2., 1., ..., 0., 0., 0.],
81           [0., 1., 2., ..., 0., 0., 0.],
82           ...,
83           [0., 0., 0., ..., 2., 1., 0.],
84           [0., 0., 0., ..., 1., 2., 1.],
85           [0., 0., 0., ..., 0., 1., 2.]])
86    """
87    format = 'dia'
88
89    def __init__(self, arg1, shape=None, dtype=None, copy=False):
90        _data_matrix.__init__(self)
91
92        if isspmatrix_dia(arg1):
93            if copy:
94                arg1 = arg1.copy()
95            self.data = arg1.data
96            self.offsets = arg1.offsets
97            self._shape = check_shape(arg1.shape)
98        elif isspmatrix(arg1):
99            if isspmatrix_dia(arg1) and copy:
100                A = arg1.copy()
101            else:
102                A = arg1.todia()
103            self.data = A.data
104            self.offsets = A.offsets
105            self._shape = check_shape(A.shape)
106        elif isinstance(arg1, tuple):
107            if isshape(arg1):
108                # It's a tuple of matrix dimensions (M, N)
109                # create empty matrix
110                self._shape = check_shape(arg1)
111                self.data = np.zeros((0,0), getdtype(dtype, default=float))
112                idx_dtype = get_index_dtype(maxval=max(self.shape))
113                self.offsets = np.zeros((0), dtype=idx_dtype)
114            else:
115                try:
116                    # Try interpreting it as (data, offsets)
117                    data, offsets = arg1
118                except Exception as e:
119                    raise ValueError('unrecognized form for dia_matrix constructor') from e
120                else:
121                    if shape is None:
122                        raise ValueError('expected a shape argument')
123                    self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
124                    self.offsets = np.atleast_1d(np.array(arg1[1],
125                                                          dtype=get_index_dtype(maxval=max(shape)),
126                                                          copy=copy))
127                    self._shape = check_shape(shape)
128        else:
129            #must be dense, convert to COO first, then to DIA
130            try:
131                arg1 = np.asarray(arg1)
132            except Exception as e:
133                raise ValueError("unrecognized form for"
134                        " %s_matrix constructor" % self.format) from e
135            from .coo import coo_matrix
136            A = coo_matrix(arg1, dtype=dtype, shape=shape).todia()
137            self.data = A.data
138            self.offsets = A.offsets
139            self._shape = check_shape(A.shape)
140
141        if dtype is not None:
142            self.data = self.data.astype(dtype)
143
144        #check format
145        if self.offsets.ndim != 1:
146            raise ValueError('offsets array must have rank 1')
147
148        if self.data.ndim != 2:
149            raise ValueError('data array must have rank 2')
150
151        if self.data.shape[0] != len(self.offsets):
152            raise ValueError('number of diagonals (%d) '
153                    'does not match the number of offsets (%d)'
154                    % (self.data.shape[0], len(self.offsets)))
155
156        if len(np.unique(self.offsets)) != len(self.offsets):
157            raise ValueError('offset array contains duplicate values')
158
159    def __repr__(self):
160        format = _formats[self.getformat()][1]
161        return "<%dx%d sparse matrix of type '%s'\n" \
162               "\twith %d stored elements (%d diagonals) in %s format>" % \
163               (self.shape + (self.dtype.type, self.nnz, self.data.shape[0],
164                              format))
165
166    def _data_mask(self):
167        """Returns a mask of the same shape as self.data, where
168        mask[i,j] is True when data[i,j] corresponds to a stored element."""
169        num_rows, num_cols = self.shape
170        offset_inds = np.arange(self.data.shape[1])
171        row = offset_inds - self.offsets[:,None]
172        mask = (row >= 0)
173        mask &= (row < num_rows)
174        mask &= (offset_inds < num_cols)
175        return mask
176
177    def count_nonzero(self):
178        mask = self._data_mask()
179        return np.count_nonzero(self.data[mask])
180
181    def getnnz(self, axis=None):
182        if axis is not None:
183            raise NotImplementedError("getnnz over an axis is not implemented "
184                                      "for DIA format")
185        M,N = self.shape
186        nnz = 0
187        for k in self.offsets:
188            if k > 0:
189                nnz += min(M,N-k)
190            else:
191                nnz += min(M+k,N)
192        return int(nnz)
193
194    getnnz.__doc__ = spmatrix.getnnz.__doc__
195    count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
196
197    def sum(self, axis=None, dtype=None, out=None):
198        validateaxis(axis)
199
200        if axis is not None and axis < 0:
201            axis += 2
202
203        res_dtype = get_sum_dtype(self.dtype)
204        num_rows, num_cols = self.shape
205        ret = None
206
207        if axis == 0:
208            mask = self._data_mask()
209            x = (self.data * mask).sum(axis=0)
210            if x.shape[0] == num_cols:
211                res = x
212            else:
213                res = np.zeros(num_cols, dtype=x.dtype)
214                res[:x.shape[0]] = x
215            ret = matrix(res, dtype=res_dtype)
216
217        else:
218            row_sums = np.zeros(num_rows, dtype=res_dtype)
219            one = np.ones(num_cols, dtype=res_dtype)
220            dia_matvec(num_rows, num_cols, len(self.offsets),
221                       self.data.shape[1], self.offsets, self.data, one, row_sums)
222
223            row_sums = matrix(row_sums)
224
225            if axis is None:
226                return row_sums.sum(dtype=dtype, out=out)
227
228            if axis is not None:
229                row_sums = row_sums.T
230
231            ret = matrix(row_sums.sum(axis=axis))
232
233        if out is not None and out.shape != ret.shape:
234            raise ValueError("dimensions do not match")
235
236        return ret.sum(axis=(), dtype=dtype, out=out)
237
238    sum.__doc__ = spmatrix.sum.__doc__
239
240    def _add_sparse(self, other):
241
242        # Check if other is also of type dia_matrix
243        if not isinstance(other, type(self)):
244            # If other is not of type dia_matrix, default to
245            # converting to csr_matrix, as is done in the _add_sparse
246            # method of parent class spmatrix
247            return self.tocsr()._add_sparse(other)
248
249        # The task is to compute m = self + other
250        # Start by making a copy of self, of the datatype
251        # that should result from adding self and other
252        dtype = np.promote_types(self.dtype, other.dtype)
253        m = self.astype(dtype, copy=True)
254
255        # Then, add all the stored diagonals of other.
256        for d in other.offsets:
257            # Check if the diagonal has already been added.
258            if d in m.offsets:
259                # If the diagonal is already there, we need to take
260                # the sum of the existing and the new
261                m.setdiag(m.diagonal(d) + other.diagonal(d), d)
262            else:
263                m.setdiag(other.diagonal(d), d)
264        return m
265
266    def _mul_vector(self, other):
267        x = other
268
269        y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
270                                                       x.dtype.char))
271
272        L = self.data.shape[1]
273
274        M,N = self.shape
275
276        dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel())
277
278        return y
279
280    def _mul_multimatrix(self, other):
281        return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T])
282
283    def _setdiag(self, values, k=0):
284        M, N = self.shape
285
286        if values.ndim == 0:
287            # broadcast
288            values_n = np.inf
289        else:
290            values_n = len(values)
291
292        if k < 0:
293            n = min(M + k, N, values_n)
294            min_index = 0
295            max_index = n
296        else:
297            n = min(M, N - k, values_n)
298            min_index = k
299            max_index = k + n
300
301        if values.ndim != 0:
302            # allow also longer sequences
303            values = values[:n]
304
305        data_rows, data_cols = self.data.shape
306        if k in self.offsets:
307            if max_index > data_cols:
308                data = np.zeros((data_rows, max_index), dtype=self.data.dtype)
309                data[:, :data_cols] = self.data
310                self.data = data
311            self.data[self.offsets == k, min_index:max_index] = values
312        else:
313            self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
314            m = max(max_index, data_cols)
315            data = np.zeros((data_rows + 1, m), dtype=self.data.dtype)
316            data[:-1, :data_cols] = self.data
317            data[-1, min_index:max_index] = values
318            self.data = data
319
320    def todia(self, copy=False):
321        if copy:
322            return self.copy()
323        else:
324            return self
325
326    todia.__doc__ = spmatrix.todia.__doc__
327
328    def transpose(self, axes=None, copy=False):
329        if axes is not None:
330            raise ValueError(("Sparse matrices do not support "
331                              "an 'axes' parameter because swapping "
332                              "dimensions is the only logical permutation."))
333
334        num_rows, num_cols = self.shape
335        max_dim = max(self.shape)
336
337        # flip diagonal offsets
338        offsets = -self.offsets
339
340        # re-align the data matrix
341        r = np.arange(len(offsets), dtype=np.intc)[:, None]
342        c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
343        pad_amount = max(0, max_dim-self.data.shape[1])
344        data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
345                                              dtype=self.data.dtype)))
346        data = data[r, c]
347        return dia_matrix((data, offsets), shape=(
348            num_cols, num_rows), copy=copy)
349
350    transpose.__doc__ = spmatrix.transpose.__doc__
351
352    def diagonal(self, k=0):
353        rows, cols = self.shape
354        if k <= -rows or k >= cols:
355            return np.empty(0, dtype=self.data.dtype)
356        idx, = np.nonzero(self.offsets == k)
357        first_col = max(0, k)
358        last_col = min(rows + k, cols)
359        result_size = last_col - first_col
360        if idx.size == 0:
361            return np.zeros(result_size, dtype=self.data.dtype)
362        result = self.data[idx[0], first_col:last_col]
363        padding = result_size - len(result)
364        if padding > 0:
365            result = np.pad(result, (0, padding), mode='constant')
366        return result
367
368    diagonal.__doc__ = spmatrix.diagonal.__doc__
369
370    def tocsc(self, copy=False):
371        from .csc import csc_matrix
372        if self.nnz == 0:
373            return csc_matrix(self.shape, dtype=self.dtype)
374
375        num_rows, num_cols = self.shape
376        num_offsets, offset_len = self.data.shape
377        offset_inds = np.arange(offset_len)
378
379        row = offset_inds - self.offsets[:,None]
380        mask = (row >= 0)
381        mask &= (row < num_rows)
382        mask &= (offset_inds < num_cols)
383        mask &= (self.data != 0)
384
385        idx_dtype = get_index_dtype(maxval=max(self.shape))
386        indptr = np.zeros(num_cols + 1, dtype=idx_dtype)
387        indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0))
388        indptr[offset_len+1:] = indptr[offset_len]
389        indices = row.T[mask.T].astype(idx_dtype, copy=False)
390        data = self.data.T[mask.T]
391        return csc_matrix((data, indices, indptr), shape=self.shape,
392                          dtype=self.dtype)
393
394    tocsc.__doc__ = spmatrix.tocsc.__doc__
395
396    def tocoo(self, copy=False):
397        num_rows, num_cols = self.shape
398        num_offsets, offset_len = self.data.shape
399        offset_inds = np.arange(offset_len)
400
401        row = offset_inds - self.offsets[:,None]
402        mask = (row >= 0)
403        mask &= (row < num_rows)
404        mask &= (offset_inds < num_cols)
405        mask &= (self.data != 0)
406        row = row[mask]
407        col = np.tile(offset_inds, num_offsets)[mask.ravel()]
408        data = self.data[mask]
409
410        from .coo import coo_matrix
411        A = coo_matrix((data,(row,col)), shape=self.shape, dtype=self.dtype)
412        A.has_canonical_format = True
413        return A
414
415    tocoo.__doc__ = spmatrix.tocoo.__doc__
416
417    # needed by _data_matrix
418    def _with_data(self, data, copy=True):
419        """Returns a matrix with the same sparsity structure as self,
420        but with different data.  By default the structure arrays are copied.
421        """
422        if copy:
423            return dia_matrix((data, self.offsets.copy()), shape=self.shape)
424        else:
425            return dia_matrix((data,self.offsets), shape=self.shape)
426
427    def resize(self, *shape):
428        shape = check_shape(shape)
429        M, N = shape
430        # we do not need to handle the case of expanding N
431        self.data = self.data[:, :N]
432
433        if (M > self.shape[0] and
434                np.any(self.offsets + self.shape[0] < self.data.shape[1])):
435            # explicitly clear values that were previously hidden
436            mask = (self.offsets[:, None] + self.shape[0] <=
437                    np.arange(self.data.shape[1]))
438            self.data[mask] = 0
439
440        self._shape = shape
441
442    resize.__doc__ = spmatrix.resize.__doc__
443
444
445def isspmatrix_dia(x):
446    """Is x of dia_matrix type?
447
448    Parameters
449    ----------
450    x
451        object to check for being a dia matrix
452
453    Returns
454    -------
455    bool
456        True if x is a dia matrix, False otherwise
457
458    Examples
459    --------
460    >>> from scipy.sparse import dia_matrix, isspmatrix_dia
461    >>> isspmatrix_dia(dia_matrix([[5]]))
462    True
463
464    >>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia
465    >>> isspmatrix_dia(csr_matrix([[5]]))
466    False
467    """
468    return isinstance(x, dia_matrix)
469