1function stats = ssstats (A, kind, skip_chol, skip_dmperm, Z)
2%SSSTATS compute matrix statistics for the SuiteSparse Matrix Collection
3% Example:
4%   stats = ssstats (A, kind, skip_chol, skip_dmperm, Z)
5%
6% A: a sparse matrix
7% kind: a string with the Problem.kind
8% Z: empty, or a sparse matrix the same size as A.  Only used for
9%   pattern_symmetry, nzero, and bandwidth statistics, described below.
10%
11% Requires amd, cholmod, RBio, and CSparse.  Computes the following
12% statistics, returning them as fields in the stats struct:
13%
14%   nrows               number of rows
15%   ncols               number of columns
16%   nnz                 number of entries in A
17%   RBtype              Rutherford/Boeing type
18%   isBinary            1 if binary, 0 otherwise
19%   isReal              1 if real, 0 if complex
20%   cholcand            1 if a candidate for sparse Cholesky, 0 otherwise
21%   numerical_symmetry  numeric symmetry (0 to 1, where 1=symmetric)
22%   pattern_symmetry    pattern symmetry (0 to 1, where 1=symmetric)
23%   nnzdiag             nnz (diag (A)) if A is square, 0 otherwise
24%   nzero               nnz (Z)
25%   nentries            nnz (A) + nnz (Z)
26%   amd_lnz             nnz(L) for chol(C(p,p)) where, C=A+A', p=amd(C)
27%   amd_flops           flop count for chol(C(p,p)) where, C=A+A', p=amd(C)
28%   amd_vnz             nnz in Householder vectors for qr(A(:,colamd(A)))
29%   amd_rnz             nnz in R for qr(A(:,colamd(A)))
30%   nblocks             # of blocks from dmperm
31%   sprank              sprank(A)
32%   ncc                 # of strongly connected components
33%   posdef              1 if positive definite, 0 otherwise
34%   isND                1 if a 2D/3D problem, 0 otherwise
35%   isGraph             1 if a graph, 0 otherwise
36%   lowerbandwidth      lower bandwidth, [i j]=find(A), max(0,max(i-j))
37%   upperbandwidth      upper bandwidth, [i j]=find(A), max(0,max(j-i))
38%   rcm_lowerbandwidth  lower bandwidth after symrcm
39%   rcm_upperbandwidth  upper bandwidth after symrcm
40%   xmin            smallest nonzero value
41%   xmax            largest nonzero value
42%
43% amd_lnz and amd_flops are not computed for rectangular matrices.
44%
45% Ordering statistics are not computed for graphs (amd_*), since they are not
46% linear systems.  For directed or undirected graphs (square matrices that
47% represent graph problems), the diagonal is typically not present, but it is
48% implicitly present.  Thus, sprank(A) is always equal to the # of rows, and
49% nblocks is the same as ncc, for these problems.  stats.sprank and
50% stats.nblocks are left as -2.
51%
52% The bandwidth statistics include the Z matrix.  For rectangular matrices,
53% symrcm is not applicable, and the rcm_lowerbandwidth and rcm_upperbandwidth
54% statistics are the same as the unpermuted versions, lowerbandwidth and
55% upperbandwidth, respectively.
56%
57% If a statistic is not computed, it is set to -2.  If an attempt to compute
58% the statistic was made but failed, it is set to -1.
59%
60% See also ssget, ssindex, RBtype, amd, colamd, cs_scc, cs_sqr, dmperm,
61% cholmod2, symrcm
62
63% Copyright 2006-2014, Timothy A. Davis
64
65% Requires the SuiteSparse set of packages: CHOLMOD, RBio, CSparse
66
67%-------------------------------------------------------------------------------
68% ensure the matrix is sparse
69%-------------------------------------------------------------------------------
70
71if (~issparse (A))
72    A = sparse (A) ;
73end
74[m n] = size (A) ;
75
76uncomputed = -2 ;
77failure = -1 ;
78
79if (nargin < 5)
80    Z = sparse (m,n) ;
81    AZ = A ;
82else
83    AZ = A + Z ;
84    if (nnz (AZ) ~= nnz (A) + nnz (Z))
85        error ('A and Z overlap!')
86    end
87end
88
89if (nargin < 4)
90    skip_dmperm = 0 ;
91end
92if (nargin < 3)
93    skip_chol = 0 ;
94end
95if (nargin < 2)
96    kind = '' ;
97end
98
99%-------------------------------------------------------------------------------
100% basic stats
101%-------------------------------------------------------------------------------
102
103tic ;
104stats.nrows = m ;
105stats.ncols = n ;
106stats.nnz = nnz (A) ;
107stats.RBtype = RBtype (AZ) ;                    % Rutherford/Boeing type
108stats.isBinary = (stats.RBtype (1) == 'p') ;
109stats.isReal = (stats.RBtype (1) ~= 'c') ;
110
111fprintf ('RBtype: %s time: %g\n', stats.RBtype, toc) ;
112
113%-------------------------------------------------------------------------------
114% symmetry and Cholesky candidacy
115%-------------------------------------------------------------------------------
116
117% get the symmetry
118tic ;
119[s xmatched pmatched nzoffdiag nnzdiag] = spsym (A) ;
120if (m ~= n)
121    stats.numerical_symmetry = 0 ;
122    stats.pattern_symmetry = 0 ;
123elseif (nzoffdiag > 0)
124    stats.numerical_symmetry = xmatched / nzoffdiag ;
125    stats.pattern_symmetry = pmatched / nzoffdiag ;
126else
127    stats.numerical_symmetry = 1 ;
128    stats.pattern_symmetry = 1 ;
129end
130psym_A = stats.pattern_symmetry ;   % symmetry of the pattern of A (excluding Z)
131stats.nnzdiag = nnzdiag ;
132stats.cholcand = (s >= 6) ;         % check if Cholesky candidate
133stats.nzero = nnz (Z) ;
134stats.nentries = stats.nnz + stats.nzero ;
135
136fprintf ('cholcand: %d\n', stats.cholcand) ;
137fprintf ('numerical_symmetry: %g pattern_symmetry: %g time: %g\n', ...
138    stats.numerical_symmetry, stats.pattern_symmetry, toc) ;
139
140% recompute the pattern symmetry with Z included
141tic ;
142if (m == n && stats.nzero > 0)
143    [s xmatched pmatched nzoffdiag] = spsym (AZ) ;
144    if (nzoffdiag > 0)
145        stats.pattern_symmetry = pmatched / nzoffdiag ;
146    else
147        stats.pattern_symmetry = 1 ;
148    end
149end
150fprintf ('stats with A+Zeros:\n') ;
151fprintf ('numerical_symmetry: %g pattern_symmetry: %g time: %g\n', ...
152    stats.numerical_symmetry, stats.pattern_symmetry, toc) ;
153
154%-------------------------------------------------------------------------------
155% bandwidth (includes explicit zeros)
156%-------------------------------------------------------------------------------
157
158[i j] = find (AZ) ;
159stats.lowerbandwidth = max (0, max (i-j)) ;
160stats.upperbandwidth = max (0, max (j-i)) ;
161clear i j
162fprintf ('lo %d up %d ', ...
163    stats.lowerbandwidth, stats.upperbandwidth) ;
164% now with symrcm, if the matrix is square
165stats.rcm_lowerbandwidth = stats.lowerbandwidth ;
166stats.rcm_upperbandwidth = stats.upperbandwidth ;
167if (m == n)
168    try
169        p = symrcm (AZ) ;
170        [i j] = find (AZ (p,p)) ;
171        stats.rcm_lowerbandwidth = max (0, max (i-j)) ;
172        stats.rcm_upperbandwidth = max (0, max (j-i)) ;
173    catch
174        fprintf ('================ symrcm failed ') ;
175        stats.rcm_lowerbandwidth = failure ;
176        stats.rcm_upperbandwidth = failure ;
177    end
178    fprintf ('rcm: lo %d up %d', ...
179        stats.rcm_lowerbandwidth, stats.rcm_upperbandwidth) ;
180end
181fprintf ('\n') ;
182clear AZ i j p
183
184%-------------------------------------------------------------------------------
185% isND
186%-------------------------------------------------------------------------------
187
188s = 0 ;
189if (~isempty (strfind (kind, 'structural')))
190    s = 1 ;
191elseif (~isempty (strfind (kind, 'fluid')))
192    s = 1 ;
193elseif (~isempty (strfind (kind, '2D')))
194    s = 1 ;
195elseif (~isempty (strfind (kind, 'reduction')))
196    s = 1 ;
197elseif (~isempty (strfind (kind, 'electromagnetics')))
198    s = 1 ;
199elseif (~isempty (strfind (kind, 'semiconductor')))
200    s = 1 ;
201elseif (~isempty (strfind (kind, 'thermal')))
202    s = 1 ;
203elseif (~isempty (strfind (kind, 'materials')))
204    s = 1 ;
205elseif (~isempty (strfind (kind, 'acoustics')))
206    s = 1 ;
207elseif (~isempty (strfind (kind, 'vision')))
208    s = 1 ;
209elseif (~isempty (strfind (kind, 'robotics')))
210    s = 1 ;
211end
212stats.isND = s ;
213
214fprintf ('isND %d\n', stats.isND) ;
215
216%-------------------------------------------------------------------------------
217% determine if this is a graph (directed, undirected, or bipartite)
218%-------------------------------------------------------------------------------
219
220if (~isempty (strfind (kind, 'graph')) && isempty (strfind (kind, 'graphics')))
221    % this is a directed, undirected, or bipartite graph.
222    % it might also be a multigraph, and weighted or unweighted.
223    stats.isGraph = 1 ;
224else
225    stats.isGraph = 0 ;
226end
227
228fprintf ('isGraph %d\n', stats.isGraph) ;
229
230%-------------------------------------------------------------------------------
231% determine if positive definite
232%-------------------------------------------------------------------------------
233
234fprintf ('start Cholesky\n') ;
235tic ;
236if (~stats.cholcand)
237
238    % not a candidate for Cholesky, so it cannot be positive definite
239    fprintf ('not a Cholesky candidate\n') ;
240    stats.posdef = 0 ;
241
242elseif (stats.isBinary)
243
244    % For all symmetric binary matrices:  only identity matrices are positive
245    % definite.  All others are indefinite.  Since at this point, A is a
246    % Cholesky candidate, and thus we know that A is symmetric with a zero-free
247    % diagonal.  So just a quick check of nnz(A) is needed.
248    % See: McKay et al, "Acyclic Digraphs and Eigenvalues of (0,1)-Matrices",
249    % Journal of Integer Sequences, Vol. 7 (2004), Article 04.3.3.
250    % http://www.cs.uwaterloo.ca/journals/JIS/VOL7/Sloane/sloane15.html
251
252    stats.posdef = (stats.nnz == stats.nrows) ;
253
254elseif (skip_chol)
255
256    % Cholesky was skipped
257    fprintf ('skip Cholesky\n') ;
258    stats.posdef = uncomputed ;
259
260else
261
262    % try chol
263    try
264        [x, cstats] = cholmod2 (A, ones (stats.ncols,1)) ;
265        rcond = cstats (1) ;
266        fprintf ('rcond: %g\n', rcond) ;
267        stats.posdef = (rcond > 0) ;
268    catch
269        % chol failed
270        disp (lasterr) ;
271        fprintf ('sparse Cholesky failed\n') ;
272        stats.posdef = failure ;
273    end
274    clear x cstats
275end
276
277fprintf ('posdef: %d time: %g\n', stats.posdef, toc) ;
278
279%-------------------------------------------------------------------------------
280% transpose A if m < n, for ordering methods
281%-------------------------------------------------------------------------------
282
283tic ;
284if (m < n)
285    try
286        A = A' ;            % A is now tall and thin, or square
287    catch
288        disp (lasterr) ;
289        fprintf ('transpose failed...\n') ;
290        return ;
291    end
292    [m n] = size (A) ;
293end
294if (~isreal (A))
295    try
296        A = spones (A) ;
297    catch
298        disp (lasterr) ;
299        fprintf ('conversion from complex failed...\n') ;
300        return ;
301    end
302end
303fprintf ('computed A transpose if needed, time: %g\n', toc) ;
304
305%-------------------------------------------------------------------------------
306% order entire matrix with AMD, if square
307%-------------------------------------------------------------------------------
308
309if (m == n && ~stats.isGraph)
310
311    tic ;
312    try
313        if (psym_A < 1)
314            C = A|A' ;      % A has unsymmetric pattern, so symmetrize it
315        else
316            C = A ;         % A already has symmetric pattern
317        end
318    catch
319        disp (lasterr) ;
320        fprintf ('A+A'' failed\n') ;
321    end
322    fprintf ('computed A+A'', time: %g\nstart AMD\n', toc) ;
323
324    tic ;
325    try
326        p = amd (C) ;
327        c = symbfact (C (p,p)) ;
328        stats.amd_lnz = sum (c) ;           % nnz (chol (C))
329        stats.amd_flops = sum (c.^2) ;      % flop counts for chol (C)
330    catch
331        disp (lasterr) ;
332        fprintf ('amd failed\n') ;
333        stats.amd_lnz = failure ;
334        stats.amd_flops = failure ;
335    end
336    clear p c C
337    fprintf ('AMD lnz %d flops %g time: %g\n', ...
338        stats.amd_lnz, stats.amd_flops, toc) ;
339
340else
341
342    % not computed if rectangular, or for graph problems
343    stats.amd_lnz = uncomputed ;
344    stats.amd_flops = uncomputed ;
345    fprintf ('AMD skipped\n') ;
346
347end
348
349%-------------------------------------------------------------------------------
350% order entire matrix with COLAMD, for LU bounds
351%-------------------------------------------------------------------------------
352
353if (~stats.isGraph)
354
355    fprintf ('start colamd:\n') ;
356    tic ;
357    try
358        q = colamd (A) ;
359        [vnz,rnz] = cs_sqr (A (:,q)) ;
360        stats.amd_rnz = rnz ;   % nnz (V), upper bound on L, for A(:,colamd(A))
361        stats.amd_vnz = vnz ;   % nnz (R), upper bound on U, for A(:,colamd(A))
362    catch
363        disp (lasterr) ;
364        fprintf ('colamd2 and cs_sqr failed\n') ;
365        stats.amd_vnz = failure ;
366        stats.amd_rnz = failure ;
367    end
368    clear q
369    fprintf ('COLAMD rnz %d vnz %d time: %g\n', ...
370        stats.amd_rnz, stats.amd_vnz, toc) ;
371
372else
373
374    % not computed for graph problems
375    stats.amd_rnz = uncomputed ;
376    stats.amd_vnz = uncomputed ;
377    fprintf ('COLAMD skipped\n') ;
378
379end
380
381%-------------------------------------------------------------------------------
382% strongly connected components
383%-------------------------------------------------------------------------------
384
385tic ;
386fprintf ('start scc:\n') ;
387try
388    % find the # of strongly connected components of the graph of a square A,
389    % or # of connected components of the bipartite graph of a rectangular A.
390    if (m == n)
391        [p r] = cs_scc (A) ;
392    else
393        [p r] = cs_scc (spaugment (A)) ;
394    end
395    stats.ncc = length (r) - 1 ;
396    clear p r
397catch
398    disp (lasterr) ;
399    fprintf ('cs_scc failed\n') ;
400    stats.ncc = failure ;
401end
402fprintf ('scc %d, time: %g\n', stats.ncc, toc) ;
403
404%-------------------------------------------------------------------------------
405% Dulmage-Mendelsohn permutation, and order each block
406%-------------------------------------------------------------------------------
407
408tic ;
409if (m == n && stats.isGraph && isempty (strfind (kind, 'bipartite')))
410    % for directed and undirected graphs (square matrices), the diagonal is
411    % implicitly present.  Thus, nblocks is the same as ncc, and the graph has
412    % full sprank.  dmperm *is* computed for bipartite graphs, however.
413    skip_dmperm = 1 ;
414end
415
416if (skip_dmperm)
417
418    fprintf ('skip dmperm\n') ;
419    stats.nblocks = uncomputed ;
420    stats.sprank = uncomputed ;
421
422else
423
424    try
425        % find the Dulmage-Mendelsohn decomposition
426        fprintf ('start dmperm:\n') ;
427        % [p,q,r,s,cc,rr] = cs_dmperm (A) ;
428        [p,q,r,s,cc,rr] = dmperm (A) ;
429        nblocks = length (r) - 1 ;
430        stats.nblocks = nblocks ;   % # of blocks in block-triangular form
431        stats.sprank = rr(4)-1 ;    % structural rank
432    catch
433        disp (lasterr) ;
434        fprintf ('dmperm failed\n') ;
435        stats.nblocks = failure ;
436        stats.sprank = failure ;
437    end
438end
439
440fprintf ('nblocks %d\n', stats.nblocks) ;
441fprintf ('sprank %d, time: %g\n', stats.sprank, toc) ;
442
443%-------------------------------------------------------------------------------
444% xmin and xmax
445%-------------------------------------------------------------------------------
446
447[i,j,x] = find (A) ;
448stats.xmin = min (x) ;
449stats.xmax = max (x) ;
450fprintf ('xmin %32.16g xmax %32.16g\n', stats.xmin, stats.xmax) ;
451if (stats.xmin == 0 || stats.xmax == 0)
452    error ('explicit zeros in the matrix!') ;
453end
454
455%-------------------------------------------------------------------------------
456
457fprintf ('ssstats done\n') ;
458