1function test71(f)
2%TEST71 performance comparison of triangle counting methods
3% Considers MATLAB:Sandia, GraphBLAS:Sandia, and GraphBLAS:Sandia2 only.
4%
5% Requires ssget and SuiteSparse/MATLAB_Tools/SSMULT.
6% To compile SSMULT in MATLAB, do:
7%
8%   cd SuiteSparse/MATLAB_Tools/SSMULT
9%   ssmult_install
10%
11% Then add the the path to your MATLAB path.
12% ssget is at http://sparse.tamu.edu
13%
14% This test saves its results in test71_results.mat, so it
15% can be restarted.  Results already obtained will be skipped.
16%
17% Can also run a list of matrices.  For example:
18% test71([936 2662])
19%
20% Edit ll_memory_limit and nz_limit to match the memory on your machine.
21
22% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
23% SPDX-License-Identifier: Apache-2.0
24
25[save save_chunk] = nthreads_get ;
26chunk = 4096 ;
27nthreads = feature ('numcores') ;
28nthreads_set (nthreads, chunk) ;
29
30L = sparse (1) ;
31try
32    ssmultsym (L,L) ;
33catch
34    here = pwd ;
35    cd ../../MATLAB_Tools/SSMULT
36    ssmult_install
37    cd (here) ;
38end
39
40% matrices are too big for some methods.  Edit memory sizes as needed.
41if (ismac || ispc)
42    % assume this is a laptop with limited memory
43    results = 'test71_results.mat' ;
44    ll_memory_limit = 12e9 ;       % 12 GB memory
45    nz_limit = 500e6 ;
46else
47    % assume this is a large server
48    results = 'test71_results_big.mat' ;
49    ll_memory_limit = 700e9 ;      % 700 GB memory
50    nz_limit = inf ;
51end
52
53index = ssget ;
54
55if (nargin == 0)
56    % get all square matrices and sort by nnz(A)
57    f = find (index.nrows == index.ncols) ;
58    [~, i] = sort (index.nnz (f)) ;
59    f = f (i) ;
60
61    Kokkos = [
62    2292 0.00441 79.9
63    2293 0.00502 72.5
64    2289 0.00580 70.0
65    2284 0.00390 108
66    2286 0.00611 76.8
67    2287 0.00630 80.1
68    2305 0.0754  30.7
69    2306 0.0177 133
70    2307 0.0184 132
71    2294 0.497 31.5
72    2285 0.733 58.5
73    1842 0.232 199
74    750 nan nan
75    1904 nan nan
76    2482 nan nan
77    916 nan nan
78    2276 nan nan
79    2662 nan nan
80    ] ;
81
82    f = Kokkos (:,1)' ;
83
84    % f = 1323
85    % f = f (1)
86    figure (1)
87end
88
89nmat = length (f) ;
90
91% try
92%     load (results) ;
93% catch
94    T = nan (nmat, 3) ;
95    Nedges = nan (nmat, 1) ;
96    Nnodes = nan (nmat, 1) ;
97    LLnz   = nan (nmat, 1) ;
98    LLmem  = nan (nmat, 1) ;
99    LLflops= nan (nmat, 1) ;
100    Ntri   = nan (nmat, 1) ;
101% end
102
103% merge with prior results
104if (nargin == 0)
105    try
106        old = load ('test71_results.mat') ;
107        ok = ~isnan (old.Nnodes) ;
108        Nnodes (ok) = old.Nnodes (ok) ;
109        Nedges (ok) = old.Nedges (ok) ;
110        ok = ~isnan (old.LLnz) ;
111        LLnz (ok) = old.LLnz (ok) ;
112        LLmem (ok) = old.LLmem (ok) ;
113        LLflops (ok) = old.LLflops (ok) ;
114        ok = ~isnan (Ntri) ;
115        Ntri (ok) = old.Ntri (ok) ;
116    catch
117    end
118end
119
120tstart = cputime ;
121x = sparse (0) ;
122
123for k = 1:nmat
124
125    % skip if already done
126    if (all (~isnan (T (k,:))))
127        continue ;
128    end
129
130    % get the problem
131    id = f (k) ;
132    if (index.nnz (id) > nz_limit)
133        fprintf ('\n%s/%s : too big\n', index.Group{id}, index.Name{id}) ;
134        continue ;
135    end
136    Prob = ssget (id, index) ;
137    A = Prob.A ;
138    name = Prob.name ;
139    clear Prob
140
141    % remove the diagonal and extract L and U
142    A = spones (A) ;
143    A = spones (A+A') ;
144    L = tril (A,-1) ;
145    U = triu (A,1) ;
146    n = size (A,1) ;
147    nz = nnz (L) ;
148    clear A
149
150    fprintf ('\nid %4d Matrix %s\n', id, name) ;
151    fprintf ('n %d edges %d\n', n, nz) ;
152    diary off
153    diary on
154
155    % do the work in unint32 inside GraphBLAS.  The mexFunction
156    % typecasts Lint and Uint from double (the sparse L and U)
157    % into uin32, when it creates GraphBLAS matrices.  This
158    % typecast time does not count against the run time of
159    % GraphBLAS, since outside of MATLAB the matrices would never
160    % be in double precision.
161    Lint.matrix = L ;
162    Lint.class = 'uint32' ;
163
164    Uint.matrix = U ;
165    Uint.class = 'uint32' ;
166
167    Nedges (k) = nz ;
168    Nnodes (k) = n ;
169
170    % count the triangles in MATLAB and in GraphBLAS
171
172    [nt1 t1] = GB_mex_tricount (3, x, x, Uint, x) ;      % C<L>=L*L
173    fprintf ('triangles: %d\n', nt1) ;
174    fprintf ('GraphBLAS outer product: %14.6f sec (rate %6.2f million/sec)', t1, 1e-6*nz/t1) ;
175    T (k,1) = t1 ;
176    Ntri (k) = nt1 ;
177
178    fprintf ('\n') ;
179
180    [nt2 t2] = GB_mex_tricount (5, x, x, Uint, Lint) ;  % C<U>=L'*U
181    fprintf ('GraphBLAS dot   product: %14.6f sec (rate %6.2f million/sec)', t2, 1e-6*nz/t2) ;
182    T (k,2) = t2 ;
183    assert (nt1 == nt2) ;
184
185    fprintf ('\n') ;
186    clear Uint Lint
187
188    % get llnz, flops, and memory for L*L
189    if (isnan (LLnz (k)))
190        llsymbolic = ssmultsym (L, L) ;
191        LLnz (k) = llsymbolic.nz ;
192        LLmem (k) = llsymbolic.memory ;
193        LLflops (k) = llsymbolic.flops ;
194    end
195
196    fprintf ('nnz(L*L) %g flops %g memory %g (GB)\n', ...
197        LLnz (k), LLflops (k), LLmem (k) / 1e9) ;
198
199    clear L Lint Uint
200
201    % MATLAB, but only do it once
202    ok = true ;
203    t3 = T (k,3) ;
204    if (isnan (t3))
205        t3 = inf ;
206        try
207            if (LLmem (k) < ll_memory_limit)
208                tic
209                nt3 = sum (sum ((U * U) .* U)) ;
210                t3 = toc ;
211                ok = (nt1 == nt3) ;
212            end
213        catch
214        end
215    end
216    assert (ok) ;
217
218    fprintf ('MATLAB (U*U).*U:         %14.6f sec (rate %6.2f million/sec)\n', t3, 1e-6*nz/t3) ;
219    T (k,3) = t3 ;
220
221    clear L
222
223    % save the results and redraw the plot, but wait at least 5 seconds
224    tnow = cputime - tstart ;
225    if (nargin == 0 && tnow > 5)
226        diary off
227        diary on
228        save (results, 'T', 'Nedges', 'Nnodes', 'f', ...
229            'LLnz', 'LLmem', 'LLflops', 'Ntri') ; ;
230        test71_plot (T, Nedges, Nnodes, LLnz, LLmem, LLflops, Ntri, f) ;
231        tstart = cputime ;
232    end
233
234end
235
236if (nargin == 0)
237    test71_plot ;
238end
239nthreads_set (save, save_chunk) ;
240