function test114
%TEST114 performance of reduce-to-scalar
%
% Times reduce-to-scalar on a large sparse matrix whose entries are all 1.
% First the built-in MATLAB max/min/sum/prod reductions are timed, then
% GB_mex_reduce_to_scalar is timed for a set of logical operators and for
% every (operator, type) pair accepted by GB_spec_operator, sweeping over a
% list of thread counts.  Results are printed; nothing is returned.
%
% The caller's thread count and chunk size are restored on exit, including
% when the benchmark is interrupted by an error.

% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
% SPDX-License-Identifier: Apache-2.0

rng ('default') ;

% save the current settings and guarantee they are restored, even on error
[save_nthreads, save_chunk] = nthreads_get ;
restore_settings = onCleanup ( ...
    @() nthreads_set (save_nthreads, save_chunk)) ; %#ok<NASGU>

chunk = 4096 ;
% Must be in ascending order: each sweep stops at the first entry that
% exceeds nthreads_max.
nthreads_list = [1 2 4 8 16 20 40 64 160] ;
nthreads_max = GB_mex_omp_max_threads ;
ntrials = 10 ;

%-------------------------------------------------------------------------------
% big matrix ...
fprintf ('\nbig matrix, no early exit\n') ;
n = 8000 ;
A = sparse (ones (n)) ;

%-------------------------------------------------------------------------------
% baseline: built-in MATLAB reductions (total time over ntrials)

tic
for trial = 1:ntrials
    s = full (max (max (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB max: %g\n', tm) ;

tic
for trial = 1:ntrials
    s = full (min (min (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB min: %g\n', tm) ;

tic
for trial = 1:ntrials
    s = full (sum (sum (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB sum: %g\n', tm) ;

tic
for trial = 1:ntrials
    s = full (prod (prod (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB prod: %g\n', tm) ;

%-------------------------------------------------------------------------------
% GraphBLAS input: the matrix, its pattern, and (set below) its class

S.matrix = A ;
S.pattern = logical (spones (A)) ;

[~, ~, add_ops, types, ~, ~] = GB_spec_opsall ;
types = types.all ;

%-------------------------------------------------------------------------------
% logical operators on a logical matrix

ops = { 'or', 'and', 'xor', 'eq', 'any' } ;
for k1 = 1:length(ops)
    op = ops {k1} ;
    fprintf ('\nGraphBLAS: op %s\n', op) ;
    S.class = 'logical' ;
    cin = logical (0) ;
    sweep_threads (cin, op, S, nthreads_list, nthreads_max, chunk, ntrials) ;
end

%-------------------------------------------------------------------------------
% all additive operators, for each type

ops = add_ops ;
for k1 = 1:length(ops)
    op = ops {k1} ;
    fprintf ('\nGraphBLAS: op %s\n', op) ;
    % NOTE(review): the loop starts at the 2nd type.  Presumably types{1} is
    % 'logical', which is benchmarked separately above; if so, the 'logical'
    % case in the switch below is never reached.  Confirm against
    % GB_spec_opsall.
    for k2 = 2:length(types)
        atype = types {k2} ;
        S.class = atype ;
        fprintf ('\ntype: %s\n', atype) ;
        % skip (op, type) combinations that GB_spec_operator rejects
        try
            GB_spec_operator (op, atype) ;
        catch
            continue
        end
        % initial scalar, of the same type as the matrix
        switch atype
            case 'logical'
                cin = logical (0) ;
            case 'int8'          % GrB_INT8
                cin = int8 (0) ;
            case 'uint8'         % GrB_UINT8
                cin = uint8 (0) ;
            case 'int16'         % GrB_INT16
                cin = int16 (0) ;
            case 'uint16'        % GrB_UINT16
                cin = uint16 (0) ;
            case 'int32'         % GrB_INT32
                cin = int32 (0) ;
            case 'uint32'        % GrB_UINT32
                cin = uint32 (0) ;
            case 'int64'         % GrB_INT64
                cin = int64 (0) ;
            case 'uint64'        % GrB_UINT64
                cin = uint64 (0) ;
            case 'single'        % GrB_FP32
                cin = single (0) ;
            case 'double'        % GrB_FP64
                cin = double (0) ;
            case 'single complex'        % GxB_FC32
                cin = complex (single (0)) ;
            case 'double complex'        % GxB_FC64
                cin = complex (double (0)) ;
        end
        sweep_threads (cin, op, S, nthreads_list, nthreads_max, chunk, ...
            ntrials) ;
    end
end

%-------------------------------------------------------------------------------
% sweep_threads: time one reduction for each thread count
%-------------------------------------------------------------------------------

function sweep_threads (cin, op, S, nthreads_list, nthreads_max, chunk, ntrials)
%SWEEP_THREADS time GB_mex_reduce_to_scalar over a list of thread counts.
%
%   cin             initial scalar passed to GB_mex_reduce_to_scalar
%   op              operator name (string)
%   S               struct describing the input matrix
%   nthreads_list   thread counts to try, in ascending order
%   nthreads_max    the sweep stops at the first count larger than this
%   chunk           chunk size passed to nthreads_set
%   ntrials         number of repetitions accumulated per thread count
%
% Prints, for each thread count, the accumulated value of grbresults over
% all trials and the speedup relative to the single-thread run.

% NaN until the 1-thread run is seen, so the speedup prints as NaN rather
% than raising an undefined-variable error if the list does not start at 1.
t1 = NaN ;
for nthreads = nthreads_list
    if (nthreads > nthreads_max)
        break ;
    end
    nthreads_set (nthreads, chunk) ;
    t = 0 ;
    for trial = 1:ntrials
        c1 = GB_mex_reduce_to_scalar (cin, [ ], op, S) ; %#ok<NASGU>
        % presumably grbresults reports the time of the GraphBLAS call just
        % made -- TODO confirm
        t = t + grbresults ;
    end
    if (nthreads == 1)
        t1 = t ;
    end
    fprintf ('nthreads %3d %12.4f  speedup %12.4f\n', ...
        nthreads, t, t1/t) ;
end
142