function test114
%TEST114 performance of reduce-to-scalar
%
% Benchmarks reduction of a large sparse matrix to a scalar.  First the
% built-in MATLAB max, min, sum, and prod are timed with tic/toc.  Then
% GB_mex_reduce_to_scalar is timed for a set of logical monoids, and for every
% (operator, type) pair from GB_spec_opsall that GB_spec_operator accepts,
% over a list of thread counts.  Results are printed; nothing is returned.
%
% The thread settings obtained from nthreads_get on entry are restored on exit,
% including when the test is interrupted or fails with an error.

% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
% SPDX-License-Identifier: Apache-2.0

rng ('default') ;

% Save the current thread settings and guarantee they are restored no matter
% how this function exits.  (The variable is deliberately not named "save",
% which would shadow the built-in MATLAB function of that name.)
[saved_nthreads, saved_chunk] = nthreads_get ;
restore_threads = onCleanup ( ...
    @() nthreads_set (saved_nthreads, saved_chunk)) ; %#ok<NASGU>

chunk = 4096 ;
% Thread counts to try, in increasing order.  The list previously contained a
% stray "2" between 16 and 40 (a repeated, out-of-order entry); it is assumed
% to be a typo for 20.  NOTE(review): confirm the intended value.
nthreads_list = [1 2 4 8 16 20 40 64 160] ;
nthreads_max = GB_mex_omp_max_threads ;
ntrials = 10 ;

%-------------------------------------------------------------------------------
% big matrix ...
fprintf ('\nbig matrix, no early exit\n') ;
n = 8000 ;
A = sparse (ones (n)) ;

% baseline: built-in MATLAB reductions, total wall-clock time over all trials

tic
for trial = 1:ntrials
    s = full (max (max (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB max: %g\n', tm) ;

tic
for trial = 1:ntrials
    s = full (min (min (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB min: %g\n', tm) ;

tic
for trial = 1:ntrials
    s = full (sum (sum (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB sum: %g\n', tm) ;

tic
for trial = 1:ntrials
    s = full (prod (prod (A))) ; %#ok<NASGU>
end
tm = toc ;
fprintf ('MATLAB prod: %g\n', tm) ;

% input struct passed to GB_mex_reduce_to_scalar
S.matrix = A ;
S.pattern = logical (spones (A)) ;

[~, ~, add_ops, types, ~, ~] = GB_spec_opsall ;
types = types.all ;

%-------------------------------------------------------------------------------
% logical operators
%-------------------------------------------------------------------------------

ops = { 'or', 'and', 'xor', 'eq', 'any' } ;
for k1 = 1:length(ops)
    op = ops {k1} ;
    fprintf ('\nGraphBLAS: op %s\n', op) ;
    S.class = 'logical' ;
    cin = logical (0) ;
    time_reduce (op, cin, S, nthreads_list, nthreads_max, chunk, ntrials) ;
end

%-------------------------------------------------------------------------------
% operators from GB_spec_opsall, for each type
%-------------------------------------------------------------------------------

ops = add_ops ;
for k1 = 1:length(ops)
    op = ops {k1} ;
    fprintf ('\nGraphBLAS: op %s\n', op) ;
    % starts at the 2nd entry of types.all, as the original test does
    for k2 = 2:length(types)
        atype = types {k2} ;
        S.class = atype ;
        fprintf ('\ntype: %s\n', atype) ;
        try
            % skip (op, type) pairs that GB_spec_operator rejects
            GB_spec_operator (op, atype) ;
        catch
            continue
        end
        % initial scalar of the matching type
        switch atype
            case 'logical'
                cin = logical (0) ;
            case 'int8'             % GrB_INT8
                cin = int8 (0) ;
            case 'uint8'            % GrB_UINT8
                cin = uint8 (0) ;
            case 'int16'            % GrB_INT16
                cin = int16 (0) ;
            case 'uint16'           % GrB_UINT16
                cin = uint16 (0) ;
            case 'int32'            % GrB_INT32
                cin = int32 (0) ;
            case 'uint32'           % GrB_UINT32
                cin = uint32 (0) ;
            case 'int64'            % GrB_INT64
                cin = int64 (0) ;
            case 'uint64'           % GrB_UINT64
                cin = uint64 (0) ;
            case 'single'           % GrB_FP32
                cin = single (0) ;
            case 'double'           % GrB_FP64
                cin = double (0) ;
            case 'single complex'   % GxB_FC32
                cin = complex (single (0)) ;
            case 'double complex'   % GxB_FC64
                cin = complex (double (0)) ;
            otherwise
                % do not silently reuse cin from the previous type
                fprintf ('unrecognized type, skipped\n') ;
                continue
        end
        time_reduce (op, cin, S, nthreads_list, nthreads_max, chunk, ntrials) ;
    end
end

% thread settings are restored by the onCleanup object created above

%-------------------------------------------------------------------------------

function time_reduce (op, cin, S, nthreads_list, nthreads_max, chunk, ntrials)
%TIME_REDUCE time GB_mex_reduce_to_scalar for each thread count and print it
%
% For each entry of nthreads_list (stopping at the first one that exceeds
% nthreads_max), sets the thread count and chunk size, calls
% GB_mex_reduce_to_scalar ntrials times, and prints the accumulated value of
% grbresults together with the speedup relative to the 1-thread run.
% grbresults is presumably the time of the most recent GraphBLAS call
% (NOTE(review): confirm against grbresults.m).

t1 = nan ;  % 1-thread time; stays NaN if the list never includes 1 thread
for nthreads = nthreads_list
    if (nthreads > nthreads_max)
        break ;
    end
    nthreads_set (nthreads, chunk) ;
    t = 0 ;
    for trial = 1:ntrials
        GB_mex_reduce_to_scalar (cin, [ ], op, S) ;
        t = t + grbresults ;
    end
    if (nthreads == 1)
        t1 = t ;
    end
    fprintf ('nthreads %3d %12.4f speedup %12.4f\n', ...
        nthreads, t, t1/t) ;
end