function test122
%TEST122 performance tests for GrB_assign
%
% Benchmarks C(I,J)<!M> += A, computed by GB_mex_subassign with the 'plus'
% accumulator and a complemented mask, over a sweep of densities for C, A,
% and M and over a range of thread counts.  C is n-by-n; A and M are k-by-k;
% I = J = a random permutation of 1:k.  A density value of 2 in the sweeps
% below selects a completely full matrix instead of a sprand/sprandn one.
%
% The thread count and chunk size in effect on entry are restored on exit,
% including when the test errors out or is interrupted.

% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
% SPDX-License-Identifier: Apache-2.0

fprintf ('test122:------------------- C(I,J)<!M> += A:\n') ;

% remember the caller's settings; the onCleanup object puts them back when
% this function's workspace is destroyed (normal return, error, or Ctrl-C)
[save_nthreads save_chunk] = nthreads_get ;
restore_threads = onCleanup ( ...
    @() nthreads_set (save_nthreads, save_chunk)) ; %#ok<NASGU>
chunk = 4096 ;

rng ('default') ;
n = 4000 ;

k = 3000 ;

% I0.begin = 0 ;
% I0.inc = 1 ;
% I0.end = k-1 ;
I1 = randperm (k) ;         % 1-based index list, used by the MATLAB reference
I0 = uint64 (I1) - 1 ;      % same list shifted to 0-based, passed to the mexFunction

% descriptor passed to GB_mex_subassign: use the complement of the mask
d.mask = 'complement' ;

ncores = feature ('numcores') ;

% density sweep shared by C, A, and M (2 means completely full)
densities = [2 0 1e-6 1e-5 1e-4 1e-3 1e-2 0.1 1] ;

for dc = densities

    if (dc == 2)
        C0 = sparse (rand (n)) ;
    else
        C0 = sprand (n, n, dc) ;
    end

    for da = densities

        if (da == 2)
            A = sparse (rand (k)) ;
        else
            A = sprandn (k, k, da) ;
        end

        for dm = densities

            if (dm == 2)
                M = sparse (ones (k)) ;
            else
                M = spones (sprand (k, k, dm)) ;
            end

            Mbool = logical (M) ;

            fprintf ('\n--------------------------------------\n') ;
            fprintf ('dc = %g, dm = %g, da = %g\n', dc, dm, da) ;
            % fprintf ('dc = %g nnz(C) %8.4f million\n', dc, nnz(C0)/1e6) ;
            % fprintf ('dm = %g nnz(M) %8.4f million\n', dm, nnz(M)/1e6) ;
            % fprintf ('da = %g nnz(A) %8.4f million\n', da, nnz(A)/1e6) ;

            % MATLAB reference time; stays inf when the reference is skipped
            tm = inf ;
            if (n < 500)
                % MATLAB is exceedingly slow for this case
                tic
                C1 = C0 ;
                % Csub = C1 (1:k, 1:k) ;
                Csub = C1 (I1, I1) ;
                Csub (~Mbool) = Csub (~Mbool) + A (~Mbool) ;
                % C1 (1:k, 1:k) = Csub ;
                C1 (I1, I1) = Csub ;
                tm = toc ;
            end

            % single-thread time for this configuration; assigned in the
            % nthreads == 1 pass and used as the baseline for later passes
            t1 = inf ;

            for nthreads = [1 2 4 8 16 20 32 40 64]
                if (nthreads > 2*ncores)
                    break ;
                end
                % skip the multi-threaded runs when the single-threaded
                % run already finished in under 0.01 (units as reported by
                % grbresults)
                if (nthreads > 1 && t1 < 0.01)
                    break ;
                end

                nthreads_set (nthreads, chunk) ;

                % The call is issued twice and only the timing read after
                % the second call is kept.
                % NOTE(review): presumably the first call is a warm-up and
                % grbresults reports the time of the most recent call --
                % confirm against grbresults.
                C2 = GB_mex_subassign (C0, M, 'plus', A, I0, I0, d) ;
                C2 = GB_mex_subassign (C0, M, 'plus', A, I0, I0, d) ;
                tg = grbresults ;
                if (n < 500)
                    % C1 exists only when the MATLAB reference was computed
                    assert (isequal (C1, C2.matrix)) ;
                end
                if (nthreads == 1)
                    t1 = tg ;
                end

                fprintf ('%3d : MATLAB: %10.4f GB: %8.4f', nthreads, tm, tg) ;
                fprintf (' speedup %10.4f %10.4f\n', tm / tg, t1/tg) ;

            end
        end
    end
end

% the caller's thread count and chunk size are restored here by the
% restore_threads onCleanup object as the function returns