function test118
%TEST118 performance tests for GrB_assign
%
% Times the masked assignment C(:,:)<M> = A for a sweep of densities of C,
% A and M, comparing the built-in logical-indexing assignment
% C (Mbool) = A (Mbool) against GB_mex_assign at several thread counts.
% Each GB_mex_assign result is checked against the built-in result.

% test C(:,:)<M> = A

% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
% SPDX-License-Identifier: Apache-2.0

fprintf ('test118 ----------------------------------- C(:,:)<M> = A\n') ;

% remember the current thread/chunk settings and restore them when this
% function exits, even if an assertion below fails
[saved_nthreads, saved_chunk] = nthreads_get ;
restore_threads = onCleanup (@() nthreads_set (saved_nthreads, saved_chunk)) ;
chunk = 4096 ;

rng ('default') ;
n = 2000 ;

% index range struct passed to GB_mex_assign for both rows and columns:
% 0, 1, ..., n-1
I.begin = 0 ;
I.inc = 1 ;
I.end = n-1 ;

ncores = feature ('numcores') ;

densities = [1e-5 1e-4 1e-3 1e-2 1e-1 0.5] ;

for dc = [0 densities]

    % initial C with density dc.  Note that sparse (n,n,dc) would instead
    % create a matrix with the single entry C(n,n) = dc.
    C0 = sprand (n, n, dc) ;

    for da = densities
        A  = sprand (n, n, da) ;

        for dm = densities

            M = spones (sprand (n, n, dm)) ;
            Mbool = logical (M) ;

            fprintf ('\n--------------------------------------\n') ;
            fprintf ('dc: %g, da: %g, dm: %g ', dc, da, dm) ;
            fprintf ('\n') ;

            fprintf ('nnz(M): %g million, ',  nnz (M) / 1e6) ;
            fprintf ('nnz(A): %g million\n',  nnz (A) / 1e6) ;

            % reference result and time, using built-in logical indexing
            tic
            C1 = C0 ;
            C1 (Mbool) = A (Mbool) ;
            tm = toc ;

            % time reported for the single-threaded GB_mex_assign run
            t1 = 0 ;

            for nthreads = [1 2 4 8 16 20 32 40 64]
                if (nthreads > 2*ncores)
                    break ;
                end
                nthreads_set (nthreads, chunk) ;

                % skip the multi-threaded runs when the single-threaded
                % run took less than 0.1 (as reported by grbresults)
                if (nthreads > 1 && t1 < 0.1)
                    continue
                end

                % if (nnz(A)<nnz(M)) use method 06s, else method 06n
                % The assignment is done twice; grbresults is queried only
                % after the second call (presumably the first is a warmup).
                C2 = GB_mex_assign (C0, M, [ ], A, I, I) ;
                C2 = GB_mex_assign (C0, M, [ ], A, I, I) ;
                tg = grbresults ;
                assert (isequal (C1, C2.matrix)) ;
                if (nthreads == 1)
                    t1 = tg ;
                end

                % speedups: relative to 1 thread, and relative to built-in
                fprintf ('%3d : %8.4f GB: %8.4f', nthreads, tm, tg) ;
                fprintf (' speedup: %8.2f  %8.2f\n', t1/tg, tm / tg) ;

            end
        end
    end
end
82