1% compares SPQR with SPQR+GPU on lots of sparse matrices
2
3clear
4index = UFget ;
5f = find ((index.isReal == 1) & (index.isBinary == 0) & (index.isGraph == 0)) ;
6nmat = length (f) ;
7howbig = max (index.amd_rnz (f), index.amd_lnz (f)) ;
8[ignore i] = sort (howbig) ;
9howbig = howbig (i) ;
10f = f (i) ;
11nmat = length (f) ;
12
13keep = ones (nmat,1) ;
14kinds = UFkinds ;
15for k = 1:nmat
16    id = f (k) ;
17    if (~isempty (strfind (kinds {id}, 'subsequent')))
18        keep (k) = 0 ;
19    end
20    if (~isempty (strfind (kinds {id}, 'duplicate')))
21        keep (k) = 0 ;
22    end
23end
24f = f (find (keep)) ;
25nmat = length (f) ;
26
27for k = 1:nmat
28    id = f (k) ;
29    kind = kinds {id} ;
30    fprintf ('%4d %4d %s/%s : %s\n', ...
31        k, id, index.Group {id}, index.Name {id}, kind);
32end
33
34fprintf ('\n# of matrices %d\n', nmat) ;
35
36fprintf ('Hit enter to continue: ') ; pause
37
38have_gpu = 1 ;
39
40opts.Q = 'discard' ;
41
42% try
43%     load Results4
44% catch
45    % 1: spqr on CPU with colamd
46    % 2: spqr on CPU with metis
47    % 3: spqr on GPU with colamd
48    % 4: spqr on GPU with metis
49    Info        = cell (nmat,4) ;
50    Flops       = zeros (nmat, 4) ;
51    Mem         = zeros (nmat, 4) ;
52    Rnz         = zeros (nmat, 4) ;
53    T_analyze   = zeros (nmat, 4) ;
54    T_factorize = zeros (nmat, 4) ;
55
56    % matrix stats
57    Rank_est    = zeros (nmat, 1) ;
58    Anz         = zeros (nmat, 1) ;
59    Augment     = zeros (nmat, 1) ; % 1 if A was augmented, 0 otherwise
60% end
61
62for k = 700:nmat
63
64    %---------------------------------------------------------------------------
65    % skip the problem if it is already done
66    %---------------------------------------------------------------------------
67
68    if (all (T_factorize (k,:) > 0))
69        continue
70    end
71
72    %---------------------------------------------------------------------------
73    % get the problem
74    %---------------------------------------------------------------------------
75
76    id = f (k) ;
77    kind = kinds {id} ;
78    fprintf ('\n%4d %4d %s/%s : %s\n', ...
79        k, id, index.Group {id}, index.Name {id}, kind) ;
80
81    Prob = UFget (id, index) ;
82    A = Prob.A ;
83    anorm = norm (A,1) ;
84    [m n] = size (A) ;
85    if (m < n)
86        A = A' ;
87        [m n] = size (A) ;
88    end
89    b = ones (m,1) ;
90
91    %---------------------------------------------------------------------------
92    % run SPQR without the GPU
93    %---------------------------------------------------------------------------
94
95    for ordering = 1:2
96
97        if (ordering == 1)
98            opts.ordering = 'colamd' ;
99        else
100            opts.ordering = 'metis' ;
101        end
102
103        [C R E info] = spqr (A, b, opts) ;
104        rnz = nnz (R) ;
105        clear Q R E
106
107        if (ordering == 1)
108
109            Rank_est (k) = info.rank_A_estimate ;
110            fprintf ('rankest %d ', Rank_est (k)) ;
111
112            if (info.rank_A_estimate < min (m,n))
113                % oops, A is rank deficient.  Try it again
114                fprintf ('(rank deficient)\n') ;
115                Augment (k) = 1 ;
116                A = [A ; anorm*speye(n)] ;
117                [m n] = size (A) ;
118                b = ones (m,1) ;
119                clear Q R E
120                [C R E info] = spqr (A, b, opts) ;
121                rnz = nnz (R) ;
122                clear Q R E
123            else
124                fprintf ('(ok)\n') ;
125            end
126
127            Anz (k) = nnz (A) ;
128        end
129
130        Info {k,ordering} = info ;
131        info
132        Flops (k,ordering) = info.flops ;
133        Mem (k,ordering) = info.memory_usage_in_bytes ;
134        Rnz (k,ordering) = rnz ;
135        T_analyze (k,ordering) = info.analyze_time ;
136        T_factorize (k,ordering) = info.factorize_time ;
137
138    end
139
140    %---------------------------------------------------------------------------
141    % run SPQR with the GPU
142    %---------------------------------------------------------------------------
143
144    mwrite ('A.mtx', A) ;
145
146    for ordering = 1:2
147
148        try
149            info = spqr_gpu (ordering) ;
150            info
151            Info {k,2 + ordering} = info ;
152            Flops (k,2 + ordering) = info.flops ;
153            Mem (k,2 + ordering) = info.memory_usage_in_bytes ;
154            Rnz (k,2 + ordering) = info.nnzR ;
155            T_analyze (k,2 + ordering) = info.analyze_time ;
156            T_factorize (k,2 + ordering) = info.factorize_time ;
157
158            % Protect the data against caught OOM errors
159            if (T_factorize(k,2+ordering) == 0)
160                T_factorize(k,2+ordering) = nan ;
161            end
162
163        catch
164            % GPU version failed
165            T_factorize (k,2 + ordering) = nan ;
166        end
167    end
168
169%    if (Flops (k,1) ~= Flops (k,3))
170%        Flops (k,[1 3])
171%        error ('colamd flops are not the same ... why?') ;
172%    end
173%    if (Flops (k,2) ~= Flops (k,4))
174%        Flops (k,[2 4])
175%        error ('metis flops are not the same ... why?') ;
176%    end
177
178    %---------------------------------------------------------------------------
179    % save the results and plot them
180    %---------------------------------------------------------------------------
181
182    save Results6 ...
183        Info Flops Mem Rnz T_analyze T_factorize Rank_est Anz Augment f
184
185    intensity1 = Flops (1:k,1) ./ Mem (1:k,1) ;
186    intensity2 = Flops (1:k,2) ./ Mem (1:k,2) ;
187
188    gflops1 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,1) ;
189    gflops2 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,2) ;
190    gflops3 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,3) ;
191    gflops4 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,4) ;
192
193    fprintf ('CPU colamd factime %8.2f  gflops : %8.2f\n', ...
194        T_factorize (k,1), gflops1 (k)) ;
195
196    fprintf ('CPU metis  factime %8.2f  gflops : %8.2f\n', ...
197        T_factorize (k,2), gflops2 (k)) ;
198
199    fprintf ('GPU colamd factime %8.2f  gflops : %8.2f\n', ...
200        T_factorize (k,3), gflops3 (k)) ;
201
202    fprintf ('GPU metis  factime %8.2f  gflops : %8.2f\n', ...
203        T_factorize (k,4), gflops4 (k)) ;
204
205    subplot (2,3,1) ; plot (intensity1, gflops1, 'o') ; title ('CPU:colamd') ;
206    ylabel ('Gflops') ; xlabel ('flops/byte') ;
207    subplot (2,3,2) ; plot (intensity1, gflops3, 'o') ; title ('GPU:colamd') ;
208    ylabel ('Gflops') ; xlabel ('flops/byte') ;
209    subplot (2,3,3) ; plot (intensity1, gflops3./gflops1, 'o') ;
210    title ('GPU speedup (colamd)') ;
211
212    subplot (2,3,4) ; plot (intensity2, gflops2, 'o') ; title ('CPU:metis') ;
213    ylabel ('Gflops') ; xlabel ('flops/byte') ;
214    subplot (2,3,5) ; plot (intensity2, gflops4, 'o') ; title ('GPU:metis') ;
215    ylabel ('Gflops') ; xlabel ('flops/byte') ;
216    subplot (2,3,6) ; plot (intensity2, gflops4./gflops2, 'o') ;
217    title ('GPU speedup (metis)') ;
218    drawnow
219
220    diary off
221    diary on
222
223    % fprintf ('Hit enter: ') ; pause
224
225end
226