1% compares SPQR with SPQR+GPU on lots of sparse matrices
2
3clear
4index = UFget ;
5f = find ((index.isReal == 1) & (index.isBinary == 0) & (index.isGraph == 0)) ;
6nmat = length (f) ;
7howbig = max (index.amd_rnz (f), index.amd_lnz (f)) ;
8[ignore i] = sort (howbig) ;
9howbig = howbig (i) ;
10f = f (i) ;
11nmat = length (f) ;
12
13keep = ones (nmat,1) ;
14kinds = UFkinds ;
15for k = 1:nmat
16    id = f (k) ;
17    if (~isempty (strfind (kinds {id}, 'subsequent')))
18        keep (k) = 0 ;
19    end
20    if (~isempty (strfind (kinds {id}, 'duplicate')))
21        keep (k) = 0 ;
22    end
23end
24f = f (find (keep)) ;
25nmat = length (f) ;
26
27rankdeficient = [317] ;
28skip_list = [1657] ;
29
30revisit = [253 317 1556]
31
32for k = 1:nmat
33    id = f (k) ;
34    kind = kinds {id} ;
35    fprintf ('%4d %4d %s/%s : %s\n', ...
36        k, id, index.Group {id}, index.Name {id}, kind);
37end
38
39fprintf ('\n# of matrices %d\n', nmat) ;
40
41fprintf ('Hit enter to continue: ') ; pause
42
43have_gpu = 1 ;
44
45opts.Q = 'discard' ;
46
47try
48    load Results7
49catch
50    % 1: spqr on CPU with colamd
51    % 2: spqr on CPU with metis
52    % 3: spqr on GPU with colamd
53    % 4: spqr on GPU with metis
54    Info        = cell (nmat,4) ;
55    Flops       = zeros (nmat, 4) ;
56    Mem         = zeros (nmat, 4) ;
57    Rnz         = zeros (nmat, 4) ;
58    T_analyze   = zeros (nmat, 4) ;
59    T_factorize = zeros (nmat, 4) ;
60
61    % matrix stats
62    Rank_est    = zeros (nmat, 1) ;
63    Anz         = zeros (nmat, 1) ;
64    Augment     = zeros (nmat, 1) ; % 1 if A was augmented, 0 otherwise
65end
66
67for k = 1:nmat
68
69    %---------------------------------------------------------------------------
70    % skip the problem if it is already done
71    %---------------------------------------------------------------------------
72
73    if (all (T_factorize (k,:) > 0))
74        continue
75    end
76
77    %---------------------------------------------------------------------------
78    % get the problem
79    %---------------------------------------------------------------------------
80
81    id = f (k) ;
82    kind = kinds {id} ;
83    fprintf ('\n%4d %4d %s/%s : %s\n', ...
84        k, id, index.Group {id}, index.Name {id}, kind) ;
85
86    if (any (id == skip_list))
87        fprintf ('skip\n') ;
88        continue
89    end
90
91    Prob = UFget (id, index) ;
92    A = Prob.A ;
93    anorm = norm (A,1) ;
94    [m n] = size (A) ;
95    if (m < n)
96        A = A' ;
97        [m n] = size (A) ;
98    end
99    b = ones (m,1) ;
100
101    %---------------------------------------------------------------------------
102    % run SPQR without the GPU
103    %---------------------------------------------------------------------------
104
105    for ordering = 1:2
106
107        if (ordering == 1)
108            opts.ordering = 'colamd' ;
109        else
110            opts.ordering = 'metis' ;
111        end
112
113        [C R E info] = spqr (A, b, opts) ;
114        rnz = nnz (R) ;
115
116        if (ordering == 1)
117
118            Rank_est (k) = info.rank_A_estimate ;
119            fprintf ('rankest %d ', Rank_est (k)) ;
120
121            if (info.rank_A_estimate < min (m,n))
122                % oops, A is rank deficient.  Try it again
123                fprintf ('(rank deficient)\n') ;
124                Augment (k) = 1 ;
125                A = [A ; anorm*speye(n)] ;
126                [m n] = size (A) ;
127                b = ones (m,1) ;
128                clear Q R E
129                [C R E info] = spqr (A, b, opts) ;
130                rnz = nnz (R) ;
131            else
132                fprintf ('(ok)\n') ;
133            end
134
135            Anz (k) = nnz (A) ;
136        end
137
138        x = E*(R\C) ;
139
140        Info {k,ordering} = info ;
141        info
142        Flops (k,ordering) = info.flops ;
143        Mem (k,ordering) = info.memory_usage_in_bytes ;
144        Rnz (k,ordering) = rnz ;
145        T_analyze (k,ordering) = info.analyze_time ;
146        T_factorize (k,ordering) = info.factorize_time ;
147
148        atrnorm (ordering) = norm (A'*(A*x-b)) / norm (A'*A,1) ;;
149        fprintf ('relative atrnorm for CPU: %g\n', atrnorm (ordering)) ;
150
151        clear Q R E
152
153    end
154
155    if (any (id == rankdeficient))
156        fprintf ('skipping rank deficient case for the GPU\n') ;
157        continue ;
158    end
159
160    %---------------------------------------------------------------------------
161    % run SPQR with the GPU
162    %---------------------------------------------------------------------------
163
164    mwrite ('A.mtx', A) ;
165
166    for ordering = 1:2
167
168%       try
169            info = spqr_gpu (ordering, A) ;
170            info
171            Info {k,2 + ordering} = info ;
172            Flops (k,2 + ordering) = info.flops ;
173            Mem (k,2 + ordering) = info.memory_usage_in_bytes ;
174            Rnz (k,2 + ordering) = info.nnzR ;
175            T_analyze (k,2 + ordering) = info.analyze_time ;
176            T_factorize (k,2 + ordering) = info.factorize_time ;
177
178            % Protect the data against caught OOM errors
179            if (T_factorize(k,2+ordering) == 0)
180                T_factorize(k,2+ordering) = nan ;
181                % pause
182                error ('oom') ;
183            end
184
185            [C,R,E,B,X,err] = spqr_gpu2 (ordering, A) ;
186            clear C R E B X
187
188%       catch
189%           % GPU version failed
190%           T_factorize (k,2 + ordering) = nan ;
191%           pause
192%       end
193
194    end
195
196%    if (Flops (k,1) ~= Flops (k,3))
197%        Flops (k,[1 3])
198%        error ('colamd flops are not the same ... why?') ;
199%    end
200%    if (Flops (k,2) ~= Flops (k,4))
201%        Flops (k,[2 4])
202%        error ('metis flops are not the same ... why?') ;
203%    end
204
205    %---------------------------------------------------------------------------
206    % save the results and plot them
207    %---------------------------------------------------------------------------
208
209    save Results7 ...
210        Info Flops Mem Rnz T_analyze T_factorize Rank_est Anz Augment f
211
212    intensity1 = Flops (1:k,1) ./ Mem (1:k,1) ;
213    intensity2 = Flops (1:k,2) ./ Mem (1:k,2) ;
214
215    gflops1 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,1) ;
216    gflops2 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,2) ;
217    gflops3 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,3) ;
218    gflops4 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,4) ;
219
220    fprintf ('CPU colamd factime %8.2f  gflops : %8.2f\n', ...
221        T_factorize (k,1), gflops1 (k)) ;
222
223    fprintf ('CPU metis  factime %8.2f  gflops : %8.2f\n', ...
224        T_factorize (k,2), gflops2 (k)) ;
225
226    fprintf ('GPU colamd factime %8.2f  gflops : %8.2f\n', ...
227        T_factorize (k,3), gflops3 (k)) ;
228
229    fprintf ('GPU metis  factime %8.2f  gflops : %8.2f\n', ...
230        T_factorize (k,4), gflops4 (k)) ;
231
232%{
233    subplot (2,3,1) ; plot (intensity1, gflops1, 'o') ; title ('CPU:colamd') ;
234    ylabel ('Gflops') ; xlabel ('flops/byte') ;
235    subplot (2,3,2) ; plot (intensity1, gflops3, 'o') ; title ('GPU:colamd') ;
236    ylabel ('Gflops') ; xlabel ('flops/byte') ;
237    subplot (2,3,3) ; plot (intensity1, gflops3./gflops1, 'o') ;
238    title ('GPU speedup (colamd)') ;
239
240    subplot (2,3,4) ; plot (intensity2, gflops2, 'o') ; title ('CPU:metis') ;
241    ylabel ('Gflops') ; xlabel ('flops/byte') ;
242    subplot (2,3,5) ; plot (intensity2, gflops4, 'o') ; title ('GPU:metis') ;
243    ylabel ('Gflops') ; xlabel ('flops/byte') ;
244    subplot (2,3,6) ; plot (intensity2, gflops4./gflops2, 'o') ;
245    title ('GPU speedup (metis)') ;
246    drawnow
247%}
248
249    diary off
250    diary on
251
252    % fprintf ('Hit enter: ') ; pause
253
254end
255