1% compares SPQR with SPQR+GPU on lots of sparse matrices 2 3clear 4index = UFget ; 5f = find ((index.isReal == 1) & (index.isBinary == 0) & (index.isGraph == 0)) ; 6nmat = length (f) ; 7howbig = max (index.amd_rnz (f), index.amd_lnz (f)) ; 8[ignore i] = sort (howbig) ; 9howbig = howbig (i) ; 10f = f (i) ; 11nmat = length (f) ; 12 13keep = ones (nmat,1) ; 14kinds = UFkinds ; 15for k = 1:nmat 16 id = f (k) ; 17 if (~isempty (strfind (kinds {id}, 'subsequent'))) 18 keep (k) = 0 ; 19 end 20 if (~isempty (strfind (kinds {id}, 'duplicate'))) 21 keep (k) = 0 ; 22 end 23end 24f = f (find (keep)) ; 25nmat = length (f) ; 26 27rankdeficient = [317] ; 28skip_list = [1657] ; 29 30revisit = [253 317 1556] 31 32for k = 1:nmat 33 id = f (k) ; 34 kind = kinds {id} ; 35 fprintf ('%4d %4d %s/%s : %s\n', ... 36 k, id, index.Group {id}, index.Name {id}, kind); 37end 38 39fprintf ('\n# of matrices %d\n', nmat) ; 40 41fprintf ('Hit enter to continue: ') ; pause 42 43have_gpu = 1 ; 44 45opts.Q = 'discard' ; 46 47try 48 load Results7 49catch 50 % 1: spqr on CPU with colamd 51 % 2: spqr on CPU with metis 52 % 3: spqr on GPU with colamd 53 % 4: spqr on GPU with metis 54 Info = cell (nmat,4) ; 55 Flops = zeros (nmat, 4) ; 56 Mem = zeros (nmat, 4) ; 57 Rnz = zeros (nmat, 4) ; 58 T_analyze = zeros (nmat, 4) ; 59 T_factorize = zeros (nmat, 4) ; 60 61 % matrix stats 62 Rank_est = zeros (nmat, 1) ; 63 Anz = zeros (nmat, 1) ; 64 Augment = zeros (nmat, 1) ; % 1 if A was augmented, 0 otherwise 65end 66 67for k = 1:nmat 68 69 %--------------------------------------------------------------------------- 70 % skip the problem if it is already done 71 %--------------------------------------------------------------------------- 72 73 if (all (T_factorize (k,:) > 0)) 74 continue 75 end 76 77 %--------------------------------------------------------------------------- 78 % get the problem 79 %--------------------------------------------------------------------------- 80 81 id = f (k) ; 82 kind = kinds {id} ; 83 fprintf ('\n%4d %4d %s/%s : %s\n', ... 84 k, id, index.Group {id}, index.Name {id}, kind) ; 85 86 if (any (id == skip_list)) 87 fprintf ('skip\n') ; 88 continue 89 end 90 91 Prob = UFget (id, index) ; 92 A = Prob.A ; 93 anorm = norm (A,1) ; 94 [m n] = size (A) ; 95 if (m < n) 96 A = A' ; 97 [m n] = size (A) ; 98 end 99 b = ones (m,1) ; 100 101 %--------------------------------------------------------------------------- 102 % run SPQR without the GPU 103 %--------------------------------------------------------------------------- 104 105 for ordering = 1:2 106 107 if (ordering == 1) 108 opts.ordering = 'colamd' ; 109 else 110 opts.ordering = 'metis' ; 111 end 112 113 [C R E info] = spqr (A, b, opts) ; 114 rnz = nnz (R) ; 115 116 if (ordering == 1) 117 118 Rank_est (k) = info.rank_A_estimate ; 119 fprintf ('rankest %d ', Rank_est (k)) ; 120 121 if (info.rank_A_estimate < min (m,n)) 122 % oops, A is rank deficient. Try it again 123 fprintf ('(rank deficient)\n') ; 124 Augment (k) = 1 ; 125 A = [A ; anorm*speye(n)] ; 126 [m n] = size (A) ; 127 b = ones (m,1) ; 128 clear Q R E 129 [C R E info] = spqr (A, b, opts) ; 130 rnz = nnz (R) ; 131 else 132 fprintf ('(ok)\n') ; 133 end 134 135 Anz (k) = nnz (A) ; 136 end 137 138 x = E*(R\C) ; 139 140 Info {k,ordering} = info ; 141 info 142 Flops (k,ordering) = info.flops ; 143 Mem (k,ordering) = info.memory_usage_in_bytes ; 144 Rnz (k,ordering) = rnz ; 145 T_analyze (k,ordering) = info.analyze_time ; 146 T_factorize (k,ordering) = info.factorize_time ; 147 148 atrnorm (ordering) = norm (A'*(A*x-b)) / norm (A'*A,1) ;; 149 fprintf ('relative atrnorm for CPU: %g\n', atrnorm (ordering)) ; 150 151 clear Q R E 152 153 end 154 155 if (any (id == rankdeficient)) 156 fprintf ('skipping rank deficient case for the GPU\n') ; 157 continue ; 158 end 159 160 %--------------------------------------------------------------------------- 161 % run SPQR with the GPU 162 %--------------------------------------------------------------------------- 163 164 mwrite ('A.mtx', A) ; 165 166 for ordering = 1:2 167 168% try 169 info = spqr_gpu (ordering, A) ; 170 info 171 Info {k,2 + ordering} = info ; 172 Flops (k,2 + ordering) = info.flops ; 173 Mem (k,2 + ordering) = info.memory_usage_in_bytes ; 174 Rnz (k,2 + ordering) = info.nnzR ; 175 T_analyze (k,2 + ordering) = info.analyze_time ; 176 T_factorize (k,2 + ordering) = info.factorize_time ; 177 178 % Protect the data against caught OOM errors 179 if (T_factorize(k,2+ordering) == 0) 180 T_factorize(k,2+ordering) = nan ; 181 % pause 182 error ('oom') ; 183 end 184 185 [C,R,E,B,X,err] = spqr_gpu2 (ordering, A) ; 186 clear C R E B X 187 188% catch 189% % GPU version failed 190% T_factorize (k,2 + ordering) = nan ; 191% pause 192% end 193 194 end 195 196% if (Flops (k,1) ~= Flops (k,3)) 197% Flops (k,[1 3]) 198% error ('colamd flops are not the same ... why?') ; 199% end 200% if (Flops (k,2) ~= Flops (k,4)) 201% Flops (k,[2 4]) 202% error ('metis flops are not the same ... why?') ; 203% end 204 205 %--------------------------------------------------------------------------- 206 % save the results and plot them 207 %--------------------------------------------------------------------------- 208 209 save Results7 ... 210 Info Flops Mem Rnz T_analyze T_factorize Rank_est Anz Augment f 211 212 intensity1 = Flops (1:k,1) ./ Mem (1:k,1) ; 213 intensity2 = Flops (1:k,2) ./ Mem (1:k,2) ; 214 215 gflops1 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,1) ; 216 gflops2 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,2) ; 217 gflops3 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,3) ; 218 gflops4 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,4) ; 219 220 fprintf ('CPU colamd factime %8.2f gflops : %8.2f\n', ... 221 T_factorize (k,1), gflops1 (k)) ; 222 223 fprintf ('CPU metis factime %8.2f gflops : %8.2f\n', ... 224 T_factorize (k,2), gflops2 (k)) ; 225 226 fprintf ('GPU colamd factime %8.2f gflops : %8.2f\n', ... 227 T_factorize (k,3), gflops3 (k)) ; 228 229 fprintf ('GPU metis factime %8.2f gflops : %8.2f\n', ... 230 T_factorize (k,4), gflops4 (k)) ; 231 232%{ 233 subplot (2,3,1) ; plot (intensity1, gflops1, 'o') ; title ('CPU:colamd') ; 234 ylabel ('Gflops') ; xlabel ('flops/byte') ; 235 subplot (2,3,2) ; plot (intensity1, gflops3, 'o') ; title ('GPU:colamd') ; 236 ylabel ('Gflops') ; xlabel ('flops/byte') ; 237 subplot (2,3,3) ; plot (intensity1, gflops3./gflops1, 'o') ; 238 title ('GPU speedup (colamd)') ; 239 240 subplot (2,3,4) ; plot (intensity2, gflops2, 'o') ; title ('CPU:metis') ; 241 ylabel ('Gflops') ; xlabel ('flops/byte') ; 242 subplot (2,3,5) ; plot (intensity2, gflops4, 'o') ; title ('GPU:metis') ; 243 ylabel ('Gflops') ; xlabel ('flops/byte') ; 244 subplot (2,3,6) ; plot (intensity2, gflops4./gflops2, 'o') ; 245 title ('GPU speedup (metis)') ; 246 drawnow 247%} 248 249 diary off 250 diary on 251 252 % fprintf ('Hit enter: ') ; pause 253 254end 255