1% compares SPQR with SPQR+GPU on lots of sparse matrices 2 3clear 4index = UFget ; 5f = find ((index.isReal == 1) & (index.isBinary == 0) & (index.isGraph == 0)) ; 6nmat = length (f) ; 7howbig = max (index.amd_rnz (f), index.amd_lnz (f)) ; 8[ignore i] = sort (howbig) ; 9howbig = howbig (i) ; 10f = f (i) ; 11nmat = length (f) ; 12 13keep = ones (nmat,1) ; 14kinds = UFkinds ; 15for k = 1:nmat 16 id = f (k) ; 17 if (~isempty (strfind (kinds {id}, 'subsequent'))) 18 keep (k) = 0 ; 19 end 20 if (~isempty (strfind (kinds {id}, 'duplicate'))) 21 keep (k) = 0 ; 22 end 23end 24f = f (find (keep)) ; 25nmat = length (f) ; 26 27for k = 1:nmat 28 id = f (k) ; 29 kind = kinds {id} ; 30 fprintf ('%4d %4d %s/%s : %s\n', ... 31 k, id, index.Group {id}, index.Name {id}, kind); 32end 33 34fprintf ('\n# of matrices %d\n', nmat) ; 35 36fprintf ('Hit enter to continue: ') ; pause 37 38have_gpu = 1 ; 39 40opts.Q = 'discard' ; 41 42% try 43% load Results4 44% catch 45 % 1: spqr on CPU with colamd 46 % 2: spqr on CPU with metis 47 % 3: spqr on GPU with colamd 48 % 4: spqr on GPU with metis 49 Info = cell (nmat,4) ; 50 Flops = zeros (nmat, 4) ; 51 Mem = zeros (nmat, 4) ; 52 Rnz = zeros (nmat, 4) ; 53 T_analyze = zeros (nmat, 4) ; 54 T_factorize = zeros (nmat, 4) ; 55 56 % matrix stats 57 Rank_est = zeros (nmat, 1) ; 58 Anz = zeros (nmat, 1) ; 59 Augment = zeros (nmat, 1) ; % 1 if A was augmented, 0 otherwise 60% end 61 62for k = 700:nmat 63 64 %--------------------------------------------------------------------------- 65 % skip the problem if it is already done 66 %--------------------------------------------------------------------------- 67 68 if (all (T_factorize (k,:) > 0)) 69 continue 70 end 71 72 %--------------------------------------------------------------------------- 73 % get the problem 74 %--------------------------------------------------------------------------- 75 76 id = f (k) ; 77 kind = kinds {id} ; 78 fprintf ('\n%4d %4d %s/%s : %s\n', ... 79 k, id, index.Group {id}, index.Name {id}, kind) ; 80 81 Prob = UFget (id, index) ; 82 A = Prob.A ; 83 anorm = norm (A,1) ; 84 [m n] = size (A) ; 85 if (m < n) 86 A = A' ; 87 [m n] = size (A) ; 88 end 89 b = ones (m,1) ; 90 91 %--------------------------------------------------------------------------- 92 % run SPQR without the GPU 93 %--------------------------------------------------------------------------- 94 95 for ordering = 1:2 96 97 if (ordering == 1) 98 opts.ordering = 'colamd' ; 99 else 100 opts.ordering = 'metis' ; 101 end 102 103 [C R E info] = spqr (A, b, opts) ; 104 rnz = nnz (R) ; 105 clear Q R E 106 107 if (ordering == 1) 108 109 Rank_est (k) = info.rank_A_estimate ; 110 fprintf ('rankest %d ', Rank_est (k)) ; 111 112 if (info.rank_A_estimate < min (m,n)) 113 % oops, A is rank deficient. Try it again 114 fprintf ('(rank deficient)\n') ; 115 Augment (k) = 1 ; 116 A = [A ; anorm*speye(n)] ; 117 [m n] = size (A) ; 118 b = ones (m,1) ; 119 clear Q R E 120 [C R E info] = spqr (A, b, opts) ; 121 rnz = nnz (R) ; 122 clear Q R E 123 else 124 fprintf ('(ok)\n') ; 125 end 126 127 Anz (k) = nnz (A) ; 128 end 129 130 Info {k,ordering} = info ; 131 info 132 Flops (k,ordering) = info.flops ; 133 Mem (k,ordering) = info.memory_usage_in_bytes ; 134 Rnz (k,ordering) = rnz ; 135 T_analyze (k,ordering) = info.analyze_time ; 136 T_factorize (k,ordering) = info.factorize_time ; 137 138 end 139 140 %--------------------------------------------------------------------------- 141 % run SPQR with the GPU 142 %--------------------------------------------------------------------------- 143 144 mwrite ('A.mtx', A) ; 145 146 for ordering = 1:2 147 148 try 149 info = spqr_gpu (ordering) ; 150 info 151 Info {k,2 + ordering} = info ; 152 Flops (k,2 + ordering) = info.flops ; 153 Mem (k,2 + ordering) = info.memory_usage_in_bytes ; 154 Rnz (k,2 + ordering) = info.nnzR ; 155 T_analyze (k,2 + ordering) = info.analyze_time ; 156 T_factorize (k,2 + ordering) = info.factorize_time ; 157 158 % Protect the data against caught OOM errors 159 if (T_factorize(k,2+ordering) == 0) 160 T_factorize(k,2+ordering) = nan ; 161 end 162 163 catch 164 % GPU version failed 165 T_factorize (k,2 + ordering) = nan ; 166 end 167 end 168 169% if (Flops (k,1) ~= Flops (k,3)) 170% Flops (k,[1 3]) 171% error ('colamd flops are not the same ... why?') ; 172% end 173% if (Flops (k,2) ~= Flops (k,4)) 174% Flops (k,[2 4]) 175% error ('metis flops are not the same ... why?') ; 176% end 177 178 %--------------------------------------------------------------------------- 179 % save the results and plot them 180 %--------------------------------------------------------------------------- 181 182 save Results6 ... 183 Info Flops Mem Rnz T_analyze T_factorize Rank_est Anz Augment f 184 185 intensity1 = Flops (1:k,1) ./ Mem (1:k,1) ; 186 intensity2 = Flops (1:k,2) ./ Mem (1:k,2) ; 187 188 gflops1 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,1) ; 189 gflops2 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,2) ; 190 gflops3 = 1e-9 * Flops (1:k,1) ./ T_factorize (1:k,3) ; 191 gflops4 = 1e-9 * Flops (1:k,2) ./ T_factorize (1:k,4) ; 192 193 fprintf ('CPU colamd factime %8.2f gflops : %8.2f\n', ... 194 T_factorize (k,1), gflops1 (k)) ; 195 196 fprintf ('CPU metis factime %8.2f gflops : %8.2f\n', ... 197 T_factorize (k,2), gflops2 (k)) ; 198 199 fprintf ('GPU colamd factime %8.2f gflops : %8.2f\n', ... 200 T_factorize (k,3), gflops3 (k)) ; 201 202 fprintf ('GPU metis factime %8.2f gflops : %8.2f\n', ... 203 T_factorize (k,4), gflops4 (k)) ; 204 205 subplot (2,3,1) ; plot (intensity1, gflops1, 'o') ; title ('CPU:colamd') ; 206 ylabel ('Gflops') ; xlabel ('flops/byte') ; 207 subplot (2,3,2) ; plot (intensity1, gflops3, 'o') ; title ('GPU:colamd') ; 208 ylabel ('Gflops') ; xlabel ('flops/byte') ; 209 subplot (2,3,3) ; plot (intensity1, gflops3./gflops1, 'o') ; 210 title ('GPU speedup (colamd)') ; 211 212 subplot (2,3,4) ; plot (intensity2, gflops2, 'o') ; title ('CPU:metis') ; 213 ylabel ('Gflops') ; xlabel ('flops/byte') ; 214 subplot (2,3,5) ; plot (intensity2, gflops4, 'o') ; title ('GPU:metis') ; 215 ylabel ('Gflops') ; xlabel ('flops/byte') ; 216 subplot (2,3,6) ; plot (intensity2, gflops4./gflops2, 'o') ; 217 title ('GPU speedup (metis)') ; 218 drawnow 219 220 diary off 221 diary on 222 223 % fprintf ('Hit enter: ') ; pause 224 225end 226