function info = spqr_gpu (ordering, A)
%SPQR_GPU run the external GPU sparse QR demo and collect its statistics.
%
%   info = spqr_gpu (ordering, A)
%   info = spqr_gpu (ordering)
%
% Inputs:
%   ordering: 1 selects the COLAMD demo script (demo_colamd.sh); any other
%             value selects the METIS demo script (demo_metis.sh).
%   A:        optional matrix.  If given, it is written to 'A.mtx' before
%             the demo runs.  If omitted, A is read back from an existing
%             'A.mtx' (presumably the file the demo scripts consume, since
%             a supplied A is written there -- confirm against the scripts).
%
% Output:
%   info: struct holding the 16 values that the demo writes to
%         'gpu_results.txt' (see the field list below).  info.atrnorm is
%         the value from the file divided by norm (A'*A,1).
%
% Side effects: overwrites 'A.mtx' when A is given, deletes any existing
% 'gpu_results.txt', sets LD_LIBRARY_PATH for this MATLAB session, runs a
% tcsh script, reads 'X.mtx', and prints the relative norm of A'*(A*x-b)
% with b = ones (m,1).

if (nargin > 1)
    % write the matrix to a file
    mwrite ('A.mtx', A) ;
else
    % The one-argument form still needs A for the residual check below, so
    % read the matrix from the file instead of failing on an undefined A.
    if (~exist ('A.mtx', 'file'))
        error ('spqr_gpu:noMatrix', ...
            'A was not provided and A.mtx does not exist') ;
    end
    A = mread ('A.mtx') ;
end

% remove stale results so that an old file is never mistaken for new output
if (exist ('gpu_results.txt', 'file'))
    delete ('gpu_results.txt') ;
end

setenv('LD_LIBRARY_PATH', '/usr/local/cuda/lib64:/usr/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/lib64')
if (ordering == 1)
    status = system ('tcsh demo_colamd.sh') ;
else
    status = system ('tcsh demo_metis.sh') ;
end
if (status ~= 0)
    % keep going: the results file check below decides whether we can proceed
    warning ('spqr_gpu:demoFailed', ...
        'demo script exited with status %d', status) ;
end

if (~exist ('gpu_results.txt', 'file'))
    error ('spqr_gpu:noResults', ...
        'the demo script did not produce gpu_results.txt') ;
end

% assign explicitly rather than letting load create the variable implicitly
gpu_results = load ('gpu_results.txt') ;
% delete ('gpu_results.txt') ;

if (numel (gpu_results) < 16)
    error ('spqr_gpu:shortResults', ...
        'gpu_results.txt holds %d values; 16 are expected', ...
        numel (gpu_results)) ;
end

% check the solution X written by the demo against b = ones (m,1)
m = size (A, 1) ;
B = ones (m,1) ;
X = mread ('X.mtx') ;

r = A*X-B ;
atr = A'*r ;
atanorm = norm (A'*A,1) ;
atrnorm = norm (atr) / atanorm ;
fprintf ('relative norm (A''*(A*x-b)): %g\n', atrnorm) ;

% qrdemo_gpu.cpp writes the following:
%
% fprintf(info, "%ld\n", cc->SPQR_istat[7]);        // ordering method
% fprintf(info, "%ld\n", cc->memory_usage);         // memory usage (bytes)
% fprintf(info, "%30.16e\n", cc->SPQR_flopcount);   // flop count
% fprintf(info, "%lf\n", cc->SPQR_analyze_time);    // analyze time
% fprintf(info, "%lf\n", cc->SPQR_factorize_time);  // factorize time
% fprintf(info, "-1\n") ;                           // cpu memory (bytes)
% fprintf(info, "-1\n") ;                           // gpu memory (bytes)
% fprintf(info, "%8.1e\n", rnorm);                  // residual
% fprintf(info, "%ld\n", cholmod_l_nnz (A, cc));    // nnz(A)
% fprintf(info, "%ld\n", cc->SPQR_istat [0]);       // nnz(R)
% fprintf(info, "%ld\n", cc->SPQR_istat [2]);       // # of frontal matrices
% fprintf(info, "%ld\n", cc->SPQR_istat [3]);       // ntasks, for now
% fprintf(info, "%lf\n", cc->gpuKernelTime);        // kernel time (ms)
% fprintf(info, "%ld\n", cc->gpuFlops);             // "actual" gpu flops
% fprintf(info, "%d\n", cc->gpuNumKernelLaunches);  // # of kernel launches
% fprintf(info, "%8.1e\n", atrnorm);                // norm (A'*(Ax-b))

info.ordering = gpu_results (1) ;
info.memory_usage_in_bytes = gpu_results (2) ;
info.flops = gpu_results (3) ;
info.analyze_time = gpu_results (4) ;
info.factorize_time = gpu_results (5) ;
info.cpuWatermark = gpu_results (6) ;
info.gpuWatermark = gpu_results (7) ;
info.resid = gpu_results (8) ;
info.nnzA = gpu_results (9) ;
info.nnzR = gpu_results (10) ;
info.numFronts = gpu_results (11) ;
info.numTasks = gpu_results (12) ;
info.kerneltime = gpu_results (13) ;
info.gpuFlops = gpu_results (14) ;
info.kernellaunches = gpu_results (15) ;
% the file stores the unscaled norm; scale it the same way as atrnorm above
info.atrnorm = gpu_results (16) / atanorm ;

% /* ordering options */
% #define SPQR_ORDERING_FIXED 0
% #define SPQR_ORDERING_NATURAL 1
% #define SPQR_ORDERING_COLAMD 2
% #define SPQR_ORDERING_GIVEN 3     /* only used for C/C++ interface */
% #define SPQR_ORDERING_CHOLMOD 4   /* CHOLMOD best-effort (COLAMD, METIS,...)*/
% #define SPQR_ORDERING_AMD 5       /* AMD(A'*A) */
% #define SPQR_ORDERING_METIS 6     /* metis(A'*A) */
% #define SPQR_ORDERING_DEFAULT 7   /* SuiteSparseQR default ordering */
% #define SPQR_ORDERING_BEST 8      /* try COLAMD, AMD, and METIS; pick best */
% #define SPQR_ORDERING_BESTAMD 9   /* try COLAMD and AMD; pick best */