function info = spqr_gpu (ordering, A)
%SPQR_GPU run the external GPU QR demo on a matrix and collect its statistics.
%
%   info = spqr_gpu (ordering, A)
%   info = spqr_gpu (ordering)
%
%   ordering: 1 selects colamd (runs demo_colamd.sh);
%             any other value (conventionally 2) selects metis
%             (runs demo_metis.sh).
%   A:        optional matrix.  If given, it is written to A.mtx before the
%             demo script runs.  If omitted, the existing A.mtx in the
%             current directory is read back so the residual check below
%             can still be done (presumably this is the same file the demo
%             scripts factorize -- confirm against the scripts).
%
%   info:     struct holding the 16 values the demo writes to
%             gpu_results.txt (see the field list below).
%
% Side effects: writes A.mtx (when A is given), deletes and re-creates
% gpu_results.txt, reads X.mtx, and prints the relative norm of A'*(A*x-b).
% Requires mread/mwrite, tcsh, and the demo scripts in the current directory.

if (nargin > 1)
    % write the matrix to a file
    mwrite ('A.mtx', A) ;
else
    % A was not passed in: recover it from the file so that the residual
    % computation below has a matrix to work with.
    if (~exist ('A.mtx', 'file'))
        error ('spqr_gpu:noMatrix', ...
            'no matrix given and A.mtx does not exist') ;
    end
    A = mread ('A.mtx') ;
end

% remove stale results so that a failed run cannot be mistaken for a new one
if (exist ('gpu_results.txt', 'file'))
    delete ('gpu_results.txt') ;
end

% The demo is run with an explicit library path.  The previous value is
% restored when this function exits (normally or via an error), so the
% rest of the MATLAB session is not left with a modified environment.
old_ld_path = getenv ('LD_LIBRARY_PATH') ;
restore_ld_path = onCleanup (@() setenv ('LD_LIBRARY_PATH', old_ld_path)) ; %#ok<NASGU>
setenv('LD_LIBRARY_PATH', '/usr/local/cuda/lib64:/usr/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/lib64')
if (ordering == 1)
    status = system ('tcsh demo_colamd.sh') ;
else
    status = system ('tcsh demo_metis.sh') ;
end

if (~exist ('gpu_results.txt', 'file'))
    error ('spqr_gpu:noResults', ...
        'demo script did not produce gpu_results.txt (exit status %d)', ...
        status) ;
end

% assign explicitly instead of letting load create the variable implicitly
gpu_results = load ('gpu_results.txt') ;
% delete ('gpu_results.txt') ;

if (numel (gpu_results) < 16)
    error ('spqr_gpu:badResults', ...
        'gpu_results.txt has %d entries; expected 16', numel (gpu_results)) ;
end

% check the solution written by the demo against a right-hand side of ones
m = size (A,1) ;
B = ones (m,1) ;
X = mread ('X.mtx') ;

r = A*X-B ;
atr = A'*r ;
atanorm = norm (A'*A,1) ;
atrnorm = norm (atr) / atanorm ;
fprintf ('relative norm (A''*(A*x-b)): %g\n', atrnorm) ;

% qrdemo_gpu.cpp writes the following:
%
%   fprintf(info, "%ld\n", cc->SPQR_istat[7]);        // ordering method
%   fprintf(info, "%ld\n", cc->memory_usage);         // memory usage (bytes)
%   fprintf(info, "%30.16e\n", cc->SPQR_flopcount);   // flop count
%   fprintf(info, "%lf\n", cc->SPQR_analyze_time);    // analyze time
%   fprintf(info, "%lf\n", cc->SPQR_factorize_time);  // factorize time
%   fprintf(info, "-1\n") ;                           // cpu memory (bytes)
%   fprintf(info, "-1\n") ;                           // gpu memory (bytes)
%   fprintf(info, "%8.1e\n", rnorm);                  // residual
%   fprintf(info, "%ld\n", cholmod_l_nnz (A, cc));    // nnz(A)
%   fprintf(info, "%ld\n", cc->SPQR_istat [0]);       // nnz(R)
%   fprintf(info, "%ld\n", cc->SPQR_istat [2]);       // # of frontal matrices
%   fprintf(info, "%ld\n", cc->SPQR_istat [3]);       // ntasks, for now
%   fprintf(info, "%lf\n", cc->gpuKernelTime);        // kernel time (ms)
%   fprintf(info, "%ld\n", cc->gpuFlops);             // "actual" gpu flops
%   fprintf(info, "%d\n", cc->gpuNumKernelLaunches);  // # of kernel launches
%   fprintf(info, "%8.1e\n", atrnorm);                // norm (A'*(Ax-b))

info.ordering              = gpu_results (1) ;
info.memory_usage_in_bytes = gpu_results (2) ;
info.flops                 = gpu_results (3) ;
info.analyze_time          = gpu_results (4) ;
info.factorize_time        = gpu_results (5) ;
info.cpuWatermark          = gpu_results (6) ;
info.gpuWatermark          = gpu_results (7) ;
info.resid                 = gpu_results (8) ;
info.nnzA                  = gpu_results (9) ;
info.nnzR                  = gpu_results (10) ;
info.numFronts             = gpu_results (11) ;
info.numTasks              = gpu_results (12) ;
info.kerneltime            = gpu_results (13) ;
info.gpuFlops              = gpu_results (14) ;
info.kernellaunches        = gpu_results (15) ;
% the demo reports the unscaled norm; scale it the same way as atrnorm above
info.atrnorm               = gpu_results (16) / atanorm ;

% Values that info.ordering can take (SuiteSparseQR ordering options):
%
% /* ordering options */
%   #define SPQR_ORDERING_FIXED 0
%   #define SPQR_ORDERING_NATURAL 1
%   #define SPQR_ORDERING_COLAMD 2
%   #define SPQR_ORDERING_GIVEN 3       /* only used for C/C++ interface */
%   #define SPQR_ORDERING_CHOLMOD 4     /* CHOLMOD best-effort (COLAMD, METIS,...)*/
%   #define SPQR_ORDERING_AMD 5         /* AMD(A'*A) */
%   #define SPQR_ORDERING_METIS 6       /* metis(A'*A) */
%   #define SPQR_ORDERING_DEFAULT 7     /* SuiteSparseQR default ordering */
%   #define SPQR_ORDERING_BEST 8        /* try COLAMD, AMD, and METIS; pick best */
%   #define SPQR_ORDERING_BESTAMD 9     /* try COLAMD and AMD; pick best */