1 // =============================================================================
2 // === qrdemo_gpu.cpp ==========================================================
3 // =============================================================================
4 
5 // A simple C++ demo of SuiteSparseQR.  The comments give the MATLAB equivalent
6 // statements.  See also qrdemo.m
7 
8 #include "SuiteSparseQR.hpp"
9 #include "SuiteSparseGPU_Runtime.hpp"
10 #include <complex>
11 
main(int argc,char ** argv)12 int main (int argc, char **argv)
13 {
14     cholmod_sparse *A ;
15     cholmod_dense *X, *B, *r, *atr ;
16     double anorm, xnorm, rnorm, one [2] = {1,0}, minusone [2] = {-1,0}, t ;
17     double zero [2] = {0,0}, atrnorm ;
18     int mtype ;
19     long m, n, rnk ;
20     size_t total_mem, available_mem ;
21 
22     printf ("\nqrdemo_gpu: Testing SPQR on the GPU:\n") ;
23 
24     // start CHOLMOD
25     cholmod_common *cc, Common ;
26     cc = &Common ;
27     cholmod_l_start (cc) ;
28 
29     // warmup the GPU.  This can take some time, but only needs
30     // to be done once
31     cc->useGPU = true ;
32     t = SuiteSparse_time ( ) ;
33     cholmod_l_gpu_memorysize (&total_mem, &available_mem, cc) ;
34     cc->gpuMemorySize = available_mem ;
35     t = SuiteSparse_time ( ) - t ;
36     if (cc->gpuMemorySize <= 1)
37     {
38         printf ("no GPU available\n") ;
39     }
40     printf ("available GPU memory: %g MB, warmup time: %g\n",
41         (double) (cc->gpuMemorySize) / (1024 * 1024), t) ;
42 
43     // A = mread (stdin) ; read in the sparse matrix A
44     const char *filename = (argc < 2 ? "Problems/2.mtx" : argv[1]);
45     FILE *file = fopen(filename, "r");
46     A = (cholmod_sparse *) cholmod_l_read_matrix (file, 1, &mtype, cc) ;
47     fclose(file);
48     if (mtype != CHOLMOD_SPARSE)
49     {
50         printf ("input matrix must be sparse\n") ;
51         exit (1) ;
52     }
53 
54     // [m n] = size (A) ;
55     m = A->nrow ;
56     n = A->ncol ;
57 
58     long ordering = (argc < 3 ? SPQR_ORDERING_DEFAULT : atoi(argv[2]));
59 
60 #if 1
61     printf ("Matrix %6ld-by-%-6ld nnz: %6ld\n",
62         m, n, cholmod_l_nnz (A, cc)) ;
63 #endif
64 
65     // anorm = norm (A,1) ;
66     anorm = cholmod_l_norm_sparse (A, 1, cc) ;
67 
68     // B = ones (m,1), a dense right-hand-side of the same type as A
69     B = cholmod_l_ones (m, 1, A->xtype, cc) ;
70 
71     // X = A\B ; with default ordering and default column 2-norm tolerance
72     if (A->xtype == CHOLMOD_REAL)
73     {
74         // A, X, and B are all real
75         X = SuiteSparseQR <double>(ordering, SPQR_NO_TOL, A, B, cc) ;
76     }
77     else
78     {
79 #if SUPPORTS_COMPLEX
80         // A, X, and B are all complex
81         X = SuiteSparseQR < std::complex<double> >
82             (SPQR_ORDERING_DEFAULT, SPQR_NO_TOL, A, B, cc) ;
83 #else
84         printf("Code doesn't support std::complex<?> types.\n");
85 #endif
86     }
87 
88     // get the rank(A) estimate
89     rnk = cc->SPQR_istat [4] ;
90 
91     // compute the residual r, and A'*r, and their norms
92     r = cholmod_l_copy_dense (B, cc) ;                  // r = B
93     cholmod_l_sdmult (A, 0, one, minusone, X, r, cc) ;  // r = A*X-r = A*x-b
94     rnorm = cholmod_l_norm_dense (r, 2, cc) ;           // rnorm = norm (r)
95     atr = cholmod_l_zeros (n, 1, CHOLMOD_REAL, cc) ;    // atr = zeros (n,1)
96     cholmod_l_sdmult (A, 1, one, zero, r, atr, cc) ;    // atr = A'*r
97     atrnorm = cholmod_l_norm_dense (atr, 2, cc) ;       // atrnorm = norm (atr)
98 
99     // xnorm = norm (X)
100     xnorm = cholmod_l_norm_dense (X, 2, cc) ;
101 
102     // write out X to a file
103     FILE *f = fopen ("X.mtx", "w") ;
104     cholmod_l_write_dense (f, X, NULL, cc) ;
105     fclose (f) ;
106 
107     if (m <= n && anorm > 0 && xnorm > 0)
108     {
109         // find the relative residual, except for least-squares systems
110         rnorm /= (anorm * xnorm) ;
111     }
112     printf ("\nnorm(Ax-b): %8.1e\n", rnorm) ;
113     printf ("norm(A'(Ax-b))         %8.1e rank: %ld of %ld\n",
114         atrnorm, rnk, (m < n) ? m:n) ;
115 
116     /* Write an info file. */
117     FILE *info = fopen("gpu_results.txt", "w");
118     fprintf(info, "%ld\n", cc->SPQR_istat[7]);        // ordering method
119     fprintf(info, "%ld\n", cc->memory_usage);         // memory usage (bytes)
120     fprintf(info, "%30.16e\n", cc->SPQR_flopcount);   // flop count
121     fprintf(info, "%lf\n", cc->SPQR_analyze_time);    // analyze time
122     fprintf(info, "%lf\n", cc->SPQR_factorize_time);  // factorize time
123     fprintf(info, "-1\n") ;                           // cpu memory (bytes)
124     fprintf(info, "-1\n") ;                           // gpu memory (bytes)
125     fprintf(info, "%32.16e\n", rnorm);                // residual
126     fprintf(info, "%ld\n", cholmod_l_nnz (A, cc));    // nnz(A)
127     fprintf(info, "%ld\n", cc->SPQR_istat [0]);       // nnz(R)
128     fprintf(info, "%ld\n", cc->SPQR_istat [2]);       // # of frontal matrices
129     fprintf(info, "%ld\n", cc->SPQR_istat [3]);       // ntasks, for now
130     fprintf(info, "%lf\n", cc->gpuKernelTime);        // kernel time (ms)
131     fprintf(info, "%ld\n", cc->gpuFlops);             // "actual" gpu flops
132     fprintf(info, "%d\n", cc->gpuNumKernelLaunches);  // # of kernel launches
133     fprintf(info, "%32.16e\n", atrnorm) ;             // norm (A'*(Ax-b))
134 
135     fclose(info);
136 
137     // free everything
138     cholmod_l_free_dense (&r, cc) ;
139     cholmod_l_free_dense (&atr, cc) ;
140     cholmod_l_free_sparse (&A, cc) ;
141     cholmod_l_free_dense (&X, cc) ;
142     cholmod_l_free_dense (&B, cc) ;
143     cholmod_l_finish (cc) ;
144 
145     return (0) ;
146 }
147