1 // =============================================================================
2 // === qrdemo_gpu.cpp ==========================================================
3 // =============================================================================
4
5 // A simple C++ demo of SuiteSparseQR. The comments give the MATLAB equivalent
6 // statements. See also qrdemo.m
7
8 #include "SuiteSparseQR.hpp"
9 #include "SuiteSparseGPU_Runtime.hpp"
10 #include <complex>
11
main(int argc,char ** argv)12 int main (int argc, char **argv)
13 {
14 cholmod_sparse *A ;
15 cholmod_dense *X, *B, *r, *atr ;
16 double anorm, xnorm, rnorm, one [2] = {1,0}, minusone [2] = {-1,0}, t ;
17 double zero [2] = {0,0}, atrnorm ;
18 int mtype ;
19 long m, n, rnk ;
20 size_t total_mem, available_mem ;
21
22 printf ("\nqrdemo_gpu: Testing SPQR on the GPU:\n") ;
23
24 // start CHOLMOD
25 cholmod_common *cc, Common ;
26 cc = &Common ;
27 cholmod_l_start (cc) ;
28
29 // warmup the GPU. This can take some time, but only needs
30 // to be done once
31 cc->useGPU = true ;
32 t = SuiteSparse_time ( ) ;
33 cholmod_l_gpu_memorysize (&total_mem, &available_mem, cc) ;
34 cc->gpuMemorySize = available_mem ;
35 t = SuiteSparse_time ( ) - t ;
36 if (cc->gpuMemorySize <= 1)
37 {
38 printf ("no GPU available\n") ;
39 }
40 printf ("available GPU memory: %g MB, warmup time: %g\n",
41 (double) (cc->gpuMemorySize) / (1024 * 1024), t) ;
42
43 // A = mread (stdin) ; read in the sparse matrix A
44 const char *filename = (argc < 2 ? "Problems/2.mtx" : argv[1]);
45 FILE *file = fopen(filename, "r");
46 A = (cholmod_sparse *) cholmod_l_read_matrix (file, 1, &mtype, cc) ;
47 fclose(file);
48 if (mtype != CHOLMOD_SPARSE)
49 {
50 printf ("input matrix must be sparse\n") ;
51 exit (1) ;
52 }
53
54 // [m n] = size (A) ;
55 m = A->nrow ;
56 n = A->ncol ;
57
58 long ordering = (argc < 3 ? SPQR_ORDERING_DEFAULT : atoi(argv[2]));
59
60 #if 1
61 printf ("Matrix %6ld-by-%-6ld nnz: %6ld\n",
62 m, n, cholmod_l_nnz (A, cc)) ;
63 #endif
64
65 // anorm = norm (A,1) ;
66 anorm = cholmod_l_norm_sparse (A, 1, cc) ;
67
68 // B = ones (m,1), a dense right-hand-side of the same type as A
69 B = cholmod_l_ones (m, 1, A->xtype, cc) ;
70
71 // X = A\B ; with default ordering and default column 2-norm tolerance
72 if (A->xtype == CHOLMOD_REAL)
73 {
74 // A, X, and B are all real
75 X = SuiteSparseQR <double>(ordering, SPQR_NO_TOL, A, B, cc) ;
76 }
77 else
78 {
79 #if SUPPORTS_COMPLEX
80 // A, X, and B are all complex
81 X = SuiteSparseQR < std::complex<double> >
82 (SPQR_ORDERING_DEFAULT, SPQR_NO_TOL, A, B, cc) ;
83 #else
84 printf("Code doesn't support std::complex<?> types.\n");
85 #endif
86 }
87
88 // get the rank(A) estimate
89 rnk = cc->SPQR_istat [4] ;
90
91 // compute the residual r, and A'*r, and their norms
92 r = cholmod_l_copy_dense (B, cc) ; // r = B
93 cholmod_l_sdmult (A, 0, one, minusone, X, r, cc) ; // r = A*X-r = A*x-b
94 rnorm = cholmod_l_norm_dense (r, 2, cc) ; // rnorm = norm (r)
95 atr = cholmod_l_zeros (n, 1, CHOLMOD_REAL, cc) ; // atr = zeros (n,1)
96 cholmod_l_sdmult (A, 1, one, zero, r, atr, cc) ; // atr = A'*r
97 atrnorm = cholmod_l_norm_dense (atr, 2, cc) ; // atrnorm = norm (atr)
98
99 // xnorm = norm (X)
100 xnorm = cholmod_l_norm_dense (X, 2, cc) ;
101
102 // write out X to a file
103 FILE *f = fopen ("X.mtx", "w") ;
104 cholmod_l_write_dense (f, X, NULL, cc) ;
105 fclose (f) ;
106
107 if (m <= n && anorm > 0 && xnorm > 0)
108 {
109 // find the relative residual, except for least-squares systems
110 rnorm /= (anorm * xnorm) ;
111 }
112 printf ("\nnorm(Ax-b): %8.1e\n", rnorm) ;
113 printf ("norm(A'(Ax-b)) %8.1e rank: %ld of %ld\n",
114 atrnorm, rnk, (m < n) ? m:n) ;
115
116 /* Write an info file. */
117 FILE *info = fopen("gpu_results.txt", "w");
118 fprintf(info, "%ld\n", cc->SPQR_istat[7]); // ordering method
119 fprintf(info, "%ld\n", cc->memory_usage); // memory usage (bytes)
120 fprintf(info, "%30.16e\n", cc->SPQR_flopcount); // flop count
121 fprintf(info, "%lf\n", cc->SPQR_analyze_time); // analyze time
122 fprintf(info, "%lf\n", cc->SPQR_factorize_time); // factorize time
123 fprintf(info, "-1\n") ; // cpu memory (bytes)
124 fprintf(info, "-1\n") ; // gpu memory (bytes)
125 fprintf(info, "%32.16e\n", rnorm); // residual
126 fprintf(info, "%ld\n", cholmod_l_nnz (A, cc)); // nnz(A)
127 fprintf(info, "%ld\n", cc->SPQR_istat [0]); // nnz(R)
128 fprintf(info, "%ld\n", cc->SPQR_istat [2]); // # of frontal matrices
129 fprintf(info, "%ld\n", cc->SPQR_istat [3]); // ntasks, for now
130 fprintf(info, "%lf\n", cc->gpuKernelTime); // kernel time (ms)
131 fprintf(info, "%ld\n", cc->gpuFlops); // "actual" gpu flops
132 fprintf(info, "%d\n", cc->gpuNumKernelLaunches); // # of kernel launches
133 fprintf(info, "%32.16e\n", atrnorm) ; // norm (A'*(Ax-b))
134
135 fclose(info);
136
137 // free everything
138 cholmod_l_free_dense (&r, cc) ;
139 cholmod_l_free_dense (&atr, cc) ;
140 cholmod_l_free_sparse (&A, cc) ;
141 cholmod_l_free_dense (&X, cc) ;
142 cholmod_l_free_dense (&B, cc) ;
143 cholmod_l_finish (cc) ;
144
145 return (0) ;
146 }
147