1 /* =========================================================================
2    Copyright (c) 2010-2014, Institute for Microelectronics,
3                             Institute for Analysis and Scientific Computing,
4                             TU Wien.
5    Portions of this software are copyright by UChicago Argonne, LLC.
6 
7                             -----------------
8                   ViennaCL - The Vienna Computing Library
9                             -----------------
10 
11    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
12 
13    (A list of authors and contributors can be found in the PDF manual)
14 
15    License:         MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 // include necessary system headers
19 #include <iostream>
20 
21 #include "viennacl.hpp"
22 #include "viennacl_private.hpp"
23 
24 //include basic scalar and vector types of ViennaCL
25 #include "viennacl/scalar.hpp"
26 #include "viennacl/vector.hpp"
27 
28 #include "viennacl/vector.hpp"
29 #include "viennacl/matrix.hpp"
30 #include "viennacl/linalg/direct_solve.hpp"
31 #include "viennacl/linalg/prod.hpp"
32 
33 
34 #ifdef VIENNACL_WITH_CUDA
35 
36 // xGEMV
37 
ViennaCLCUDASgemv(ViennaCLBackend,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLInt m,ViennaCLInt n,float alpha,float * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,float * x,ViennaCLInt offx,ViennaCLInt incx,float beta,float * y,ViennaCLInt offy,ViennaCLInt incy)38 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemv(ViennaCLBackend /*backend*/,
39                                                             ViennaCLOrder order, ViennaCLTranspose transA,
40                                                             ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
41                                                             float *x, ViennaCLInt offx, ViennaCLInt incx,
42                                                             float beta,
43                                                             float *y, ViennaCLInt offy, ViennaCLInt incy)
44 {
45   viennacl::vector_base<float> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
46   viennacl::vector_base<float> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
47   viennacl::matrix_base<float> mat(A, viennacl::CUDA_MEMORY,
48                                    m, offA_row, incA_row, m,
49                                    n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
50   v2 *= beta;
51   if (transA == ViennaCLTrans)
52     v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1);
53   else
54     v2 += alpha * viennacl::linalg::prod(mat, v1);
55 
56   return ViennaCLSuccess;
57 }
58 
ViennaCLCUDADgemv(ViennaCLBackend,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLInt m,ViennaCLInt n,double alpha,double * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,double * x,ViennaCLInt offx,ViennaCLInt incx,double beta,double * y,ViennaCLInt offy,ViennaCLInt incy)59 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemv(ViennaCLBackend /*backend*/,
60                                                             ViennaCLOrder order, ViennaCLTranspose transA,
61                                                             ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
62                                                             double *x, ViennaCLInt offx, ViennaCLInt incx,
63                                                             double beta,
64                                                             double *y, ViennaCLInt offy, ViennaCLInt incy)
65 {
66   viennacl::vector_base<double> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
67   viennacl::vector_base<double> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
68   viennacl::matrix_base<double> mat(A, viennacl::CUDA_MEMORY,
69                                     m, offA_row, incA_row, m,
70                                     n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
71   v2 *= beta;
72   if (transA == ViennaCLTrans)
73     v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1);
74   else
75     v2 += alpha * viennacl::linalg::prod(mat, v1);
76 
77   return ViennaCLSuccess;
78 }
79 
80 
81 
82 // xTRSV
83 
ViennaCLCUDAStrsv(ViennaCLBackend,ViennaCLUplo uplo,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLDiag diag,ViennaCLInt n,float * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,float * x,ViennaCLInt offx,ViennaCLInt incx)84 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAStrsv(ViennaCLBackend /*backend*/,
85                                                             ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag,
86                                                             ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
87                                                             float *x, ViennaCLInt offx, ViennaCLInt incx)
88 {
89   viennacl::vector_base<float> v(x, viennacl::CUDA_MEMORY, n, offx, incx);
90   viennacl::matrix_base<float> mat(A, viennacl::CUDA_MEMORY,
91                                    n, offA_row, incA_row, n,
92                                    n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
93   if (transA == ViennaCLTrans)
94   {
95     if (uplo == ViennaCLUpper)
96       if (diag == ViennaCLUnit)
97         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_upper_tag());
98       else
99         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag());
100     else
101       if (diag == ViennaCLUnit)
102         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_lower_tag());
103       else
104         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag());
105   }
106   else
107   {
108     if (uplo == ViennaCLUpper)
109       if (diag == ViennaCLUnit)
110         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_upper_tag());
111       else
112         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag());
113     else
114       if (diag == ViennaCLUnit)
115         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_lower_tag());
116       else
117         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag());
118   }
119 
120   return ViennaCLSuccess;
121 }
122 
ViennaCLCUDADtrsv(ViennaCLBackend,ViennaCLUplo uplo,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLDiag diag,ViennaCLInt n,double * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,double * x,ViennaCLInt offx,ViennaCLInt incx)123 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADtrsv(ViennaCLBackend /*backend*/,
124                                                             ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag,
125                                                             ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
126                                                             double *x, ViennaCLInt offx, ViennaCLInt incx)
127 {
128   viennacl::vector_base<double> v(x, viennacl::CUDA_MEMORY, n, offx, incx);
129   viennacl::matrix_base<double> mat(A, viennacl::CUDA_MEMORY,
130                                     n, offA_row, incA_row, n,
131                                     n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
132   if (transA == ViennaCLTrans)
133   {
134     if (uplo == ViennaCLUpper)
135       if (diag == ViennaCLUnit)
136         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_upper_tag());
137       else
138         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag());
139     else
140       if (diag == ViennaCLUnit)
141         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_lower_tag());
142       else
143         viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag());
144   }
145   else
146   {
147     if (uplo == ViennaCLUpper)
148       if (diag == ViennaCLUnit)
149         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_upper_tag());
150       else
151         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag());
152     else
153       if (diag == ViennaCLUnit)
154         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_lower_tag());
155       else
156         viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag());
157   }
158 
159   return ViennaCLSuccess;
160 }
161 
162 
163 
164 // xGER
165 
ViennaCLCUDASger(ViennaCLBackend,ViennaCLOrder order,ViennaCLInt m,ViennaCLInt n,float alpha,float * x,ViennaCLInt offx,ViennaCLInt incx,float * y,ViennaCLInt offy,ViennaCLInt incy,float * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda)166 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASger(ViennaCLBackend /*backend*/,
167                                                            ViennaCLOrder order,
168                                                            ViennaCLInt m, ViennaCLInt n,
169                                                            float alpha,
170                                                            float *x, ViennaCLInt offx, ViennaCLInt incx,
171                                                            float *y, ViennaCLInt offy, ViennaCLInt incy,
172                                                            float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda)
173 {
174   viennacl::vector_base<float> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
175   viennacl::vector_base<float> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
176   viennacl::matrix_base<float> mat(A, viennacl::CUDA_MEMORY,
177                                    m, offA_row, incA_row, m,
178                                    n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
179 
180   mat += alpha * viennacl::linalg::outer_prod(v1, v2);
181 
182   return ViennaCLSuccess;
183 }
184 
ViennaCLCUDADger(ViennaCLBackend,ViennaCLOrder order,ViennaCLInt m,ViennaCLInt n,double alpha,double * x,ViennaCLInt offx,ViennaCLInt incx,double * y,ViennaCLInt offy,ViennaCLInt incy,double * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda)185 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADger(ViennaCLBackend /*backend*/,
186                                                            ViennaCLOrder order,
187                                                            ViennaCLInt m,  ViennaCLInt n,
188                                                            double alpha,
189                                                            double *x, ViennaCLInt offx, ViennaCLInt incx,
190                                                            double *y, ViennaCLInt offy, ViennaCLInt incy,
191                                                            double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda)
192 {
193   viennacl::vector_base<double> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
194   viennacl::vector_base<double> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
195   viennacl::matrix_base<double> mat(A, viennacl::CUDA_MEMORY,
196                                     m, offA_row, incA_row, m,
197                                     n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
198 
199   mat += alpha * viennacl::linalg::outer_prod(v1, v2);
200 
201   return ViennaCLSuccess;
202 }
203 
204 #endif
205