1 /* =========================================================================
2 Copyright (c) 2010-2014, Institute for Microelectronics,
3 Institute for Analysis and Scientific Computing,
4 TU Wien.
5 Portions of this software are copyright by UChicago Argonne, LLC.
6
7 -----------------
8 ViennaCL - The Vienna Computing Library
9 -----------------
10
11 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12
13 (A list of authors and contributors can be found in the PDF manual)
14
15 License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17
18 // include necessary system headers
19 #include <iostream>
20
21 #include "viennacl.hpp"
22 #include "viennacl_private.hpp"
23
24 //include basic scalar and vector types of ViennaCL
25 #include "viennacl/scalar.hpp"
26 #include "viennacl/vector.hpp"
27
28 #include "viennacl/vector.hpp"
29 #include "viennacl/matrix.hpp"
30 #include "viennacl/linalg/direct_solve.hpp"
31 #include "viennacl/linalg/prod.hpp"
32
33
34 #ifdef VIENNACL_WITH_CUDA
35
36 // xGEMV
37
ViennaCLCUDASgemv(ViennaCLBackend,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLInt m,ViennaCLInt n,float alpha,float * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,float * x,ViennaCLInt offx,ViennaCLInt incx,float beta,float * y,ViennaCLInt offy,ViennaCLInt incy)38 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASgemv(ViennaCLBackend /*backend*/,
39 ViennaCLOrder order, ViennaCLTranspose transA,
40 ViennaCLInt m, ViennaCLInt n, float alpha, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
41 float *x, ViennaCLInt offx, ViennaCLInt incx,
42 float beta,
43 float *y, ViennaCLInt offy, ViennaCLInt incy)
44 {
45 viennacl::vector_base<float> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
46 viennacl::vector_base<float> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
47 viennacl::matrix_base<float> mat(A, viennacl::CUDA_MEMORY,
48 m, offA_row, incA_row, m,
49 n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
50 v2 *= beta;
51 if (transA == ViennaCLTrans)
52 v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1);
53 else
54 v2 += alpha * viennacl::linalg::prod(mat, v1);
55
56 return ViennaCLSuccess;
57 }
58
ViennaCLCUDADgemv(ViennaCLBackend,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLInt m,ViennaCLInt n,double alpha,double * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,double * x,ViennaCLInt offx,ViennaCLInt incx,double beta,double * y,ViennaCLInt offy,ViennaCLInt incy)59 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADgemv(ViennaCLBackend /*backend*/,
60 ViennaCLOrder order, ViennaCLTranspose transA,
61 ViennaCLInt m, ViennaCLInt n, double alpha, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
62 double *x, ViennaCLInt offx, ViennaCLInt incx,
63 double beta,
64 double *y, ViennaCLInt offy, ViennaCLInt incy)
65 {
66 viennacl::vector_base<double> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
67 viennacl::vector_base<double> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
68 viennacl::matrix_base<double> mat(A, viennacl::CUDA_MEMORY,
69 m, offA_row, incA_row, m,
70 n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
71 v2 *= beta;
72 if (transA == ViennaCLTrans)
73 v2 += alpha * viennacl::linalg::prod(viennacl::trans(mat), v1);
74 else
75 v2 += alpha * viennacl::linalg::prod(mat, v1);
76
77 return ViennaCLSuccess;
78 }
79
80
81
82 // xTRSV
83
ViennaCLCUDAStrsv(ViennaCLBackend,ViennaCLUplo uplo,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLDiag diag,ViennaCLInt n,float * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,float * x,ViennaCLInt offx,ViennaCLInt incx)84 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDAStrsv(ViennaCLBackend /*backend*/,
85 ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag,
86 ViennaCLInt n, float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
87 float *x, ViennaCLInt offx, ViennaCLInt incx)
88 {
89 viennacl::vector_base<float> v(x, viennacl::CUDA_MEMORY, n, offx, incx);
90 viennacl::matrix_base<float> mat(A, viennacl::CUDA_MEMORY,
91 n, offA_row, incA_row, n,
92 n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
93 if (transA == ViennaCLTrans)
94 {
95 if (uplo == ViennaCLUpper)
96 if (diag == ViennaCLUnit)
97 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_upper_tag());
98 else
99 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag());
100 else
101 if (diag == ViennaCLUnit)
102 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_lower_tag());
103 else
104 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag());
105 }
106 else
107 {
108 if (uplo == ViennaCLUpper)
109 if (diag == ViennaCLUnit)
110 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_upper_tag());
111 else
112 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag());
113 else
114 if (diag == ViennaCLUnit)
115 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_lower_tag());
116 else
117 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag());
118 }
119
120 return ViennaCLSuccess;
121 }
122
ViennaCLCUDADtrsv(ViennaCLBackend,ViennaCLUplo uplo,ViennaCLOrder order,ViennaCLTranspose transA,ViennaCLDiag diag,ViennaCLInt n,double * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda,double * x,ViennaCLInt offx,ViennaCLInt incx)123 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADtrsv(ViennaCLBackend /*backend*/,
124 ViennaCLUplo uplo, ViennaCLOrder order, ViennaCLTranspose transA, ViennaCLDiag diag,
125 ViennaCLInt n, double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda,
126 double *x, ViennaCLInt offx, ViennaCLInt incx)
127 {
128 viennacl::vector_base<double> v(x, viennacl::CUDA_MEMORY, n, offx, incx);
129 viennacl::matrix_base<double> mat(A, viennacl::CUDA_MEMORY,
130 n, offA_row, incA_row, n,
131 n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
132 if (transA == ViennaCLTrans)
133 {
134 if (uplo == ViennaCLUpper)
135 if (diag == ViennaCLUnit)
136 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_upper_tag());
137 else
138 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::upper_tag());
139 else
140 if (diag == ViennaCLUnit)
141 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::unit_lower_tag());
142 else
143 viennacl::linalg::inplace_solve(viennacl::trans(mat), v, viennacl::linalg::lower_tag());
144 }
145 else
146 {
147 if (uplo == ViennaCLUpper)
148 if (diag == ViennaCLUnit)
149 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_upper_tag());
150 else
151 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::upper_tag());
152 else
153 if (diag == ViennaCLUnit)
154 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::unit_lower_tag());
155 else
156 viennacl::linalg::inplace_solve(mat, v, viennacl::linalg::lower_tag());
157 }
158
159 return ViennaCLSuccess;
160 }
161
162
163
164 // xGER
165
ViennaCLCUDASger(ViennaCLBackend,ViennaCLOrder order,ViennaCLInt m,ViennaCLInt n,float alpha,float * x,ViennaCLInt offx,ViennaCLInt incx,float * y,ViennaCLInt offy,ViennaCLInt incy,float * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda)166 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDASger(ViennaCLBackend /*backend*/,
167 ViennaCLOrder order,
168 ViennaCLInt m, ViennaCLInt n,
169 float alpha,
170 float *x, ViennaCLInt offx, ViennaCLInt incx,
171 float *y, ViennaCLInt offy, ViennaCLInt incy,
172 float *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda)
173 {
174 viennacl::vector_base<float> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
175 viennacl::vector_base<float> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
176 viennacl::matrix_base<float> mat(A, viennacl::CUDA_MEMORY,
177 m, offA_row, incA_row, m,
178 n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
179
180 mat += alpha * viennacl::linalg::outer_prod(v1, v2);
181
182 return ViennaCLSuccess;
183 }
184
ViennaCLCUDADger(ViennaCLBackend,ViennaCLOrder order,ViennaCLInt m,ViennaCLInt n,double alpha,double * x,ViennaCLInt offx,ViennaCLInt incx,double * y,ViennaCLInt offy,ViennaCLInt incy,double * A,ViennaCLInt offA_row,ViennaCLInt offA_col,ViennaCLInt incA_row,ViennaCLInt incA_col,ViennaCLInt lda)185 VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLCUDADger(ViennaCLBackend /*backend*/,
186 ViennaCLOrder order,
187 ViennaCLInt m, ViennaCLInt n,
188 double alpha,
189 double *x, ViennaCLInt offx, ViennaCLInt incx,
190 double *y, ViennaCLInt offy, ViennaCLInt incy,
191 double *A, ViennaCLInt offA_row, ViennaCLInt offA_col, ViennaCLInt incA_row, ViennaCLInt incA_col, ViennaCLInt lda)
192 {
193 viennacl::vector_base<double> v1(x, viennacl::CUDA_MEMORY, n, offx, incx);
194 viennacl::vector_base<double> v2(y, viennacl::CUDA_MEMORY, m, offy, incy);
195 viennacl::matrix_base<double> mat(A, viennacl::CUDA_MEMORY,
196 m, offA_row, incA_row, m,
197 n, offA_col, incA_col, lda, order == ViennaCLRowMajor);
198
199 mat += alpha * viennacl::linalg::outer_prod(v1, v2);
200
201 return ViennaCLSuccess;
202 }
203
204 #endif
205