1 #ifndef VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
3
4 /* =========================================================================
5 Copyright (c) 2010-2016, Institute for Microelectronics,
6 Institute for Analysis and Scientific Computing,
7 TU Wien.
8 Portions of this software are copyright by UChicago Argonne, LLC.
9
10 -----------------
11 ViennaCL - The Vienna Computing Library
12 -----------------
13
14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15
16 (A list of authors and contributors can be found in the PDF manual)
17
18 License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20
21 /** @file viennacl/linalg/opencl/ilu_operations.hpp
22 @brief Implementations of specialized routines for the Chow-Patel parallel ILU preconditioner using OpenCL
23 */
24
25 #include <cmath>
26 #include <algorithm> //for std::max and std::min
27
28 #include "viennacl/forwards.h"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/opencl/common.hpp"
32 #include "viennacl/linalg/opencl/kernels/ilu.hpp"
33 #include "viennacl/meta/predicate.hpp"
34 #include "viennacl/meta/enable_if.hpp"
35 #include "viennacl/traits/size.hpp"
36 #include "viennacl/traits/start.hpp"
37 #include "viennacl/traits/stride.hpp"
38 #include "viennacl/linalg/vector_operations.hpp"
39
40
41 namespace viennacl
42 {
43 namespace linalg
44 {
45 namespace opencl
46 {
47
48 /////////////////////// ICC /////////////////////
49
50 template<typename NumericT>
extract_L(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L)51 void extract_L(compressed_matrix<NumericT> const & A,
52 compressed_matrix<NumericT> & L)
53 {
54 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
55 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
56
57 //
58 // Step 1: Count elements in L:
59 //
60 viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_1");
61
62 viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
63 L.handle1().opencl_handle())
64 );
65
66 //
67 // Step 2: Exclusive scan on row_buffers:
68 //
69 viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
70 viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
71 L.reserve(wrapped_L_row_buffer[L.size1()], false);
72
73
74 //
75 // Step 3: Write entries
76 //
77 viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_2");
78
79 viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
80 L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle())
81 );
82
83 L.generate_row_block_information();
84
85 } // extract_LU
86
87 ///////////////////////////////////////////////
88
89
90
91 /** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
92 template<typename NumericT>
icc_scale(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L)93 void icc_scale(compressed_matrix<NumericT> const & A,
94 compressed_matrix<NumericT> & L)
95 {
96 viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
97
98 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
99 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
100
101 // fill D:
102 viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
103 viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
104
105 // scale L:
106 viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
107 viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
108
109 }
110
111 /////////////////////////////////////
112
113
114 /** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */
115 template<typename NumericT>
icc_chow_patel_sweep(compressed_matrix<NumericT> & L,vector<NumericT> const & aij_L)116 void icc_chow_patel_sweep(compressed_matrix<NumericT> & L,
117 vector<NumericT> const & aij_L)
118 {
119 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
120 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
121
122 viennacl::backend::mem_handle L_backup;
123 viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
124 viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
125
126 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "icc_chow_patel_sweep_kernel");
127 viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
128 aij_L)
129 );
130
131 }
132
133
134 /////////////////////// ILU /////////////////////
135
136 template<typename NumericT>
extract_LU(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L,compressed_matrix<NumericT> & U)137 void extract_LU(compressed_matrix<NumericT> const & A,
138 compressed_matrix<NumericT> & L,
139 compressed_matrix<NumericT> & U)
140 {
141 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
142 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
143
144 //
145 // Step 1: Count elements in L and U:
146 //
147 viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_1");
148
149 viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
150 L.handle1().opencl_handle(),
151 U.handle1().opencl_handle())
152 );
153
154 //
155 // Step 2: Exclusive scan on row_buffers:
156 //
157 viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
158 viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
159 L.reserve(wrapped_L_row_buffer[L.size1()], false);
160
161 viennacl::vector_base<unsigned int> wrapped_U_row_buffer(U.handle1(), A.size1() + 1, 0, 1);
162 viennacl::linalg::exclusive_scan(wrapped_U_row_buffer, wrapped_U_row_buffer);
163 U.reserve(wrapped_U_row_buffer[U.size1()], false);
164
165 //
166 // Step 3: Write entries
167 //
168 viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_2");
169
170 viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
171 L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
172 U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle())
173 );
174
175 L.generate_row_block_information();
176 // Note: block information for U will be generated after transposition
177
178 } // extract_LU
179
180 ///////////////////////////////////////////////
181
182
183
184 /** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
185 template<typename NumericT>
ilu_scale(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L,compressed_matrix<NumericT> & U)186 void ilu_scale(compressed_matrix<NumericT> const & A,
187 compressed_matrix<NumericT> & L,
188 compressed_matrix<NumericT> & U)
189 {
190 viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
191
192 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
193 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
194
195 // fill D:
196 viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
197 viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
198
199 // scale L:
200 viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
201 viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
202
203 // scale U:
204 viennacl::ocl::enqueue(k2(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), cl_uint(A.size1()), D) );
205
206 }
207
208 /////////////////////////////////////
209
210
211 /** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */
212 template<typename NumericT>
ilu_chow_patel_sweep(compressed_matrix<NumericT> & L,vector<NumericT> const & aij_L,compressed_matrix<NumericT> & U_trans,vector<NumericT> const & aij_U_trans)213 void ilu_chow_patel_sweep(compressed_matrix<NumericT> & L,
214 vector<NumericT> const & aij_L,
215 compressed_matrix<NumericT> & U_trans,
216 vector<NumericT> const & aij_U_trans)
217 {
218 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
219 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
220
221 viennacl::backend::mem_handle L_backup;
222 viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
223 viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
224
225 viennacl::backend::mem_handle U_backup;
226 viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans));
227 viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size());
228
229 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_chow_patel_sweep_kernel");
230 viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
231 aij_L,
232 U_trans.handle1().opencl_handle(), U_trans.handle2().opencl_handle(), U_trans.handle().opencl_handle(), U_backup.opencl_handle(),
233 aij_U_trans)
234 );
235
236 }
237
238 //////////////////////////////////////
239
240
241
242 template<typename NumericT>
ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,vector<NumericT> & diag_R)243 void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,
244 vector<NumericT> & diag_R)
245 {
246 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(R).context());
247 viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
248
249 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_form_neumann_matrix_kernel");
250 viennacl::ocl::enqueue(k(R.handle1().opencl_handle(), R.handle2().opencl_handle(), R.handle().opencl_handle(), cl_uint(R.size1()),
251 diag_R)
252 );
253 }
254
255 } //namespace opencl
256 } //namespace linalg
257 } //namespace viennacl
258
259
260 #endif
261