1 #ifndef VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the PDF manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/linalg/opencl/ilu_operations.hpp
22     @brief Implementations of specialized routines for the Chow-Patel parallel ILU preconditioner using OpenCL
23 */
24 
25 #include <cmath>
26 #include <algorithm>  //for std::max and std::min
27 
28 #include "viennacl/forwards.h"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/opencl/common.hpp"
32 #include "viennacl/linalg/opencl/kernels/ilu.hpp"
33 #include "viennacl/meta/predicate.hpp"
34 #include "viennacl/meta/enable_if.hpp"
35 #include "viennacl/traits/size.hpp"
36 #include "viennacl/traits/start.hpp"
37 #include "viennacl/traits/stride.hpp"
38 #include "viennacl/linalg/vector_operations.hpp"
39 
40 
41 namespace viennacl
42 {
43 namespace linalg
44 {
45 namespace opencl
46 {
47 
48 /////////////////////// ICC /////////////////////
49 
50 template<typename NumericT>
extract_L(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L)51 void extract_L(compressed_matrix<NumericT> const & A,
52                 compressed_matrix<NumericT>       & L)
53 {
54   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
55   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
56 
57   //
58   // Step 1: Count elements in L:
59   //
60   viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_1");
61 
62   viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
63                             L.handle1().opencl_handle())
64                         );
65 
66   //
67   // Step 2: Exclusive scan on row_buffers:
68   //
69   viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
70   viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
71   L.reserve(wrapped_L_row_buffer[L.size1()], false);
72 
73 
74   //
75   // Step 3: Write entries
76   //
77   viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_2");
78 
79   viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
80                             L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle())
81                         );
82 
83   L.generate_row_block_information();
84 
85 } // extract_LU
86 
87 ///////////////////////////////////////////////
88 
89 
90 
91 /** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
92 template<typename NumericT>
icc_scale(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L)93 void icc_scale(compressed_matrix<NumericT> const & A,
94                compressed_matrix<NumericT>       & L)
95 {
96   viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
97 
98   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
99   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
100 
101   // fill D:
102   viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
103   viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
104 
105   // scale L:
106   viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
107   viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
108 
109 }
110 
111 /////////////////////////////////////
112 
113 
114 /** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */
115 template<typename NumericT>
icc_chow_patel_sweep(compressed_matrix<NumericT> & L,vector<NumericT> const & aij_L)116 void icc_chow_patel_sweep(compressed_matrix<NumericT>       & L,
117                           vector<NumericT>            const & aij_L)
118 {
119   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
120   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
121 
122   viennacl::backend::mem_handle L_backup;
123   viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
124   viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
125 
126   viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "icc_chow_patel_sweep_kernel");
127   viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
128                            aij_L)
129                         );
130 
131 }
132 
133 
134 /////////////////////// ILU /////////////////////
135 
136 template<typename NumericT>
extract_LU(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L,compressed_matrix<NumericT> & U)137 void extract_LU(compressed_matrix<NumericT> const & A,
138                 compressed_matrix<NumericT>       & L,
139                 compressed_matrix<NumericT>       & U)
140 {
141   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
142   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
143 
144   //
145   // Step 1: Count elements in L and U:
146   //
147   viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_1");
148 
149   viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()),
150                             L.handle1().opencl_handle(),
151                             U.handle1().opencl_handle())
152                         );
153 
154   //
155   // Step 2: Exclusive scan on row_buffers:
156   //
157   viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1);
158   viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer);
159   L.reserve(wrapped_L_row_buffer[L.size1()], false);
160 
161   viennacl::vector_base<unsigned int> wrapped_U_row_buffer(U.handle1(), A.size1() + 1, 0, 1);
162   viennacl::linalg::exclusive_scan(wrapped_U_row_buffer, wrapped_U_row_buffer);
163   U.reserve(wrapped_U_row_buffer[U.size1()], false);
164 
165   //
166   // Step 3: Write entries
167   //
168   viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_2");
169 
170   viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()),
171                             L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
172                             U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle())
173                         );
174 
175   L.generate_row_block_information();
176   // Note: block information for U will be generated after transposition
177 
178 } // extract_LU
179 
180 ///////////////////////////////////////////////
181 
182 
183 
184 /** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */
185 template<typename NumericT>
ilu_scale(compressed_matrix<NumericT> const & A,compressed_matrix<NumericT> & L,compressed_matrix<NumericT> & U)186 void ilu_scale(compressed_matrix<NumericT> const & A,
187                compressed_matrix<NumericT>       & L,
188                compressed_matrix<NumericT>       & U)
189 {
190   viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A));
191 
192   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
193   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
194 
195   // fill D:
196   viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1");
197   viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) );
198 
199   // scale L:
200   viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2");
201   viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) );
202 
203   // scale U:
204   viennacl::ocl::enqueue(k2(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), cl_uint(A.size1()), D) );
205 
206 }
207 
208 /////////////////////////////////////
209 
210 
211 /** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */
212 template<typename NumericT>
ilu_chow_patel_sweep(compressed_matrix<NumericT> & L,vector<NumericT> const & aij_L,compressed_matrix<NumericT> & U_trans,vector<NumericT> const & aij_U_trans)213 void ilu_chow_patel_sweep(compressed_matrix<NumericT>       & L,
214                           vector<NumericT>            const & aij_L,
215                           compressed_matrix<NumericT>       & U_trans,
216                           vector<NumericT>            const & aij_U_trans)
217 {
218   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
219   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
220 
221   viennacl::backend::mem_handle L_backup;
222   viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L));
223   viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size());
224 
225   viennacl::backend::mem_handle U_backup;
226   viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans));
227   viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size());
228 
229   viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_chow_patel_sweep_kernel");
230   viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()),
231                            aij_L,
232                            U_trans.handle1().opencl_handle(), U_trans.handle2().opencl_handle(), U_trans.handle().opencl_handle(), U_backup.opencl_handle(),
233                            aij_U_trans)
234                         );
235 
236 }
237 
238 //////////////////////////////////////
239 
240 
241 
242 template<typename NumericT>
ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,vector<NumericT> & diag_R)243 void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R,
244                              vector<NumericT> & diag_R)
245 {
246   viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(R).context());
247   viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx);
248 
249   viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_form_neumann_matrix_kernel");
250   viennacl::ocl::enqueue(k(R.handle1().opencl_handle(), R.handle2().opencl_handle(), R.handle().opencl_handle(), cl_uint(R.size1()),
251                            diag_R)
252                         );
253 }
254 
255 } //namespace opencl
256 } //namespace linalg
257 } //namespace viennacl
258 
259 
260 #endif
261