1 /*! \file TridiagQueue.cpp
2     \brief task mangemanet of tridiagonal factorization algorithm
3     \author Atsushi Suzuki, Laboratoire Jacques-Louis Lions
4     \date   Jun. 20th 2014
5     \date   Jul. 12th 2015
6     \date   Nov. 30th 2016
7 */
8 
9 // This file is part of Dissection
10 //
11 // Dissection is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // Linking Dissection statically or dynamically with other modules is making
17 // a combined work based on Disssection. Thus, the terms and conditions of
18 // the GNU General Public License cover the whole combination.
19 //
20 // As a special exception, the copyright holders of Dissection give you
21 // permission to combine Dissection program with free software programs or
22 // libraries that are released under the GNU LGPL and with independent modules
23 // that communicate with Dissection solely through the Dissection-fortran
24 // interface. You may copy and distribute such a system following the terms of
25 // the GNU GPL for Dissection and the licenses of the other code concerned,
26 // provided that you include the source code of that other code when and as
27 // the GNU GPL requires distribution of source code and provided that you do
28 // not modify the Dissection-fortran interface.
29 //
30 // Note that people who make modified versions of Dissection are not obligated
31 // to grant this special exception for their modified versions; it is their
32 // choice whether to do so. The GNU General Public License gives permission to
33 // release a modified version without this exception; this exception also makes
34 // it possible to release a modified version which carries forward this
35 // exception. If you modify the Dissection-fortran interface, this exception
36 // does not apply to your modified version of Dissection, and you must remove
37 // this exception when you distribute your modified version.
38 //
39 // This exception is an additional permission under section 7 of the GNU
40 // General Public License, version 3 ("GPLv3")
41 //
42 // Dissection is distributed in the hope that it will be useful,
43 // but WITHOUT ANY WARRANTY; without even the implied warranty of
44 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
45 // GNU General Public License for more details.
46 //
47 // You should have received a copy of the GNU General Public License
48 // along with Dissection.  If not, see <http://www.gnu.org/licenses/>.
49 //
50 
51 #include <float.h>
52 #include "Driver/TridiagQueue.hpp"
53 #include "Algebra/VectorArray.hpp"
54 #include "Compiler/DissectionIO.hpp"
55 
56 template<typename T, typename U>
57 void TridiagQueue<T, U>::
generate_queue(TridiagBlockMatrix<T,U> * tridiag,const int dim,const int nnz,const bool isMapped,int * remap_eqn,int * ptUnsymRows,int * indUnsymCol,int * indVals,T * coef)58 generate_queue(TridiagBlockMatrix<T, U> *tridiag,
59 	       const int dim,
60 	       const int nnz,
61 	       const bool isMapped,
62 	       int *remap_eqn,
63 	       int *ptUnsymRows,
64 	       int *indUnsymCol,
65 	       int *indVals,
66 	       T *coef)
67 {
68   _dim = dim;
69   _nnz = nnz;
70   _tridiag =tridiag;
71   _isMapped = isMapped;
72   _remap_eqn = new int[_dim];
73   for (int i = 0; i < dim; i++) {
74     _remap_eqn[i] = remap_eqn[i];
75   }
76   _ptRows= new int[_dim + 1];
77   for (int i = 0; i < (dim + 1); i++) {
78     _ptRows[i] = ptUnsymRows[i];
79   }
80   _indCols = new int[nnz];
81   _indVals = new int[nnz];
82   for (int i = 0; i < nnz; i++) {
83     _indCols[i] = indUnsymCol[i];
84     _indVals[i] = indVals[i];
85   }
86   _coef = coef;
87 
88   if (_tridiag_solver == false) {
89     diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n",
90 	    __FILE__, __LINE__);
91   }
92   _allocated = true;
93 }
94 
95 template
96 void TridiagQueue<double>::
97 generate_queue(TridiagBlockMatrix<double> *tridiag,
98 	       const int dim,
99 	       const int nnz,
100 	       const bool isMapped,
101 	       int *remap_eqn,
102 	       int *ptUnsymRows,
103 	       int *indUnsymCol,
104 	       int *indVals,
105 	       double *coef);
106 
107 template
108 void TridiagQueue<quadruple>::
109 generate_queue(TridiagBlockMatrix<quadruple> *tridiag,
110 	       const int dim,
111 	       const int nnz,
112 	       const bool isMapped,
113 	       int *remap_eqn,
114 	       int *ptUnsymRows,
115 	       int *indUnsymCol,
116 	       int *indVals,
117 	       quadruple *coef);
118 
119 template
120 void TridiagQueue<complex<double>, double>::
121 generate_queue(TridiagBlockMatrix<complex<double>, double> *tridiag,
122 	       const int dim,
123 	       const int nnz,
124 	       const bool isMapped,
125 	       int *remap_eqn,
126 	       int *ptUnsymRows,
127 	       int *indUnsymCol,
128 	       int *indVals,
129 	       complex<double> *coef);
130 
131 template
132 void TridiagQueue<complex<quadruple>, quadruple>::
133 generate_queue(TridiagBlockMatrix<complex<quadruple>, quadruple> *tridiag,
134 	       const int dim,
135 	       const int nnz,
136 	       const bool isMapped,
137 	       int *remap_eqn,
138 	       int *ptUnsymRows,
139 	       int *indUnsymCol,
140 	       int *indVals,
141 	       complex<quadruple> *coef);
142 
143 template
144 void TridiagQueue<float>::
145 generate_queue(TridiagBlockMatrix<float> *tridiag,
146 	       const int dim,
147 	       const int nnz,
148 	       const bool isMapped,
149 	       int *remap_eqn,
150 	       int *ptUnsymRows,
151 	       int *indUnsymCol,
152 	       int *indVals,
153 	       float *coef);
154 
155 template
156 void TridiagQueue<complex<float>, float>::
157 generate_queue(TridiagBlockMatrix<complex<float>, float> *tridiag,
158 	       const int dim,
159 	       const int nnz,
160 	       const bool isMapped,
161 	       int *remap_eqn,
162 	       int *ptUnsymRows,
163 	       int *indUnsymCol,
164 	       int *indVals,
165 	       complex<float> *coef);
166 //
167 
168 template<typename T, typename U>
generate_queue_fwbw()169 void TridiagQueue<T, U>::generate_queue_fwbw() {} // dummy
170 
171 template
172 void TridiagQueue<double>::generate_queue_fwbw();
173 
174 template
175 void TridiagQueue<quadruple>::generate_queue_fwbw();
176 
177 template
178 void TridiagQueue<complex<double>, double>::generate_queue_fwbw();
179 
180 template
181 void TridiagQueue<complex<quadruple>, quadruple>::generate_queue_fwbw();
182 
183 template
184 void TridiagQueue<float>::generate_queue_fwbw();
185 
186 template
187 void TridiagQueue<complex<float>, float>::generate_queue_fwbw();
188 //
189 
190 template<typename T, typename U>
exec_symb_fact()191 void TridiagQueue<T, U>::exec_symb_fact()
192 {
193   if (_tridiag_solver == false) {
194     diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n",
195 	    __FILE__, __LINE__);
196   }
197   vector<int> color_mask(_dim, 1);
198   _tridiag->SymbolicFact(1, 1, &color_mask[0], _dim, // color = color_max = 1
199 			 _nnz, _ptRows, _indCols, _indVals);
200   color_mask.clear();
201 }
202 
203 template
204 void TridiagQueue<double>::exec_symb_fact();
205 
206 template
207 void TridiagQueue<quadruple>::exec_symb_fact();
208 
209 template
210 void TridiagQueue<complex<double>, double>::exec_symb_fact();
211 
212 template
213 void TridiagQueue<complex<quadruple>, quadruple>::exec_symb_fact();
214 
215 template
216 void TridiagQueue<float>::exec_symb_fact();
217 
218 template
219 void TridiagQueue<complex<float>, float>::exec_symb_fact();
220 //
221 
222 template<typename T, typename U>
exec_num_fact(const int called,const double eps_pivot,const bool kernel_detection,const int aug_dim,const U eps_machine,const bool higher_precision)223 void TridiagQueue<T, U>::exec_num_fact(const int called,
224 				       const double eps_pivot,
225 				       const bool kernel_detection,
226 				       const int aug_dim,
227 				       const U eps_machine,
228 				       const bool higher_precision)
229 {
230   double pivot;
231   vector<int> list_sing;
232   double nopd;
233   if (_tridiag_solver == false) {
234     diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n",
235 	    __FILE__, __LINE__);
236   }
237 
238   _tridiag->NumericFact(_coef,
239 			eps_pivot,
240 			&pivot,
241 			kernel_detection,
242 			higher_precision,
243 			aug_dim,
244 			eps_machine,
245 			&nopd);
246 }
247 
248 template
249 void TridiagQueue<double>::exec_num_fact(const int called,
250 					 const double eps_pivot,
251 					 const bool kernel_detection,
252 					 const int aug_dim,
253 					 const double eps_machine,
254 					 const bool higher_precision);
255 
256 template
257 void TridiagQueue<quadruple>::
258 exec_num_fact(const int called,
259 	      const double eps_pivot,
260 	      const bool kernel_detection,
261 	      const int aug_dim,
262 	      const quadruple eps_machine,
263 	      const bool higher_precision);
264 
265 template
266 void TridiagQueue<complex<double>, double>::
267 exec_num_fact(const int called,
268 	      const double eps_pivot,
269 	      const bool kernel_detection,
270 	      const int aug_dim,
271 	      const double eps_machine,
272 	      const bool higher_precision);
273 
274 template
275 void TridiagQueue<complex<quadruple>, quadruple>::
276 exec_num_fact(const int called,
277 	      const double eps_pivot,
278 	      const bool kernel_detection,
279 	      const int aug_dim,
280 	      const quadruple eps_machine,
281 	      const bool higher_precision);
282 
283 template
284 void TridiagQueue<float>::exec_num_fact(const int called,
285 					 const double eps_pivot,
286 					 const bool kernel_detection,
287 					 const int aug_dim,
288 					 const float eps_machine,
289 					 const bool higher_precision);
290 template
291 void TridiagQueue<complex<float>, float>::
292 exec_num_fact(const int called,
293 	      const double eps_pivot,
294 	      const bool kernel_detection,
295 	      const int aug_dim,
296 	      const float eps_machine,
297 	      const bool higher_precision);
298 //
299 
300 template<typename T, typename U>
exec_fwbw(T * x,const int nrhs,bool isTrans)301 void TridiagQueue<T, U>::exec_fwbw(T *x, const int nrhs, bool isTrans)
302 {
303   if (_tridiag_solver == false) {
304     diss_printf(_verbose, stderr, "%s %d : tridiga_solver is not defined\n",
305 	    __FILE__, __LINE__);
306   }
307 
308   const int nrow = _tridiag->nrow();
309   if (_isMapped) {
310     if (nrhs == 1) {
311       VectorArray<T> xx(nrow);
312       for (int i = 0; i < nrow; i++) {
313 	xx[i] = x[_remap_eqn[i]];
314       }
315       _tridiag->SolveSingle(true, isTrans, xx.addrCoefs());
316       for (int i = 0; i < nrow; i++) {
317 	x[_remap_eqn[i]] = xx[i];
318       }
319       xx.free();
320     }
321     else {
322       ColumnMatrix<T> xx(nrow, nrhs);
323       for (int n = 0; n < nrhs; n++) {
324 	for (int i = 0; i < nrow; i++) {
325 	  xx(i, n) = x[_remap_eqn[i] + n * nrow];
326 	}
327       }
328       _tridiag->SolveMulti(true, isTrans, nrhs, xx);
329       for (int n = 0; n < nrhs; n++) {
330 	for (int i = 0; i < nrow; i++) {
331 	  x[_remap_eqn[i] + n * nrow] = xx(i, n);
332 	}
333       }
334       xx.free();
335     }
336   }
337   else {
338     if (nrhs == 1) {
339       _tridiag->SolveSingle(true, isTrans, x);
340     }
341     else {
342       ColumnMatrix<T> xx(nrow, nrhs, x, false);
343       _tridiag->SolveMulti(true, isTrans, nrhs, xx);
344     }
345   }
346 }
347 
348 template
349 void TridiagQueue<double>::exec_fwbw(double *x, const int nrhs, bool isTrans);
350 
351 template
352 void TridiagQueue<quadruple>::exec_fwbw(quadruple *x, const int nrhs,
353 					bool isTrans);
354 
355 template
356 void TridiagQueue<complex<double>, double>::
357 exec_fwbw(complex<double> *x, const int nrhs, bool isTrans);
358 
359 template
360 void TridiagQueue<complex<quadruple>, quadruple>::
361 exec_fwbw(complex<quadruple> *x, const int nrhs, bool isTrans);
362 
363 template
364 void TridiagQueue<float>::exec_fwbw(float *x, const int nrhs, bool isTrans);
365 
366 template
367 void TridiagQueue<complex<float>, float>::
368 exec_fwbw(complex<float> *x, const int nrhs, bool isTrans);
369 //
370