1 /*
2
3 Copyright (C) 2008-2021 Michele Martone
4
5 This file is part of librsb.
6
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20
21 */
22 /* @cond INNERDOC */
23 /*!
24 * @file
25 * @author Michele Martone
26 * @brief
27 * This source file contains functions for sparse recursive multicore triangular solve.
28 */
29 /*
30 * FIXME: the submatrices sorting routines are buggy.
31 * */
32 #include "rsb_internals.h" /* */
33 #include "rsb_lock.h" /* */
34 #include "rsb_spsv.h" /* */
35
36 #define RSB_WANT_VERBOSE_SPSV 0
37 /* #define RSB_CBLAS_X_SCAL_SPSV rsb__cblas_Xscal */
38 #define RSB_CBLAS_X_SCAL_SPSV rsb__cblas_Xscal_parallel
39
40 RSB_INTERNALS_COMMON_HEAD_DECLS
41
rsb_do_spsv_terminal(const struct rsb_mtx_t * mtxAp,const void * x,void * y,const void * alphap,rsb_coo_idx_t incx,rsb_coo_idx_t incy,rsb_trans_t transl RSB_INNER_NRHS_SPSV_ARGS)42 static rsb_err_t rsb_do_spsv_terminal(const struct rsb_mtx_t * mtxAp, const void * x, void * y, const void * alphap, rsb_coo_idx_t incx, rsb_coo_idx_t incy, rsb_trans_t transl RSB_INNER_NRHS_SPSV_ARGS)
43 {
44 /**
45 \ingroup gr_internals
46 Entry function for SPSV.
47 alphap can be NULL.
48 */
49 rsb_err_t errval = RSB_ERR_NO_ERROR;
50
51 if(!mtxAp || !y || !x || transl == RSB_INVALID_FLAGS || !rsb__is_square(mtxAp) || !RSB_IS_VALID_INCX_VALUE(incx) || !RSB_IS_VALID_INCX_VALUE(incy))
52 {
53 errval = RSB_ERR_BADARGS;
54 goto ret;
55 }
56
57 /*
58 FIXME : should handle alphap in a more specialized fashion.
59 */
60
61 #if 0
62 if(betap && !RSB_IS_ELEMENT_ONE(betap,mtxAp->typecode))
63 {
64 if(incy>1)
65 rsb__cblas_Xscal(mtxAp->typecode,rsb__do_get_columns_of(mtxAp,transl),betap,y,incy);
66 else
67 {
68 /* if should zero the output vector */
69 if(RSB_IS_ELEMENT_ZERO(betap,mtxAp->typecode))
70 rsb__cblas_Xscal(mtxAp->typecode,rsb__do_get_columns_of(mtxAp,transl),NULL,y,incy);
71 else
72 /* if should scale the output vector */
73 if(!RSB_IS_ELEMENT_ONE(betap,mtxAp->typecode))
74 rsb_vector_scale(y,betap,mtxAp->typecode,rsb__do_get_columns_of(mtxAp,transl));
75 }
76 }
77 #endif
78
79
80 #if RSB_ENABLE_INNER_NRHS_SPSV
81 {
82 const size_t lenx=(mtxAp->el_size*rhsnri);
83 const size_t leny=(mtxAp->el_size*outnri);
84 rsb_int_t nrhsi=0;
85 for(nrhsi=0;nrhsi<nrhs;++nrhsi)
86 #endif
87 {
88 #if RSB_ENABLE_INNER_NRHS_SPSV
89 void *out=(( rsb_byte_t*)y)+(leny*nrhsi);
90 const void*rhs=((const rsb_byte_t*)x)+(lenx*nrhsi);
91 #else
92 void *out=(( rsb_byte_t*)y);
93 const void*rhs=((const rsb_byte_t*)x);
94 #endif /* RSB_ENABLE_INNER_NRHS_SPMV */
95 if(!alphap || RSB_IS_ELEMENT_ONE(alphap,mtxAp->typecode))
96 {
97 if(incy==1 && incx==1)
98 errval = rsb__do_spsv_uxua(mtxAp,rhs,out,transl);
99 else
100 errval = rsb__do_spsv_sxsx(mtxAp,rhs,y,alphap,incx,incy,transl);
101 }
102 else
103 errval = rsb__do_spsv_sxsx(mtxAp,rhs,out,alphap,incx,incy,transl);
104 }
105 #if RSB_ENABLE_INNER_NRHS_SPSV
106 }
107 #endif /* RSB_ENABLE_INNER_NRHS_SPMV */
108 ret:
109 return errval;
110 }
111
static rsb_err_t rsb_do_spsv_recursive_serial(const struct rsb_mtx_t * mtxAp, const void * x, void * y, const void * alphap, rsb_coo_idx_t incx, rsb_coo_idx_t incy, rsb_trans_t transl, enum rsb_op_flags_t op_flags RSB_INNER_NRHS_SPSV_ARGS)
{
	/**
	 	\ingroup gr_internals
	 *
	 * Serial recursive triangular solve.
	 * On a recursively partitioned (quad-tree) matrix, descends into the two
	 * diagonal submatrices and applies an SPMV update with the off-diagonal
	 * submatrix in between; the visiting order depends on the triangle kind
	 * (upper/lower) and on transposition. On a terminal (leaf) matrix it
	 * delegates to rsb_do_spsv_terminal.
	 * Scaling of y by alpha is performed once, at the root matrix only.
	 * alphap may be NULL (alpha == 1).
	 * */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_aligned_t mone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];	/* typed -1, for the SPMV updates */
	rsb_aligned_t pone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];	/* typed +1, for the recursive solves */
	rsb_int_t nrhsi = 0;
	rsb__util_set_area_to_converted_integer(&mone[0],mtxAp->typecode,-1);
	rsb__util_set_area_to_converted_integer(&pone[0],mtxAp->typecode,+1);

	/* apply the alpha scaling exactly once: only at the (diagonal-positioned) root matrix */
	if(mtxAp->roff == mtxAp->coff)
		if(rsb__is_root_matrix(mtxAp))
#if RSB_ENABLE_INNER_NRHS_SPSV
			for (nrhsi=0;nrhsi<nrhs;++nrhsi)
#endif /* RSB_ENABLE_INNER_NRHS_SPSV */
				RSB_CBLAS_X_SCAL_SPSV(mtxAp->typecode,rsb__do_get_rows_of(mtxAp,transl),alphap,RSB_TYPED_OFF_PTR(mtxAp->typecode,y,nrhsi*(outnri)*incy),incy);

	if( rsb__is_recursive_matrix(mtxAp->flags))
#if 1
	{
		void*offy=NULL; const void *offx=NULL;
		// rsb_coo_idx_t scoff=submatrix->coff; rsb_coo_idx_t sroff=submatrix->roff;
		rsb_bool_t isupp = rsb__is_upper_triangle(mtxAp->flags);
		rsb_bool_t istrans = (RSB_DOES_NOT_TRANSPOSE(transl))?0:1;
		rsb_coo_idx_t half;	/* row/column index splitting the two diagonal submatrices */
		// offy=((rsb_byte_t*)y)+(mtxAp->el_size*sroff)*incy,offx=((const rsb_byte_t*)x)+(mtxAp->el_size*scoff)*incx;
		if(mtxAp->coff!=mtxAp->roff)
		{
			/* a diagonal-positioned matrix is expected here */
			RSB_ERROR("!\n");
			errval = RSB_ERR_BADARGS;goto err;
		}
		if(!(RSB_SUBMATRIX_INDEX(mtxAp,0,0)) || !(RSB_SUBMATRIX_INDEX(mtxAp,1,1)))
		{
			/* both diagonal submatrices are required for the recursion */
			RSB_ERROR("@ %d %d and with null diagonal elements, %p %p %p %p\n",mtxAp->roff,mtxAp->coff,
				mtxAp->sm[0],
				mtxAp->sm[1],
				mtxAp->sm[2],
				mtxAp->sm[3]
				);
			errval = RSB_ERR_BADARGS;goto err;
		}
		half = RSB_SUBMATRIX_INDEX(mtxAp,1,1)->roff-mtxAp->roff;
		offy=(( rsb_byte_t*)y)+(mtxAp->el_size*half)*incy;
		offx=((const rsb_byte_t*)x)+(mtxAp->el_size*half)*incx;

		/* four cases: {upper,lower} x {transposed,not}; each solves one diagonal
		 * block, updates the other half's RHS via SPMV with the off-diagonal
		 * block (weight -1), then solves the other diagonal block */
		switch(isupp)
		{
			case RSB_BOOL_TRUE:
			switch(istrans)
			{
				case RSB_BOOL_TRUE:
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,0,0),x,y,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				if(RSB_SUBMATRIX_INDEX(mtxAp,0,1) && op_flags != RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
					RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transl,&mone[0],RSB_SUBMATRIX_INDEX(mtxAp,0,1),offx,incx,NULL,y,incy,RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL RSB_INNER_NRHS_SPSV_ARGS_IDS));
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,1,1),offx,offy,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				break;
				default:
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,1,1),offx,offy,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				if(RSB_SUBMATRIX_INDEX(mtxAp,0,1) && op_flags != RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
					RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transl,&mone[0],RSB_SUBMATRIX_INDEX(mtxAp,0,1),offx,incx,NULL,y,incy,RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL RSB_INNER_NRHS_SPSV_ARGS_IDS));
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,0,0),x,y,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				break;
			}
			break;
			case RSB_BOOL_FALSE:
			switch(istrans)
			{
				case RSB_BOOL_TRUE:
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,1,1),offx,offy,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				if(RSB_SUBMATRIX_INDEX(mtxAp,1,0) && op_flags != RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
					RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transl,&mone[0],RSB_SUBMATRIX_INDEX(mtxAp,1,0),x,incx,NULL,offy,incy,RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL RSB_INNER_NRHS_SPSV_ARGS_IDS));
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,0,0),x,y,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				break;
				default:
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,0,0),x,y,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				if(RSB_SUBMATRIX_INDEX(mtxAp,1,0) && op_flags != RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
					RSB_DO_ERROR_CUMULATE(errval,rsb__do_spmv_general(transl,&mone[0],RSB_SUBMATRIX_INDEX(mtxAp,1,0),x,incx,NULL,offy,incy,RSB_OP_FLAG_DIAGONAL_OVERRIDE_EXPLICIT_SERIAL RSB_INNER_NRHS_SPSV_ARGS_IDS));
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(RSB_SUBMATRIX_INDEX(mtxAp,1,1),offx,offy,&pone[0],incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				break;
			}
			break;
			default:
			errval = RSB_ERR_INTERNAL_ERROR;goto err;
			break;
		}
	}
#else
	/* NOTE(review): this alternative branch is preprocessed out (#if 1 above)
	 * and contains unbalanced parentheses in its reverse-iteration conditionals;
	 * it would not compile if enabled. Kept for reference. */
	{
		rsb_submatrix_idx_t i,j;
		struct rsb_mtx_t * submatrix=NULL;
		void*offy=NULL; const void *offx=NULL;

		if( RSB_DOES_NOT_TRANSPOSE( transl ) )
		{
			RSB_SUBMATRIX_FOREACH(mtxAp,submatrix,i,j)
			if(submatrix)
			{
				// RSB_STDOUT("%d %d \n",i,j);
				rsb_coo_idx_t scoff=submatrix->coff; rsb_coo_idx_t sroff=submatrix->roff;

				RSB_DEBUG_ASSERT(scoff>=0);
				RSB_DEBUG_ASSERT(sroff>=0);

				//RSB_ERROR("-> 0x%p %d %d (%d) (%d)\n",submatrix,submatrix->roff,submatrix->coff,submatrix->nnz, rsb__is_recursive_matrix(mtxAp->flags));

				offy=((rsb_byte_t*)y)+(mtxAp->el_size*sroff)*incy,offx=((const rsb_byte_t*)x)+(mtxAp->el_size*scoff)*incx;
				if(submatrix->roff==submatrix->coff && (i==(RSB_DOES_NOT_TRANSPOSE(transl))?0:1) )
				{
					RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(submatrix,x,y,alphap,incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				}
				else
				if(submatrix->roff==submatrix->coff && (i==(RSB_DOES_NOT_TRANSPOSE(transl))?1:0) )
				{
					RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(submatrix,x,y,alphap,incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
				}
				else
				//if(i==((RSB_DOES_NOT_TRANSPOSE(transl))?1:0))
				if(i==1 && op_flags != RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
				{
					// RSB_STDOUT("offx %g offy %g\n",*(double*)offx,*(double*)offy);
					// RSB_STDOUT("spmv %d %d\n",submatrix->roff,submatrix->coff);
					/* FIXME : DOES NOT TAKE INTO ACCOUNT INCX,INCY */
					/* transposition is not relevant, as long as we work with square matrices everywhere */
					// if(y!=x)
					// RSB_DO_ERROR_CUMULATE(errval,rsb__xcopy(offy,x,0,0,sroff,mtxAp->el_size));
					// FIXME : the following lines should be equivalent, but they aren't . why ?
					RSB_DO_ERROR_CUMULATE(errval,rsb_do_spmv_recursive_serial(submatrix,offx,offy,&mone[0],NULL,1,1,transl RSB_INNER_NRHS_SPMV_ARGS_IDS));
					// RSB_DO_ERROR_CUMULATE(errval,rsb_do_spmv_recursive_parallel(submatrix,offx,offy,&mone[0],NULL,1,1,transl));
					//RSB_DO_ERROR_CUMULATE(errval,rsb_spmv_unua(submatrix,offx,offy,transl));
					// RSB_STDOUT("offx %g offy %g\n",*(double*)offx,*(double*)offy);
				}
				if(RSB_SOME_ERROR(errval))
					goto err;
			}}
			else
			{
				RSB_SUBMATRIX_FOREACH_REVERSE(mtxAp,submatrix,i,j)
				{
				// RSB_STDOUT("%d %d \n",i,j);
				if(submatrix)
				{
					// RSB_STDOUT("%d %d \n",i,j);
					rsb_coo_idx_t scoff=submatrix->coff; rsb_coo_idx_t sroff=submatrix->roff;

					RSB_DEBUG_ASSERT(scoff>=0);
					RSB_DEBUG_ASSERT(sroff>=0);

					offy=((rsb_byte_t*)y)+(mtxAp->el_size*sroff)*incy,offx=((const rsb_byte_t*)x)+(mtxAp->el_size*scoff)*incx;
					if(submatrix->roff==submatrix->coff && (i==(RSB_DOES_NOT_TRANSPOSE(transl)))?0:1) )
						RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(submatrix,x,y,alphap,incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
					else
					if(submatrix->roff==submatrix->coff && (i==RSB_DOES_NOT_TRANSPOSE(transl)))?1:0) )
						RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_recursive_serial(submatrix,x,y,alphap,incx,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS));
					else
					if(i==1 && op_flags != RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
					{
						RSB_DO_ERROR_CUMULATE(errval,rsb_do_spmv_recursive_serial(submatrix,offx,offy,&mone[0],NULL,1,1,transl RSB_INNER_NRHS_SPMV_ARGS_IDS));
					}
					if(errval != RSB_ERR_NO_ERROR)goto err;
				}}
			}
	}
#endif
	else
	{
		/* terminal (leaf) matrix: solve directly (offsets are zero here) */
		void*offy=NULL; const void *offx=NULL;
		rsb_coo_idx_t scoff=0;
		rsb_coo_idx_t sroff=0;
		RSB_DEBUG_ASSERT(scoff>=0);
		RSB_DEBUG_ASSERT(sroff>=0);
		offy=((rsb_byte_t*)y)+(mtxAp->el_size*sroff)*incy,offx=((const rsb_byte_t*)x)+(mtxAp->el_size*scoff)*incx;
		RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_terminal(mtxAp,offx,offy,alphap,incx,incy,transl RSB_OUTER_NRHS_SPSV_ARGS_IDS));
	}
err:
	RSB_DO_ERR_RETURN(errval)
}
292
rsb__do_get_submatrices_block_for_get_csr(const struct rsb_mtx_t * mtxAp,struct rsb_translated_matrix_t ** all_leaf_matricesp,rsb_submatrix_idx_t * all_leaf_matrices_np)293 rsb_err_t rsb__do_get_submatrices_block_for_get_csr(const struct rsb_mtx_t * mtxAp, struct rsb_translated_matrix_t ** all_leaf_matricesp, rsb_submatrix_idx_t * all_leaf_matrices_np)
294 {
295 /**
296 * \ingroup gr_internals
297 * FIXME: rename : csr -> csc
298 * */
299 rsb_err_t errval = RSB_ERR_NO_ERROR;
300 rsb_submatrix_idx_t all_leaf_matrices_n=0;
301 struct rsb_translated_matrix_t * all_leaf_matrices=NULL;
302
303 all_leaf_matrices_n=mtxAp->all_leaf_matrices_n;
304 all_leaf_matrices = rsb__clone_area(mtxAp->all_leaf_matrices,sizeof(struct rsb_translated_matrix_t)*all_leaf_matrices_n);
305 errval = rsb__sort_array_of_leaf_matrices(NULL,all_leaf_matrices,all_leaf_matrices_n,rsb_op_get_csr);
306
307 *all_leaf_matrices_np=all_leaf_matrices_n;
308 *all_leaf_matricesp=all_leaf_matrices;
309 RSB_DO_ERR_RETURN(errval)
310 }
311
rsb__do_get_submatrices_for_ussv(const struct rsb_mtx_t * mtxAp,struct rsb_translated_matrix_t ** all_leaf_matricesp,rsb_submatrix_idx_t * all_leaf_matrices_np,rsb_trans_t transT)312 rsb_err_t rsb__do_get_submatrices_for_ussv( const struct rsb_mtx_t * mtxAp, struct rsb_translated_matrix_t ** all_leaf_matricesp, rsb_submatrix_idx_t * all_leaf_matrices_np, rsb_trans_t transT)
313 {
314 /**
315 \ingroup gr_internals
316 */
317 rsb_err_t errval = RSB_ERR_NO_ERROR;
318 rsb_submatrix_idx_t all_leaf_matrices_n=0;
319 struct rsb_translated_matrix_t * all_leaf_matrices=NULL;
320
321 all_leaf_matrices_n=mtxAp->all_leaf_matrices_n;
322 all_leaf_matrices = rsb__clone_area(mtxAp->all_leaf_matrices,sizeof(struct rsb_translated_matrix_t)*all_leaf_matrices_n);
323 rsb__submatrices_exclude_nontriangular(all_leaf_matrices,&all_leaf_matrices_n,mtxAp);
324 errval = rsb__sort_array_of_leaf_matrices_for_ussv(mtxAp,all_leaf_matrices,all_leaf_matrices_n,transT);
325
326 *all_leaf_matrices_np=all_leaf_matrices_n;
327 *all_leaf_matricesp=all_leaf_matrices;
328 RSB_DO_ERR_RETURN(errval)
329 }
330
void rsb__submatrices_exclude_nontriangular(struct rsb_translated_matrix_t * all_leaf_matrices, rsb_submatrix_idx_t * all_leaf_matrices_np, const struct rsb_mtx_t * mtxAp)
{
	/**
	 \ingroup gr_internals
	 * Compacts into all_leaf_matrices only those leaves of mtxAp that lie in
	 * the triangle indicated by the matrix flags (upper: roff <= coff;
	 * lower: roff >= coff), writing the surviving count to *all_leaf_matrices_np.
	 */
	rsb_submatrix_idx_t idx;
	rsb_submatrix_idx_t kept = 0;
	const rsb_submatrix_idx_t total = mtxAp->all_leaf_matrices_n;

	RSB_DEBUG_ASSERT(mtxAp);
	RSB_DEBUG_ASSERT(all_leaf_matrices);
	RSB_DEBUG_ASSERT(all_leaf_matrices_np);

	if(rsb__is_upper_triangle(mtxAp->flags))
	{
		/* keep leaves on or above the diagonal */
		for(idx=0;idx<total;++idx)
		{
			if (mtxAp->all_leaf_matrices[idx].roff<=mtxAp->all_leaf_matrices[idx].coff)
			{
				all_leaf_matrices[kept]=mtxAp->all_leaf_matrices[idx];
				++kept;
			}
		}
	}
	else
	{
		/* keep leaves on or below the diagonal */
		for(idx=0;idx<total;++idx)
		{
			if (mtxAp->all_leaf_matrices[idx].roff>=mtxAp->all_leaf_matrices[idx].coff)
			{
				all_leaf_matrices[kept]=mtxAp->all_leaf_matrices[idx];
				++kept;
			}
		}
	}
	*all_leaf_matrices_np=kept;
}
357
static rsb_err_t rsb__do_spsv_uxua_recursive_parallel(const struct rsb_mtx_t * mtxAp, const void * x, void * y, const void * alphap, rsb_coo_idx_t incx, rsb_coo_idx_t incy, rsb_trans_t transl, enum rsb_op_flags_t op_flags RSB_INNER_NRHS_SPSV_ARGS)
{
	/**
	 \ingroup gr_internals
	 * Parallel (OpenMP) triangular solve for a recursively partitioned matrix.
	 * Operates in place: x must equal y (enforced below).
	 *
	 * each submatrix starting at (i,j) depends on :

	 i if i=j

	 1 2 3 4 5 6
	 +-+---------+
	 +-+-+       | 1
	 +-+-+-+     | 2
	 +-+-+-+-+   | 3
	 +-+-+-+-+-+ | 4
	 +-+-+-+-+-+-+ 5
	 +-+-+-+-+-+-+ 6

	 The active rows in the spmv are locked with an interval information.
	 Since the rows interval active in trsv is not under spmv, there is no need for another lock.

	 We need a ...

	 alphap NULL means alpha = 1

	 * */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
#if RSB_WANT_OMP_RECURSIVE_KERNELS
#if RSB_WANT_VERBOSE_SPSV
	rsb_time_t sv_time = RSB_CONST_IMPOSSIBLY_BIG_TIME,dp_time = RSB_CONST_IMPOSSIBLY_BIG_TIME;
#endif /* RSB_WANT_VERBOSE_SPSV */
	struct rsb_translated_matrix_t * all_leaf_matrices=NULL;	/** NEW, EXPERIMENTAL */
	struct rsb_rows_lock_struct_t lock;	/* row-interval lock shared by all threads */
	rsb_submatrix_idx_t * deps=NULL;	/** NEW, EXPERIMENTAL; deps[n] = index of the diagonal leaf n waits for */
	rsb_submatrix_idx_t all_leaf_matrices_n=0;
	rsb_bool_t backdeps,isupptri;
	rsb_aligned_t alpha_inv[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
	rsb_aligned_t mone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];	/* typed -1, for SPMV updates */
	rsb_aligned_t pone[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];	/* typed +1, for leaf solves */
	rsb__util_set_area_to_converted_integer(&mone[0],mtxAp->typecode,-1);
	rsb__util_set_area_to_converted_integer(&pone[0],mtxAp->typecode,+1);

	RSB_BZERO_P(&lock);
	if( !mtxAp)
	{errval = RSB_ERR_GENERIC_ERROR;goto err;}
	if( !rsb__is_recursive_matrix(mtxAp->flags) || !mtxAp)
	{errval = RSB_ERR_GENERIC_ERROR;goto err;}
	if(x!=y)/* FIXME */
	{errval = RSB_ERR_GENERIC_ERROR;goto err;}

	isupptri = rsb__is_upper_triangle(mtxAp->flags);
	/* backdeps = (transposed XOR upper-triangular): selects which side of
	 * lock.dr the dependency tests below compare against */
	backdeps = RSB_BOOL_XOR(RSB_DOES_TRANSPOSE(transl),isupptri);
	rsb__util_set_area_to_negated_fraction(alpha_inv,alphap,mtxAp->typecode);

#if RSB_WANT_VERBOSE_SPSV
	dp_time = rsb_time();
#endif /* RSB_WANT_VERBOSE_SPSV */

	/* clone + filter + sort the leaves in solve order; allocate the deps table */
	errval = rsb__do_get_submatrices_for_ussv(mtxAp,&all_leaf_matrices,&all_leaf_matrices_n,transl);
	deps = rsb__malloc(sizeof(rsb_submatrix_idx_t)*all_leaf_matrices_n);
	if(RSB_SOME_ERROR(errval) || !all_leaf_matrices || !deps)
	{errval = RSB_ERR_ENOMEM;goto err;}

#if 0
	{
		/* printout */
		rsb_submatrix_idx_t n;
		for(n=0;n<all_leaf_matrices_n;++n)
			RSB_STDOUT("got %d/%d:%d~%d,%d~%d\n",
				n,all_leaf_matrices_n,
				all_leaf_matrices[n].roff,all_leaf_matrices[n].mtxlp->nr+all_leaf_matrices[n].roff,
				all_leaf_matrices[n].coff,all_leaf_matrices[n].mtxlp->nc+all_leaf_matrices[n].coff);
	}
#endif
#if 0
	{
		rsb_submatrix_idx_t n;
		rsb_coo_idx_t s=0;
		for(n=0;n<all_leaf_matrices_n;++n)
			if(all_leaf_matrices[n].roff==all_leaf_matrices[n].coff)
			{
				s+=all_leaf_matrices[n].mtxlp->nr;
				// RSB_STDOUT("%d/%d [%d~%d,%d~%d] (on diag)\n",n,all_leaf_matrices_n,
				// all_leaf_matrices[n].roff, all_leaf_matrices[n].roff+all_leaf_matrices[n].mtxlp->nr-1,
				// all_leaf_matrices[n].coff, all_leaf_matrices[n].coff+all_leaf_matrices[n].mtxlp->nc-1);
			}
			else
				;
			// RSB_STDOUT("%d/%d [%d~%d,%d~%d] (not on diag)\n",n,all_leaf_matrices_n,
			// all_leaf_matrices[n].roff, all_leaf_matrices[n].roff+all_leaf_matrices[n].mtxlp->nr-1,
			// all_leaf_matrices[n].coff, all_leaf_matrices[n].coff+all_leaf_matrices[n].mtxlp->nc-1);
		if(mtxAp->nr != s)
		{	RSB_STDOUT("FATAL : sum of diagonal matrices rows %d != %d \n",s,mtxAp->nr);
			goto err;
		}
	}
#endif
#if 0
	if(RSB_DOES_TRANSPOSE(transl))
	{
		rsb_submatrix_idx_t n; for(n=0;n<all_leaf_matrices_n;++n) {
			RSB_SWAP(rsb_coo_idx_t,all_leaf_matrices[n].roff,all_leaf_matrices[n].coff);
		}
	}
#endif

	if(0)
	if(RSB_DOES_TRANSPOSE(transl))
	{
		/* disabled: mirror each leaf's offsets/extents for the transposed walk */
		rsb_submatrix_idx_t n;
		for(n=0;n<all_leaf_matrices_n;++n)
		{
			all_leaf_matrices[n].roff=mtxAp->nc-(all_leaf_matrices[n].coff+all_leaf_matrices[n].mtxlp->nc*1);
			all_leaf_matrices[n].coff=mtxAp->nr-(all_leaf_matrices[n].roff+all_leaf_matrices[n].mtxlp->nr*1);
			all_leaf_matrices[n].nr=all_leaf_matrices[n].mtxlp->nc;
			all_leaf_matrices[n].nc=all_leaf_matrices[n].mtxlp->nr;
		}
	}

	if(errval != RSB_ERR_NO_ERROR)
		goto err;
#if 0
	{
		/* printout */
		rsb_submatrix_idx_t n;
		for(n=0;n<all_leaf_matrices_n;++n)
			RSB_STDOUT("got %d/%d:%d~%d,%d~%d\n",
				n,all_leaf_matrices_n,
				// all_leaf_matrices[n].mtxlp->roff,all_leaf_matrices[n].mtxlp->nr+all_leaf_matrices[n].mtxlp->roff,
				// all_leaf_matrices[n].mtxlp->coff,all_leaf_matrices[n].mtxlp->nc+all_leaf_matrices[n].mtxlp->coff);
				all_leaf_matrices[n].roff,all_leaf_matrices[n].nr+all_leaf_matrices[n].roff,
				all_leaf_matrices[n].coff,all_leaf_matrices[n].nc+all_leaf_matrices[n].coff);
	}
#endif
#if 1
	{
		/* for each leaf, record in deps[n] the nearest preceding diagonal
		 * leaf it depends on (FIXME note at file top: this ordering logic
		 * is reported buggy) */
		rsb_submatrix_idx_t n;
		for(n=1;n<all_leaf_matrices_n;++n)
		{
			rsb_submatrix_idx_t np=n-1;
			if(all_leaf_matrices[n].roff==all_leaf_matrices[n].coff)
			{
				while(np>0 && all_leaf_matrices[np].roff!=all_leaf_matrices[np].coff)
					--np;
				//for(;np<n;++np)
				//	RSB_STDOUT("%d vs %d ? %d\n",n,np,rsb__compar_rcsr_matrix_for_spsvl(all_leaf_matrices+np,all_leaf_matrices+n));
			}
			else
			{
				while(np>0
					&& !(
						( all_leaf_matrices[np].roff==all_leaf_matrices[np].coff ) /*&&
						( (all_leaf_matrices[np].coff+ all_leaf_matrices[np].mtxlp->nc)>=
						  (all_leaf_matrices[n ].coff+ all_leaf_matrices[n ].mtxlp->nc) )*/
					))
					--np;
			}
			deps[n]=np;
			//#define RSB__TRSV_OUT__ 1
			if(RSB__TRSV_OUT__)RSB_STDOUT("dep: %d -> %d\n",n,np);
		}
	}
#endif
#if 0
	if(RSB_DOES_TRANSPOSE(transl))
	{
		rsb_submatrix_idx_t n; for(n=0;n<all_leaf_matrices_n;++n) {
			RSB_SWAP(rsb_coo_idx_t,all_leaf_matrices[n].roff,all_leaf_matrices[n].coff);
		}
#if 1
		for(n=0;n<all_leaf_matrices_n/2;++n) { RSB_SWAP(struct rsb_translated_matrix_t,all_leaf_matrices[n],all_leaf_matrices[(all_leaf_matrices_n-1)-n]);}
	}
#endif
#endif
#if 0
	{
		rsb_submatrix_idx_t n;
		for(n=0;n<all_leaf_matrices_n;++n)
			if(all_leaf_matrices[n].roff<all_leaf_matrices[n].coff)
			{
				RSB_ERROR("all_leaf_matrices[n].roff<all_leaf_matrices[n].coff (%d<%d) in a lower triangular matrix!\n",
					all_leaf_matrices[n].roff,all_leaf_matrices[n].coff);
				{errval = RSB_ERR_GENERIC_ERROR;goto err;}
			}
	}
#endif
#if 0
	{
		rsb_submatrix_idx_t n,ad=0,d=0,pad=0,pda=0;
		// int cppad=0,ncppad=0;
		for(n=0;n<mtxAp->all_leaf_matrices_n;++n)
		{
			if(all_leaf_matrices[n].roff==all_leaf_matrices[n].coff)
			{
				rsb_submatrix_idx_t nn;
				rsb_submatrix_idx_t dr=all_leaf_matrices[n].roff+all_leaf_matrices[n].mtxlp->nr;
				++d;
				for(nn=n;nn<mtxAp->all_leaf_matrices_n;++nn)
					if(
						all_leaf_matrices[nn].roff >= dr &&
						all_leaf_matrices[nn].roff > all_leaf_matrices[nn].coff &&
						all_leaf_matrices[nn].coff + all_leaf_matrices[nn].mtxlp->nc <= dr &&
						1
					)
					{
						pad++; /* parallelizable anti diagonal */
					}
				for(nn=n+1;nn<mtxAp->all_leaf_matrices_n &&
					all_leaf_matrices[nn].roff > all_leaf_matrices[nn].coff ;++nn)
					pda++;
			}
		}
		RSB_STDOUT(
			" diagonal blocks : %d\n"
			"antidiagonal blocks : %d\n"
			"prediagonal blocks : %d\n"
			// "antidiagonal blocks on the critical path : %d\n"
			// "antidiagonal blocks not on the critical path : %d\n"
			,d,pad,pda//,cppad,ncppad
		);
	}
#endif

	errval = rsb__do_lock_init(&lock,rsb_global_session_handle.rsb_want_threads,all_leaf_matrices_n,mtxAp,op_flags);
	if(errval != RSB_ERR_NO_ERROR)
		goto err;


	{
		/* pre-parallel phase: scale y by alpha, then solve the first (corner)
		 * diagonal leaf serially to seed the lock's done-rows frontier */
#if RSB_ENABLE_INNER_NRHS_SPSV
		const size_t leny=(mtxAp->el_size*outnri);
		rsb_int_t nrhsi=0;
		for(nrhsi=0;nrhsi<nrhs;++nrhsi)
#endif /* RSB_ENABLE_INNER_NRHS_SPSV */
		{
#if RSB_ENABLE_INNER_NRHS_SPSV
			void *out=(( rsb_byte_t*)y)+(leny*nrhsi);
#else /* RSB_ENABLE_INNER_NRHS_SPSV */
			void *out=(( rsb_byte_t*)y);
#endif /* RSB_ENABLE_INNER_NRHS_SPSV */
			RSB_CBLAS_X_SCAL_SPSV(mtxAp->typecode,rsb__do_get_rows_of(mtxAp,transl),alphap,out,incy);

#if 1
			/* corner triangle solve */
			if(all_leaf_matrices_n)
			{
				rsb_submatrix_idx_t n=0;
				void * offy=((rsb_byte_t*)out)+(mtxAp->el_size*all_leaf_matrices[n].roff)*incy;
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_terminal(all_leaf_matrices[n].mtxlp,offy,offy,&pone[0],incy,incy,transl RSB_DEFAULT_OUTER_NRHS_SPMV_ARGS));
				rsb__do_lock_get(&lock,0,all_leaf_matrices[n].mtxlp->nr+all_leaf_matrices[n].roff,all_leaf_matrices[n].mtxlp->nr,all_leaf_matrices[n].coff,all_leaf_matrices[n].mtxlp->nc,n,transl);
				rsb__do_lock_release(&lock,0);
				lock.dm=1;	/* first matrix processed */
				if(!backdeps)
					lock.dr=all_leaf_matrices[n].mtxlp->nr+all_leaf_matrices[n].roff;
				else
					lock.dr=all_leaf_matrices[n].roff;
			}
#else
			if(all_leaf_matrices_n)
			{
				if(backdeps)
					lock.dr=all_leaf_matrices[0].mtxlp->nr+all_leaf_matrices[0].roff;
				else
					lock.dr=all_leaf_matrices[0].mtxlp->nr;
			}
#endif
		}
	}

#if RSB_WANT_VERBOSE_SPSV
	sv_time = rsb_time();
	dp_time=-dp_time+sv_time;
#endif /* RSB_WANT_VERBOSE_SPSV */
	/* work-stealing style loop: every thread repeatedly scans all leaves and
	 * tries to acquire one whose dependencies (tracked via lock.dr and the
	 * row-interval lock) are satisfied; thread 0 alone performs the diagonal
	 * (spsv) leaves, any thread may perform off-diagonal (spmv) updates */
#pragma omp parallel reduction(|:errval) shared(lock,all_leaf_matrices,mtxAp) RSB_NTC
{
	const rsb_thr_t th_id = omp_get_thread_num();
	rsb_submatrix_idx_t n=0;
	rsb_submatrix_idx_t dm=0;	/* local snapshot of lock.dm (leaves done so far) */
	#pragma omp barrier

	if(th_id >= rsb_global_session_handle.rsb_want_threads)
		goto skip;

	#pragma omp critical (rsb_spsv_crs)
	{ dm=lock.dm; }

again:
	for(n=0;n<all_leaf_matrices_n;++n)
	//if(!RSB_BITMAP_GET(lock.bmap,1,lock.subms,0,n))
	{
		struct rsb_mtx_t *submatrix=all_leaf_matrices[n].mtxlp;
		const rsb_byte_t* trhs=((rsb_byte_t*)y)+mtxAp->el_size*all_leaf_matrices[n].coff*incx;
		rsb_byte_t* tout=((rsb_byte_t*)y)+mtxAp->el_size*all_leaf_matrices[n].roff*incy;
		enum rsb_op_t op = rsb_op_nop;
		rsb_coo_idx_t roff=all_leaf_matrices[n].roff;
		rsb_coo_idx_t coff=all_leaf_matrices[n].coff;

		//#define RSB__TRSV_OUT__ 1
		#pragma omp critical (rsb_spsv_crs)
		{
			if( th_id==0 )
			{
				/* only thread 0 may pick up a diagonal leaf (spsv) adjacent
				 * to the current done-rows frontier lock.dr */
				if(
					(
						(( backdeps) && coff+all_leaf_matrices[n].mtxlp->nc==lock.dr ) ||
						((!backdeps) && roff==lock.dr )
					)
					&& roff==coff &&
					(!RSB_BITMAP_GET(lock.bmap,1,lock.subms,0,n))
				)
				{
					rsb_submatrix_idx_t np=deps[n];
					while(RSB_BITMAP_GET(lock.bmap,1,lock.subms,0,np))
					{
						++np;
						if(RSB__TRSV_OUT__) RSB_STDOUT("%d -> %d\n",n,np);
					}
					if(np==n && (rsb__do_lock_get(&lock,th_id,roff,all_leaf_matrices[n].mtxlp->nr,coff,all_leaf_matrices[n].mtxlp->nc,n,transl)==RSB_BOOL_TRUE))
						op = rsb_op_spsvl;
				}
			}

			if(op == rsb_op_nop)
			{
				// if(RSB__TRSV_OUT)RSB_STDOUT("%d@%d %d %d %d %d\n",n,th_id,op,all_leaf_matrices[n].roff,all_leaf_matrices[n].coff,omp_get_num_threads());
				/* off-diagonal leaf (spmv update): eligible once the rows it
				 * reads are behind the frontier, and its row interval is free */
				if(
					(((!backdeps) &&
					(
						(roff >=lock.dr && !isupptri &&
						roff != coff &&
						coff + all_leaf_matrices[n].mtxlp->nc <= lock.dr) ||
						(coff >=lock.dr && isupptri &&
						roff != coff &&
						roff + all_leaf_matrices[n].mtxlp->nr <= lock.dr)
					)
					)
					||
					(( backdeps) &&
					(( isupptri &&(coff >= lock.dr)) ||
					((!isupptri)&&(roff >= lock.dr))) &&
					roff != coff //&&
					//coff + all_leaf_matrices[n].mtxlp->nc <=lock.dr
					))
					&&
#if RSB_WANT_BOUNDED_BOXES_SPSV
					(rsb__do_lock_get(&lock,th_id,all_leaf_matrices[n].mtxlp->broff,all_leaf_matrices[n].mtxlp->bm,all_leaf_matrices[n].mtxlp->bcoff,all_leaf_matrices[n].mtxlp->bk,n,transl)==RSB_BOOL_TRUE)&&
#else /* RSB_WANT_BOUNDED_BOXES_SPSV */
					(rsb__do_lock_get(&lock,th_id,all_leaf_matrices[n].roff,all_leaf_matrices[n].mtxlp->nr,all_leaf_matrices[n].coff,all_leaf_matrices[n].mtxlp->nc,n,transl)==RSB_BOOL_TRUE)&&
#endif /* RSB_WANT_BOUNDED_BOXES_SPSV */
					1
				)
					op = rsb_op_spmv ;
			}
		}
		// if(RSB__TRSV_OUT)RSB_STDOUT("%d@%d %d %d %d %d\n",n,th_id,op,all_leaf_matrices[n].roff,all_leaf_matrices[n].coff,omp_get_num_threads());
		if(RSB__TRSV_OUT__)dm=lock.dm;
		if(RSB__TRSV_OUT__)
		if(
			(!RSB_BITMAP_GET(lock.bmap,1,lock.subms,0,n) && op == rsb_op_nop)||
			( RSB_BITMAP_GET(lock.bmap,1,lock.subms,0,n) && op != rsb_op_nop)
		)
			RSB_STDOUT("%d/%d [%d~%d,%d~%d] on th.%d -> op %d (dr:%d) (done:%d)\n",n,all_leaf_matrices_n,
				all_leaf_matrices[n].roff, all_leaf_matrices[n].roff+submatrix->nr-1,
				all_leaf_matrices[n].coff, all_leaf_matrices[n].coff+submatrix->nc-1,
				th_id,op,lock.dr,dm);

		switch(op){
			case rsb_op_spsvl:
			{
				/* diagonal blocks */
				if(RSB__TRSV_OUT__)RSB_STDOUT("spsv on %d on %d \n",n,th_id);
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_terminal(submatrix,trhs,tout,&pone[0],incx,incy,transl RSB_OUTER_NRHS_SPSV_ARGS_IDS));
				//RSB_DO_ERROR_CUMULATE(errval,rsb_do_spsv_terminal(submatrix,trhs,tout,alphap,incx,incy,transl RSB_OUTER_NRHS_SPSV_ARGS_IDS));
				#pragma omp critical (rsb_spsv_crs)
				{
					/* advance the done-rows frontier past this diagonal block */
					if(!backdeps)
						lock.dr=submatrix->nr+roff;
					else
						lock.dr=submatrix->roff;
					rsb__do_lock_release(&lock,th_id); ++lock.dm; dm=lock.dm;
				}
			}
			break;
			case rsb_op_spmv:
			{
				/* antidiagonal blocks */
				if(RSB__TRSV_OUT__)RSB_STDOUT("spmv on %d on %d \n",n,th_id);
				//RSB_DO_ERROR_CUMULATE(errval,rsb_spmv_unua(submatrix,trhs,tout,transl));
				RSB_DO_ERROR_CUMULATE(errval,rsb_do_spmv_non_recursive(submatrix,trhs,tout,&mone[0],NULL,incx,incy,transl RSB_OUTER_NRHS_SPSV_ARGS_IDS));
				#pragma omp critical (rsb_spsv_crs)
				{rsb__do_lock_release(&lock,th_id);++lock.dm;dm=lock.dm;}
			}
			break;
			case rsb_op_nop:
			{
			}
			break;
			default: //rsb_op_spsvlt, rsb_op_spsvu, rsb_op_spsvut, rsb_op_get_csr
				RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);
		}
		//if(errval != RSB_ERR_NO_ERROR)
		//	break;
		//if(op != rsb_op_nop)

	}

	#pragma omp critical (rsb_spsv_crs)
	{ dm=lock.dm; }
	if(RSB__TRSV_OUT__)RSB_STDOUT("on thread %d : done %d/%d \n",th_id,lock.dm,all_leaf_matrices_n);
	if(dm<all_leaf_matrices_n
#if RSB_WANT_EARLY_PARALLEL_REGION_JUMPOUT_SPSV
		&& ((all_leaf_matrices_n-dm)>th_id)
#endif /* RSB_WANT_EARLY_PARALLEL_REGION_JUMPOUT_SPSV */
	)goto again;
skip:
	RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS;
	/* FIXME : could place a barrier here. */
	#pragma omp barrier
	/* now we can leave the parallel region safely */
}
err:
	RSB_CONDITIONAL_FREE(all_leaf_matrices);
	RSB_CONDITIONAL_FREE(deps);
	RSB_DO_ERROR_CUMULATE(errval,rsb__do_lock_free(&lock));
#if RSB_WANT_VERBOSE_SPSV
	sv_time=-sv_time+rsb_time();
	RSB_INFO("SPSV: solve time:%lg deps time:%lg\n",sv_time,dp_time);
#endif /* RSB_WANT_VERBOSE_SPSV */
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
	errval = RSB_ERR_UNSUPPORTED_OPERATION;
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
	RSB_DO_ERR_RETURN(errval)
}
792
rsb_err_t rsb__do_spsv_general(rsb_trans_t transl, const void * alphap, const struct rsb_mtx_t * mtxAp, const void * x, rsb_coo_idx_t incx, void * y, rsb_coo_idx_t incy, enum rsb_op_flags_t op_flags RSB_INNER_NRHS_SPSV_ARGS)
{
	/**
	 \ingroup gr_internals
	 computes \f$y \leftarrow \alpha op(A)^{-1} \cdot y \f$

	 Entry function for SPSV.
	 Validates arguments (square triangular matrix required), copies x into y
	 when they differ (the solvers work in place on y), then dispatches to the
	 terminal, serial-recursive or parallel-recursive implementation according
	 to the matrix structure and op_flags.
	 alphap can be NULL (alpha == 1).

	 FIXME : incx and incy check should be stricter
	 FIXME : x!=y are disabled
	 * \return RSB_ERR_NO_ERROR on correct operation, an error code (see \ref errors_section) otherwise.
	 */
	rsb_err_t errval = RSB_ERR_NO_ERROR;

	if(!mtxAp || !y || !x || transl == RSB_INVALID_FLAGS || !rsb__is_square(mtxAp) || incx<1 || incy<1)
	{
		errval = RSB_ERR_BADARGS;goto err;
	}
	if(!RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_TRIANGULAR))
	{
		errval = RSB_ERR_BADARGS;goto err;
	}
	if(RSB__FLAG_HAS_UNSPECIFIED_TRIANGLE(mtxAp->flags))
	{
		errval = RSB_ERR_BADARGS;
		RSB_PERR_GOTO(err,RSB_ERRM_BOH_TRI);
	}
#if 1
	if(x!=y)
	{
		/* the solve operates in place on y: copy each RHS of x into y first */
		// FIXME: should parallelize this
		rsb_int_t nrhsi = 0;
#if RSB_ENABLE_INNER_NRHS_SPSV
		for (nrhsi=0;nrhsi<nrhs;++nrhsi)
#endif /* RSB_ENABLE_INNER_NRHS_SPSV */
		{
			//RSB_DO_ERROR_CUMULATE(errval,rsb__xcopy_strided_typed(y,x,0,0,mtxAp->nr,mtxAp->typecode,incy,incx));
			RSB_DO_ERROR_CUMULATE(errval,rsb__cblas_Xcopy(mtxAp->typecode,mtxAp->nr,RSB_TYPED_OFF_PTR(mtxAp->typecode,x,nrhsi*(rhsnri)*incx),incx,RSB_TYPED_OFF_PTR(mtxAp->typecode,y,nrhsi*(outnri)*incy),incy));
		}
	}
#endif

	/* dispatch: from here on, the solve reads and writes y only */
	if(op_flags == RSB_OP_FLAG_INFINITE_PARALLELISM_EMULATE)
	{
		errval = rsb_do_spsv_recursive_serial(mtxAp,y,y,alphap,incy,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS);
		goto done;
	}
	if( !rsb__is_recursive_matrix(mtxAp->flags))
		errval = rsb_do_spsv_terminal(mtxAp,y,y,alphap,incy,incy,transl RSB_OUTER_NRHS_SPSV_ARGS_IDS);
	else
	{
#if RSB_WANT_OMP_RECURSIVE_KERNELS
		if(op_flags == RSB_OP_FLAG_WANT_SERIAL)
			errval = rsb_do_spsv_recursive_serial(mtxAp,y,y,alphap,incy,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS);
		else
			errval = rsb__do_spsv_uxua_recursive_parallel(mtxAp,y,y,alphap,incy,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS);
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		errval = rsb_do_spsv_recursive_serial(mtxAp,y,y,alphap,incy,incy,transl,op_flags RSB_INNER_NRHS_SPSV_ARGS_IDS);
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
	}
	goto done;
done:
#if 0
	{
		/* FIXME : won't work with when incx or incy is not 1 */
		rsb_aligned_t checksum[RSB_CONST_ENOUGH_ALIGNED_FOR_ANY_TYPE];
		RSB_CBLAS_X_SCAL_SPSV(mtxAp->typecode,1,NULL,checksum,1);
		rsb_nnz_idx_t n;
		rsb__util_vector_sum(checksum,y,mtxAp->typecode,mtxAp->nr);
		RSB_STDOUT("#spsv checksum:\n");
		rsb__debug_print_value(checksum,typecode);
		RSB_STDOUT("\n");
	}
#endif
err:
	RSB_DO_ERR_RETURN(errval)
}
871
rsb__do_spsv(rsb_trans_t transT,const void * alphap,const struct rsb_mtx_t * mtxTp,const void * Xp,rsb_coo_idx_t incX,void * Yp,rsb_coo_idx_t incY)872 rsb_err_t rsb__do_spsv(rsb_trans_t transT, const void * alphap, const struct rsb_mtx_t * mtxTp, const void * Xp, rsb_coo_idx_t incX, void * Yp, rsb_coo_idx_t incY)
873 {
874 return rsb__do_spsv_general(transT,alphap,mtxTp,Xp,incX,Yp,incY,RSB_OP_FLAG_DEFAULT RSB_DEFAULT_OUTER_NRHS_SPSV_ARGS );
875 }
876
877 /* @endcond */
878