1 /*
2
3 Copyright (C) 2008-2021 Michele Martone
4
5 This file is part of librsb.
6
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20
21 */
22 /* @cond INNERDOC */
23 /*!
24 * @file
25 * @author Michele Martone
26 * @brief
27 * This source file contains locks for sparse recursive multicore operations.
28 * */
29 #include "rsb_lock.h"
30
31 RSB_INTERNALS_COMMON_HEAD_DECLS
32
33 #define RSB_WANT_DO_LOCK_TEST 0
34
35 /*
36 TODO: one shall reduce the external interface, e.g. to a single rsb__lock function.
37 */
38
rsb__do_lock_release(struct rsb_rows_lock_struct_t * lock,rsb_thr_t th_id)39 rsb_bool_t rsb__do_lock_release(struct rsb_rows_lock_struct_t *lock, rsb_thr_t th_id)
40 {
41 /* *
42 * \ingroup gr_internals
43 * */
44 if(RSB__TRSV_OUT_)RSB_INFO("thread %d releases %d %d\n",th_id,lock->coresrowf[th_id],lock->coresrowl[th_id]);
45 lock->corescoll[th_id]=RSB_MARKER_COO_VALUE;
46 lock->corescolf[th_id]=RSB_MARKER_COO_VALUE;
47 lock->coresrowl[th_id]=RSB_MARKER_COO_VALUE;
48 lock->coresrowf[th_id]=RSB_MARKER_COO_VALUE;
49 return RSB_BOOL_TRUE;
50 }
51
rsb_do_lock_check_if_matrix_done(const struct rsb_rows_lock_struct_t * lock,rsb_submatrix_idx_t subm)52 static RSB_INLINE rsb_bool_t rsb_do_lock_check_if_matrix_done(const struct rsb_rows_lock_struct_t *lock, rsb_submatrix_idx_t subm)
53 {
54 /**
55 * \ingroup gr_internals
56 * */
57 if(RSB_BITVECTOR_GET(lock->bmap,lock->subms,subm))
58 return RSB_BOOL_TRUE;
59 else
60 return RSB_BOOL_FALSE;
61 }
62
rsb_do_lock_check_interval(const struct rsb_rows_lock_struct_t * lock,rsb_thr_t th_id,rsb_coo_idx_t roff,rsb_coo_idx_t m,rsb_coo_idx_t coff,rsb_coo_idx_t k,rsb_trans_t transA)63 static RSB_INLINE rsb_bool_t rsb_do_lock_check_interval(const struct rsb_rows_lock_struct_t *lock, rsb_thr_t th_id, rsb_coo_idx_t roff, rsb_coo_idx_t m, rsb_coo_idx_t coff, rsb_coo_idx_t k, rsb_trans_t transA)
64 {
65 /**
66 * \ingroup gr_internals
67 * */
68 rsb_thr_t tn;
69 rsb_bool_t want_both=(lock->want_symlock == RSB_BOOL_TRUE);
70
71 if(want_both)
72 {
73 for(tn=0;tn<lock->nt; ++tn)
74 if( tn!=th_id && (
75 ((lock->coresrowf[tn] >= roff) && (lock->coresrowf[tn] < roff+m))
76 || ((lock->coresrowf[tn] <= roff) && (lock->coresrowl[tn]+1 > roff))
77 || ((lock->corescolf[tn] >= coff) && (lock->corescolf[tn] < coff+k))
78 || ((lock->corescolf[tn] <= coff) && (lock->corescoll[tn]+1 > coff))
79
80 || ((lock->coresrowf[tn] >= coff) && (lock->coresrowf[tn] < coff+k))
81 || ((lock->coresrowf[tn] <= coff) && (lock->coresrowl[tn]+1 > coff))
82 || ((lock->corescolf[tn] >= roff) && (lock->corescolf[tn] < roff+m))
83 || ((lock->corescolf[tn] <= roff) && (lock->corescoll[tn]+1 > roff))
84 ))
85 {
86 if(RSB__TRSV_OUT_)RSB_INFO("%d %d blocks %d %d\n",lock->coresrowf[tn],lock->coresrowl[tn],roff,m);
87 goto l_false;
88 }
89 }
90 else
91 {
92 if((RSB_DOES_NOT_TRANSPOSE(transA)) || want_both)
93 for(tn=0;tn<lock->nt; ++tn)
94 if( tn!=th_id
95 && (((lock->coresrowf[tn] >= roff) && (lock->coresrowf[tn] < roff+m))
96 || ((lock->coresrowf[tn] <= roff) && (lock->coresrowl[tn]+1 > roff))))
97 {
98 if(RSB__TRSV_OUT_)RSB_INFO("%d %d blocks %d %d\n",lock->coresrowf[tn],lock->coresrowl[tn],roff,m);
99 goto l_false;
100 }
101
102 if(RSB_DOES_TRANSPOSE(transA) || want_both)
103 for(tn=0;tn<lock->nt; ++tn)
104 if( tn!=th_id
105 && (((lock->corescolf[tn] >= coff) && (lock->corescolf[tn] < coff+k))
106 || ((lock->corescolf[tn] <= coff) && (lock->corescoll[tn]+1 > coff))))
107 {
108 if(RSB__TRSV_OUT_)RSB_INFO("%d %d blocks %d %d\n",lock->coresrowf[tn],lock->coresrowl[tn],coff,k);
109 goto l_false;
110 }
111 }
112 return RSB_BOOL_TRUE;
113 l_false:
114 return RSB_BOOL_FALSE;
115 }
116
/* sets only the interval info for a given thread */
#define RSB_DO_LOCK_INTERVALS(LOCK,TH_ID,R0,R,C0,C) \
	(LOCK)->coresrowf[(TH_ID)]=(R0), (LOCK)->coresrowl[(TH_ID)]=(R0)+((R)-1), \
	(LOCK)->corescolf[(TH_ID)]=(C0), (LOCK)->corescoll[(TH_ID)]=(C0)+((C)-1)

/* as RSB_DO_LOCK_INTERVALS, but replicating the row interval on the columns */
#define RSB_DO_LOCK_INTERVAL(LOCK,TH_ID,R0,R) \
	(LOCK)->coresrowf[(TH_ID)]=(R0), (LOCK)->coresrowl[(TH_ID)]=(R0)+((R)-1), \
	(LOCK)->corescolf[(TH_ID)]=(R0), (LOCK)->corescoll[(TH_ID)]=(R0)+((R)-1) /* FIXME: is there a reason for redundance ? */

/* FIXME: actually, this is the interval +1 */
#define RSB_GET_LOCK_INTERVAL_W(LOCK,TH_ID,R0,R1) \
	(R0)=(LOCK)->coresrowf[(TH_ID)], (R1)=(LOCK)->coresrowl[(TH_ID)]+1
/* NOTE(review): 'coresrolf'/'coresroll' match no field used anywhere else
 * in this file (the fields elsewhere are coresrowf/coresrowl and
 * corescolf/corescoll) — almost certainly a typo; this macro appears
 * unused, which is why it still compiles. TODO: confirm intended fields
 * before using it. */
#define RSB_GET_LOCK_INTERVAL_L(LOCK,TH_ID,R0,R1) \
	(R0)=(LOCK)->coresrolf[(TH_ID)], (R1)=(LOCK)->coresroll[(TH_ID)]+1

#if 0
#define RSB_GET_LOCK_INTERVALS(LOCK,TH_ID,R0,R,C0,C) \
	(R0)=(LOCK)->coresrowf[(TH_ID)], \
	(C0)=(LOCK)->corescolf[(TH_ID)], \
	(R)=(LOCK)->coresrowl[(TH_ID)]-(R0)+1, \
	(C)=(LOCK)->corescoll[(TH_ID)]-(C0)+1
#endif
139
rsb__do_lock_get(struct rsb_rows_lock_struct_t * lock,rsb_thr_t th_id,rsb_coo_idx_t roff,rsb_coo_idx_t m,rsb_coo_idx_t coff,rsb_coo_idx_t k,rsb_submatrix_idx_t subm,rsb_trans_t transA)140 rsb_bool_t rsb__do_lock_get(struct rsb_rows_lock_struct_t *lock, rsb_thr_t th_id, rsb_coo_idx_t roff, rsb_coo_idx_t m, rsb_coo_idx_t coff, rsb_coo_idx_t k, rsb_submatrix_idx_t subm, rsb_trans_t transA)
141 {
142 /**
143 * \ingroup gr_internals
144 * */
145 #if 0
146 if(th_id)
147 if(RSB__TRSV_OUT_)RSB_INFO("blocked by %p %d @ %d .. %d\n",lock->bmap,lock->subms,th_id,subm);
148 #endif
149
150 if(RSB_BITVECTOR_GET(lock->bmap,lock->subms,subm))
151 goto l_false;
152
153 if(lock->want_fake_lock == RSB_BOOL_TRUE)
154 goto l_true; /* debug only : no locked rows check */
155
156 if(!rsb_do_lock_check_interval(lock,th_id,roff,m,coff,k,transA))
157 goto l_false;
158
159 RSB_DO_LOCK_INTERVALS(lock,th_id,roff,m,coff,k);
160
161 if(RSB__TRSV_OUT_)RSB_INFO("thread %d locks %d %d with matrix %d\n",th_id,lock->coresrowf[th_id],lock->coresrowl[th_id],subm);
162 l_true:
163 /*
164 * WARNING : this does not mean that the matrix is 'done'.
165 * It only means that the matrix is now assigned to some core, and it will be processed soon.
166 * The guarantee that the matrix is done will be given us only by the lock-if this matrix
167 * is marked AND its row (or column) interval is free, then the matrix is done (in SPSV/SPMV).
168 * */
169 RSB_BITVECTOR_SET(lock->bmap,lock->subms,subm);
170 return RSB_BOOL_TRUE;
171 l_false:
172 return RSB_BOOL_FALSE;
173 }
174
rsb__do_lock_init(struct rsb_rows_lock_struct_t * lock,rsb_int_t num_threads,rsb_submatrix_idx_t subms,const struct rsb_mtx_t * mtxAp,enum rsb_op_flags_t op_flags)175 rsb_err_t rsb__do_lock_init(struct rsb_rows_lock_struct_t *lock, rsb_int_t num_threads, rsb_submatrix_idx_t subms, const struct rsb_mtx_t * mtxAp, enum rsb_op_flags_t op_flags)
176 {
177 /**
178 * \ingroup gr_internals
179 * */
180 rsb_int tn;
181
182 if(!mtxAp || !lock)
183 return RSB_ERR_BADARGS;
184
185 RSB_BZERO_P(lock);
186 lock->nt=num_threads;
187 for(tn=0;tn<RSB_CONST_MAX_SUPPORTED_CORES; ++tn)
188 lock->corescolf[tn]=RSB_MARKER_COO_VALUE, lock->corescoll[tn]=RSB_MARKER_COO_VALUE,
189 lock->coresrowf[tn]=RSB_MARKER_COO_VALUE, lock->coresrowl[tn]=RSB_MARKER_COO_VALUE;
190 lock->dm=0;
191 lock->subms=subms;
192 lock->want_symlock = rsb__is_not_unsymmetric(mtxAp);
193 lock->want_fake_lock=(op_flags == RSB_OP_FLAG_FAKE_LOCK);
194 lock->bmap = rsb__allocate_bitvector(subms);
195 return (lock->bmap!=NULL)?RSB_ERR_NO_ERROR:RSB_ERR_ENOMEM;
196 }
197
rsb__do_lock_free(struct rsb_rows_lock_struct_t * lock)198 rsb_err_t rsb__do_lock_free(struct rsb_rows_lock_struct_t *lock)
199 {
200 /**
201 * \ingroup gr_internals
202 * */
203 if(!lock)
204 return RSB_ERR_BADARGS;
205 RSB_CONDITIONAL_FREE(lock->bmap);
206 return RSB_ERR_NO_ERROR;
207 }
208
209 /* BEGIN EXPERIMENTAL CODE */
210
211 #if RSB_WANT_DO_LOCK_TEST
static size_t rsb_do_log2(size_t n)
{
	/*!
	 * \ingroup gr_internals
	 * Returns floor(log2(n)) for n>1; returns 0 for n<=1.
	 */
	size_t l2;

	for(l2 = 0; n >= 2; n /= 2)
		++l2;
	return l2;
}
223 #endif /* RSB_WANT_DO_LOCK_TEST */
224
#define RSB_MULTINT_BY_TWO(X) ((X)<<1) /* FIXME: this is not portable */
#define RSB_UPPER_BOUNDING_LOG2(X) (rsb_do_log2(rsb__nearest_power_of_two(X)))
/* debug verbosity switches for the experimental code below (0 = silent) */
#define RSB_LOUD_BTILS_TESTING 0 /* binary tree interval lock structure */
#define RSB_LOUD_MVL_TESTING 0 /* multivector lock */
#define RSB_LOUD_MVR_TESTING 0 /* multivector reduce */
/* NOTE(review): the two comments below look copy-pasted from the line
 * above; these flags respectively disable temporary vectors and the final
 * reduce step. */
#define RSB_INHIBIT_MULTIVECTOR 1 /* multivector reduce */
#define RSB_INHIBIT_REDUCE 0 /* multivector reduce */
232
rsb_do_btils_init(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t itl,rsb_coo_idx_t nlevels)233 static rsb_err_t rsb_do_btils_init(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t itl, rsb_coo_idx_t nlevels)
234 {
235 /**
236 * \ingroup gr_internals
237 * Initializes a lock structure.
238 * The input structure shall be freshly instantiated or freed.
239 * In case of error, it is safe but not required to call rsb_do_btils_free() to free it.
240 */
241 rsb_err_t errval = RSB_ERR_NO_ERROR;
242
243 if(!lock || nlevels<0)
244 {
245 errval = RSB_ERR_BADARGS;
246 RSB_PERR_GOTO(err,RSB_ERRM_ES);
247 }
248 RSB_BZERO_P(lock);
249 lock->bmap=NULL;
250 lock->nlevels=nlevels;
251 lock->itl=itl;
252 lock->mvleaves = RSB_POWER_OF_2(nlevels);
253 lock->bsz=(2*lock->mvleaves-1);
254 /* FIXME: need a check on nlevels */
255 lock->bmap = rsb__allocate_bitvector(lock->bsz);
256 lock->tmap = rsb__allocate_bitvector(lock->bsz);
257 if(!lock->bmap || !lock->tmap)
258 {
259 RSB_CONDITIONAL_FREE(lock->bmap);
260 RSB_CONDITIONAL_FREE(lock->tmap);
261 errval = RSB_ERR_ENOMEM;
262 RSB_PERR_GOTO(err,RSB_ERRM_ES);
263 }
264 err:
265 return errval;
266 }
267
rsb_do_btils_free(struct rsb_bti_lock_struct * lock)268 static rsb_err_t rsb_do_btils_free(struct rsb_bti_lock_struct * lock)
269 {
270 /**
271 * \ingroup gr_internals
272 * Frees a lock structure.
273 * The input structure shall be initialized with success.
274 * */
275 if(!lock)
276 return RSB_ERR_BADARGS;
277 RSB_CONDITIONAL_FREE(lock->bmap);
278 RSB_CONDITIONAL_FREE(lock->tmap);
279 return RSB_ERR_NO_ERROR;
280 }
281
static rsb_coo_idx_t rsb_do_rindex_to_lindex(rsb_coo_idx_t r0, rsb_coo_idx_t r1, rsb_coo_idx_t n, rsb_coo_idx_t nlevels)
{
	/**
	 * \ingroup gr_internals
	 * Maps the row interval [r0,r1) over n rows to the index of the
	 * smallest covering node in a complete binary interval tree with
	 * nlevels levels (root at bit 0, leaves last).
	 * */
	rsb_coo_idx_t l0 = 0, l1 = 0, doffset = 1, offset = 0;
	rsb_coo_idx_t n0 = n, n1 = n;
	rsb_int lvl, delta = 0;

	if(nlevels < 1)
		return 0;
	if(r1 == n1)
	{
		/* right end coincides with the matrix end: one past the last leaf */
		l1 = 2;
		r1 = 0;
	}
	/* descend both endpoints through the tree, accumulating leaf bits */
	for(lvl = 0; lvl < nlevels; ++lvl)
	{
		const rsb_coo_idx_t m0 = RSB_MIDDLE(n0);
		const rsb_coo_idx_t m1 = RSB_MIDDLE(n1);

		if(r0 >= m0)
		{
			r0 -= m0;
			++l0;
			n0 -= m0;
		}
		else
			n0 = m0;
		if(r1 >= m1)
		{
			r1 -= m1;
			++l1;
			n1 -= m1;
		}
		else
			n1 = m1;

		if(lvl < nlevels-1)
		{
			l0 *= 2;
			l1 *= 2;
		}
	}
	/* climb back up to the smallest node spanning the [l0,l1) leaf range */
	delta = l1-l0;
	l0 = l0/(l1-l0);
	offset = RSB_POWER_OF_2(nlevels)-1;	/* bit offset of the first leaf */
	doffset = RSB_POWER_OF_2(nlevels-1);
	for( ; delta > 1; delta /= 2, doffset /= 2)
		offset -= doffset;

	if(RSB_LOUD_BTILS_TESTING)
		RSB_INFO("@ bit %d + %d\n",l0,offset);
	return offset+l0;
}
327
static rsb_bool_t rsb_do_btils_lock_update_tmap(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t i)
{
	/**
	 * \ingroup gr_internals
	 * Marks tree node i as tainted (i.e. written to), then collapses any
	 * fully-tainted sibling group into its parent, walking upwards, so the
	 * taint map stays in a reduced form. Always returns RSB_BOOL_TRUE.
	 * */
	rsb_coo_idx_t iu,il,ii;
	/* we taint the vector: after a lock, it will mark the interval as tainted
	 * (as opposed to the cases where an untainted vector is unlocked (e.g.: after a reduce)) */
	RSB_BITVECTOR_SET(lock->tmap,lock->bsz,i);
	/* did already any ancestor taint all way up ? */
	for(iu=(i-1)/2;iu>0;iu=(iu-1)/2)
	{
		/* TODO: could speed up a little while by inverting the visit order */
		if(RSB_LOUD_BTILS_TESTING)
			RSB_INFO("updating tmap\n");
		if(RSB_BITVECTOR_GET(lock->tmap,lock->bsz,iu))
			goto l_done;
	}
	/* no ancestor tainted all way up; iu is now the root */
	RSB_ASSERT(iu==0);
	if(RSB_BITVECTOR_GET(lock->tmap,lock->bsz,iu))
		goto l_done;
	if(RSB_LOUD_BTILS_TESTING)
		RSB_INFO("reducing taint map:\n"),rsb__do_dump_bitmap(lock->tmap,1,lock->bsz),RSB_INFO("\n");

	/* we look for neighbor leaves needing collapse, at any upper level */
	while(i>0)
	{
		/* [il,iu) spans the children of i's parent (i's sibling group) */
		il=2*((i-1)/2 )+1;
		iu=2*((i-1)/2+1)+1;
		for(ii=il;ii<iu;++ii)
			if(!RSB_BITVECTOR_GET(lock->tmap,lock->bsz,ii))
				goto skip;/* The sibling interval is not tainted: we may stop merging here.
				Pay attention: some descendant of ours may have still its bit set despite
				at this level we are done with merging: thus that bit would be obsolete,
				and it could be possible for it to remain.
				This does not cause harm, so we don't force bit-clear to lower nodes, here.
				*/
		/* merge the current subtree */
		for(ii=il;ii<iu;++ii)
			RSB_BITVECTOR_UNSET(lock->tmap,lock->bsz,ii);
		/* collapse to the upper node */
		i=(i-1)/2;
		RSB_BITVECTOR_SET(lock->tmap,lock->bsz,i);
		continue;
	skip:
		i=(i-1)/2;
	}
	/* the taint map is done */

l_done:
	if(RSB_LOUD_BTILS_TESTING)
		RSB_INFO("taint map:\n"),rsb__do_dump_bitmap(lock->tmap,1,lock->bsz),RSB_INFO("\n");
	return RSB_BOOL_TRUE;
}
380
rsb_do_btils_lock_probe_inner(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t i)381 static RSB_INLINE rsb_bool_t rsb_do_btils_lock_probe_inner(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t i)
382 {
383 /**
384 * */
385 rsb_coo_idx_t iu,il;
386 rsb_coo_idx_t ili=2,ilii;
387 RSB_ASSERT(lock);
388 RSB_ASSERT(i>=0);
389
390 if(RSB_BITVECTOR_GET(lock->bmap,lock->bsz,i))
391 goto l_false;
392
393 for(iu=(i-1)/2;iu>0;iu=(iu-1)/2)
394 {
395 #if 0
396 if(1) RSB_INFO("checking bit .. %d:%d\n",iu,RSB_BOOL_TRUE == RSB_BITVECTOR_GET(lock->bmap,lock->bsz,iu));
397 #endif
398 if(RSB_BITVECTOR_GET(lock->bmap,lock->bsz,iu))
399 goto l_false;
400 }
401 if(RSB_BITVECTOR_GET(lock->bmap,lock->bsz,iu))/* iu==0 */
402 goto l_false;
403 for(il=2*i+1;il<lock->bsz;il=2*il+1,ili*=2)
404 {
405 for(ilii=0;ilii<ili;++ilii)
406 if(RSB_BITVECTOR_GET(lock->bmap,lock->bsz,il+ilii))
407 goto l_false;
408 }
409
410 return RSB_BOOL_TRUE;
411 l_false:
412 return RSB_BOOL_FALSE;
413 }
414
rsb_do_btils_lock_probe(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t m0,rsb_coo_idx_t m1,rsb_coo_idx_t * ip)415 static rsb_bool_t rsb_do_btils_lock_probe(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t m0, rsb_coo_idx_t m1, rsb_coo_idx_t *ip)
416 {
417 /**
418 * */
419 rsb_coo_idx_t i;
420 RSB_ASSERT(lock);
421 RSB_ASSERT(ip);
422
423 i = rsb_do_rindex_to_lindex(m0,m1,lock->itl,lock->nlevels);
424 if(!rsb_do_btils_lock_probe_inner(lock,i))
425 goto l_false;
426 *ip=i;
427 return RSB_BOOL_TRUE;
428 l_false:
429 return RSB_BOOL_FALSE;
430 }
431
rsb_do_btils_lock_get_sym(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t m0,rsb_coo_idx_t m1,rsb_coo_idx_t k0,rsb_coo_idx_t k1,rsb_trans_t transA,rsb_coo_idx_t * ip,rsb_coo_idx_t * jp)432 static rsb_bool_t rsb_do_btils_lock_get_sym(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t m0, rsb_coo_idx_t m1, rsb_coo_idx_t k0, rsb_coo_idx_t k1, rsb_trans_t transA, rsb_coo_idx_t *ip, rsb_coo_idx_t *jp)
433 {
434 /**
435 * \ingroup gr_internals
436 * */
437 rsb_coo_idx_t i,j;
438 if(!rsb_do_btils_lock_probe(lock,m0,m1,&i))
439 goto l_false;
440 j=i;
441 if((m0!=k0) && (m1!=k1))
442 if(!rsb_do_btils_lock_probe(lock,k0,k1,&j))
443 goto l_false;
444
445 RSB_BITVECTOR_SET(lock->bmap,lock->bsz,i);
446 if(i!=j)
447 RSB_BITVECTOR_SET(lock->bmap,lock->bsz,j);
448 if(RSB_LOUD_BTILS_TESTING)
449 rsb__do_dump_bitmap(lock->bmap,1,lock->bsz),RSB_INFO(" (%d)\n",lock->bsz);
450
451 /* we're going to lock up to i */
452 if(RSB_LOUD_BTILS_TESTING)
453 RSB_INFO("(nlev=%d)(%d .. %d) -> %d ok\n",lock->nlevels,m0,m1,i);
454
455 /* TODO: update the taint vector accordingly */
456 rsb_do_btils_lock_update_tmap(lock,i);
457 if(i!=j)
458 rsb_do_btils_lock_update_tmap(lock,j);
459
460 if(RSB_DOES_TRANSPOSE(transA))
461 RSB_SWAP(rsb_coo_idx_t,i,j);
462
463 *ip=i;
464 *jp=j;
465
466 return RSB_BOOL_TRUE;
467 l_false:
468 if(RSB_LOUD_BTILS_TESTING)
469 RSB_INFO("(nlev=%d)(%d .. %d) -> (%d %d) busy \n",lock->nlevels,m0,m1,i,j);
470 return RSB_BOOL_FALSE;
471 }
472
rsb_do_btils_lock_get(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t m0,rsb_coo_idx_t m1,rsb_trans_t transA,rsb_coo_idx_t * ip,rsb_coo_idx_t * jp)473 static rsb_bool_t rsb_do_btils_lock_get(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t m0, rsb_coo_idx_t m1, rsb_trans_t transA, rsb_coo_idx_t *ip, rsb_coo_idx_t *jp)
474 {
475 /**
476 * \ingroup gr_internals
477 * */
478 rsb_coo_idx_t i = RSB_MARKER_COO_VALUE,j = RSB_MARKER_COO_VALUE;
479 if(!rsb_do_btils_lock_probe(lock,m0,m1,&i))
480 goto l_false;
481
482 RSB_BITVECTOR_SET(lock->bmap,lock->bsz,i);
483 if(RSB_LOUD_BTILS_TESTING)
484 rsb__do_dump_bitmap(lock->bmap,1,lock->bsz),RSB_INFO(" (%d)\n",lock->bsz);
485
486 /* we're going to lock up to i */
487 if(RSB_LOUD_BTILS_TESTING)
488 RSB_INFO("(nlev=%d)(%d .. %d) -> %d ok\n",lock->nlevels,m0,m1,i);
489
490 /* TODO: update the taint vector accordingly */
491 rsb_do_btils_lock_update_tmap(lock,i);
492
493 if(RSB_DOES_TRANSPOSE(transA))
494 RSB_SWAP(rsb_coo_idx_t,i,j);
495
496 *ip=i,*jp=j;
497
498 return RSB_BOOL_TRUE;
499 l_false:
500 if(RSB_LOUD_BTILS_TESTING)
501 RSB_INFO("(nlev=%d)(%d .. %d) -> %d busy \n",lock->nlevels,m0,m1,i);
502 return RSB_BOOL_FALSE;
503 }
504
static rsb_err_t rsb_do_get_interval_info_from_btils_lock(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t i, rsb_coo_idx_t *m0p, rsb_coo_idx_t * m1p)
{
	/**
	 * \ingroup gr_internals
	 * Recovers the row interval [*m0p,*m1p) covered by tree node i — the
	 * inverse of rsb_do_rindex_to_lindex(); used for diagnostics.
	 * FIXME: unfinished
	 * */
	rsb_coo_idx_t m0=0,m1=lock->itl,h=lock->itl,iu,l=0,ii=0,pot=1, nl=0;

	/* climb from node i to the root, recording the left/right turns in ii
	 * (even child index = right turn; the turn nearest the root ends up in
	 * the least significant bit) and the depth in nl */
	for(iu=i;iu>0;iu=(iu-1)/2)
	{
		ii = RSB_MULTINT_BY_TWO(ii);
		if(RSB_IS_INTEGER_EVEN(iu))
			++ii;
		++nl;
	}

	/* replay the recorded turns from the root, halving [m0,m1) each level */
	for(l=0;l<nl;++l)
	{
		if(ii&pot)
			m0+=RSB_MIDDLE(h),
			h=h-RSB_MIDDLE(h);
		else
			m1-=h-RSB_MIDDLE(h),
			h = RSB_MIDDLE(h);
#if 0
		RSB_INFO("BIBO: ii=%d m0=%d, h=%d, pot=%d\n",ii,m0,h,pot);
#endif
		pot = RSB_MULTINT_BY_TWO(pot);
	}
	*m0p=m0;
	*m1p=m1;
	return RSB_ERR_NO_ERROR;
}
538
rsb_do_btils_lock_release_inner(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t i)539 void RSB_INLINE rsb_do_btils_lock_release_inner(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t i)
540 {
541 /**
542 * \ingroup gr_internals
543 * FIXME: does this call free one or two intervals ?
544 * */
545 if(RSB_LOUD_BTILS_TESTING)
546 rsb__do_dump_bitmap(lock->bmap,1,lock->bsz),RSB_INFO(" (%d)\n",lock->bsz);
547 RSB_BITVECTOR_UNSET(lock->bmap,lock->bsz,i);
548 if(RSB_LOUD_BTILS_TESTING)
549 {
550 rsb_coo_idx_t m0,m1;
551 rsb_do_get_interval_info_from_btils_lock(lock,i,&m0,&m1);
552 RSB_INFO("freeing (%d .. %d)\n",m0,m1),
553 rsb__do_dump_bitmap(lock->bmap,1,lock->bsz),RSB_INFO(" (%d)\n",lock->bsz);
554 }
555 }
556
557 #if RSB_WANT_DO_LOCK_TEST
rsb_do_btils_lock_release(struct rsb_bti_lock_struct * lock,rsb_coo_idx_t m0,rsb_coo_idx_t m1)558 static rsb_err_t rsb_do_btils_lock_release(struct rsb_bti_lock_struct * lock, rsb_coo_idx_t m0, rsb_coo_idx_t m1)
559 {
560 /**
561 * \ingroup gr_internals
562 * FIXME: does this call free one or two intervals ?
563 * FIXME: deprecated
564 * */
565 rsb_coo_idx_t i;
566 if(!lock)
567 return RSB_ERR_BADARGS;
568 i = rsb_do_rindex_to_lindex(m0,m1,lock->itl,lock->nlevels);
569 RSB_ASSERT(i>=0);
570 rsb_do_btils_lock_release_inner(lock,i);
571 return RSB_ERR_NO_ERROR;
572 }
573 #endif /* RSB_WANT_DO_LOCK_TEST */
574
/* Byte address of element OFFSET inside temporary vector INDEX.
 * NOTE(review): rsb__do_mv_lock_release_single() uses INDEX=nvi+1 as an
 * exclusive upper bound, which reads mv[nv] — confirm that entry is always
 * a valid (or at least comparable) pointer. */
#define RSB_MV_OFFSET(LOCK,INDEX,OFFSET) \
	((((rsb_char_t *)((LOCK)->mv[INDEX]))) +(LOCK)->el_size*(OFFSET))
577
static rsb_err_t rsb__do_mv_lock_release_single(struct rsb_mv_lock_t *lock, rsb_thr_t th_id, rsb_char_t *ov)
{
	/* *
	 * \ingroup gr_internals
	 * Releases the vector ov held by thread th_id: either the master
	 * vector (through the rows lock) or one of the temporary vectors
	 * (through the per-vector interval locks recorded in lock->it[] /
	 * lock->in[]). A pointer matching no known vector is silently ignored.
	 * */
	rsb_coo_idx_t nvi = RSB_MARKER_COO_VALUE;

	/* in the case the locked vector was the master one */
	if(ov==lock->ov)
	{
		if(RSB_LOUD_MVL_TESTING)
			RSB_INFO("releasing master vector from thread %d\n",th_id);
		rsb__do_lock_release(&(lock->olock),th_id);
		goto ok;
	}

	/* in the case the locked vector was not the master one:
	 * locate which temporary vector's address range contains ov.
	 * NOTE(review): the upper bound RSB_MV_OFFSET(lock,nvi+1,0) reads
	 * mv[nvi+1], i.e. mv[nv] for the last vector — TODO confirm that
	 * entry is addressable/comparable. */
	for(nvi=0;nvi<lock->nv;++nvi)
	if( (ov >= RSB_MV_OFFSET(lock,nvi,0)) && (ov<RSB_MV_OFFSET(lock,nvi+1,0)))
	{
		struct rsb_bti_lock_struct * vlock=&(lock->locks[nvi]);
		/* we localized the vector. now we shall see if that interval was locked */
		if(RSB_LOUD_MVL_TESTING)
		{
			RSB_INFO("releasing vector %d from thread %d\n",nvi,th_id);
			/* if(RSB_BITVECTOR_GET(vlock->bmap,vlock->bsz,rsb_do_rindex_to_lindex(roff,roff+m,vlock->itl,vlock->nlevels)))
			RSB_INFO("freeing interval %d .. %d on vector %d (thread %d)\n",roff,roff+m,nvi,th_id);
			else
			{RSB_INFO("guessed pointer !?\n");goto failure;}*/
		}
		/* release up to two node locks recorded for this thread */
		if(lock->it[th_id]!=RSB_MARKER_COO_VALUE)
		{
			if(RSB_LOUD_MVL_TESTING) RSB_INFO("releasing inner\n");
			rsb_do_btils_lock_release_inner(vlock,lock->it[th_id]);
		}
		if(lock->in[th_id]!=RSB_MARKER_COO_VALUE)
		{
			if(RSB_LOUD_MVL_TESTING) RSB_INFO("releasing inner\n");
			rsb_do_btils_lock_release_inner(vlock,lock->in[th_id]);
		}
		lock->it[th_id]=RSB_MARKER_COO_VALUE;
		lock->in[th_id]=RSB_MARKER_COO_VALUE;
		goto ok;
	}
#if 0
failure:
	if(RSB_LOUD_MVL_TESTING)
		RSB_INFO("did not find a vector to release for thread %d\n",th_id);
	return RSB_ERR_GENERIC_ERROR;
#endif
ok:
	return RSB_ERR_NO_ERROR;
}
631
rsb__do_mv_lock_release(struct rsb_mv_lock_t * lock,rsb_thr_t th_id,rsb_char_t * ov)632 rsb_err_t rsb__do_mv_lock_release(struct rsb_mv_lock_t *lock, rsb_thr_t th_id, rsb_char_t *ov)
633 {
634 /* *
635 * \ingroup gr_internals
636 * */
637 #if RSB_LOUD_MVL_TESTING
638 #if 0
639 rsb_coo_idx_t roff = RSB_MARKER_COO_VALUE,m = RSB_MARKER_COO_VALUE,coff = RSB_MARKER_COO_VALUE,k = RSB_MARKER_COO_VALUE;
640 #endif
641 #endif /* RSB_LOUD_MVL_TESTING */
642 rsb_bool_t is_reduce_only = RSB_BOOL_TRUE;/* FIXME: ?? */
643 rsb_err_t errval = RSB_ERR_NO_ERROR;
644 RSB_ASSERT(lock);
645 RSB_ASSERT(ov);
646 RSB_ASSERT(th_id>=0);
647 #if 0
648 RSB_GET_LOCK_INTERVALS(&(lock->olock),th_id,roff,m,coff,k);
649 #endif
650 errval = rsb__do_mv_lock_release_single(lock,th_id,ov);
651 if(RSB_SOME_ERROR(errval))
652 {
653 RSB_PERR_GOTO(failure,RSB_ERRM_ES);
654 }
655 if(is_reduce_only)
656 goto reduce_ok;
657 else
658 goto ok;
659 reduce_ok:
660 #if 0
661 RSB_ASSERT(roff>=0); RSB_ASSERT(m>=0); RSB_ASSERT(th_id>=0);
662 if(RSB_LOUD_MVL_TESTING)
663 RSB_INFO("freeing interval %d .. %d on master vector (thread %d) \n",roff,roff+m,th_id);
664 /* it may still be the master vector, or a random pointer :) */
665 rsb__do_lock_release(&(lock->olock),th_id);
666 #endif
667 goto ok;
668 failure:
669 #if 0
670 RSB_ASSERT(roff>=0); RSB_ASSERT(m>=0); RSB_ASSERT(th_id>=0);
671 if(RSB_LOUD_MVL_TESTING)
672 RSB_INFO("not freeing interval %d .. %d\n",roff,roff+m);
673 #endif
674 ok:
675 return RSB_ERR_NO_ERROR;
676 }
677
rsb_do_is_bitmap_blank(rsb_bitmap_data_t * bmap,rsb_coo_idx_t r,rsb_coo_idx_t c)678 static rsb_bool_t rsb_do_is_bitmap_blank(rsb_bitmap_data_t *bmap, rsb_coo_idx_t r, rsb_coo_idx_t c)
679 {
680 /* *
681 * \ingroup gr_internals
682 * FIXME: new, untested
683 * */
684 size_t bs = RSB_WORDS_PER_BITMAP(r,c);
685 rsb_coo_idx_t i;
686 for(i=0;i<bs;++i)
687 {
688 if(bmap[i])
689 return RSB_BOOL_FALSE;
690 }
691 return RSB_BOOL_TRUE;
692 }
693
rsb_do_is_bitvector_blank(rsb_bitmap_data_t * bmap,rsb_coo_idx_t c)694 static rsb_bool_t rsb_do_is_bitvector_blank(rsb_bitmap_data_t *bmap, rsb_coo_idx_t c)
695 {
696 /* *
697 * \ingroup gr_internals
698 * */
699 return rsb_do_is_bitmap_blank(bmap,1,c);
700 }
701
rsb_do_mv_lock_is_used(struct rsb_mv_lock_t * lock)702 rsb_bool_t rsb_do_mv_lock_is_used(struct rsb_mv_lock_t *lock)
703 {
704 /* *
705 * \ingroup gr_internals
706 * */
707 rsb_coo_idx_t nvi;
708 for(nvi=0;nvi<lock->nv;++nvi)
709 {
710 struct rsb_bti_lock_struct * vlock=&(lock->locks[nvi]);
711 if(!rsb_do_is_bitvector_blank(vlock->bmap,vlock->bsz))
712 return RSB_BOOL_TRUE;
713 }
714 return RSB_BOOL_FALSE;
715 }
716
rsb_do_mv_lock_is_tainted(struct rsb_mv_lock_t * lock)717 rsb_bool_t rsb_do_mv_lock_is_tainted(struct rsb_mv_lock_t *lock)
718 {
719 /* *
720 * \ingroup gr_internals
721 * */
722 rsb_coo_idx_t nvi;
723 for(nvi=0;nvi<lock->nv;++nvi)
724 {
725 struct rsb_bti_lock_struct * vlock=&(lock->locks[nvi]);
726 if(!rsb_do_is_bitvector_blank(vlock->tmap,vlock->bsz))
727 return RSB_BOOL_TRUE;
728 }
729 return RSB_BOOL_FALSE;
730 }
731
rsb_err_t rsb__do_mv_lock_free(struct rsb_mv_lock_t *lock)
{
	/* *
	 * \ingroup gr_internals
	 * Tears down a multi-vector lock: when multi-vector mode is compiled
	 * in, first reduces (in parallel) all tainted temporary vectors into
	 * the master vector, then frees the temporary vectors, their interval
	 * locks and the underlying rows lock.
	 * NOTE(review): lock is dereferenced before the !lock check further
	 * below, so that check cannot catch a NULL argument — confirm callers
	 * never pass NULL.
	 * */
	rsb_coo_idx_t nvi;
	rsb_err_t errval = RSB_ERR_NO_ERROR;

#if !RSB_INHIBIT_MULTIVECTOR
	rsb_bool_t tainted = rsb_do_mv_lock_is_tainted(lock);
#endif /* RSB_INHIBIT_MULTIVECTOR */
	if(RSB_LOUD_MVL_TESTING)
	{
		if(lock->nv)
			RSB_INFO("taint maps:\n");
		for(nvi=0;nvi<lock->nv;++nvi)
			rsb__do_dump_bitmap(lock->locks[nvi].tmap,1,lock->locks[nvi].bsz),RSB_INFO("\n");
	}

	/* FIXME: TODO: reduce all of the vectors, here. */
#if !RSB_INHIBIT_MULTIVECTOR
#if RSB_INHIBIT_REDUCE
	/* no reduce. this will produce wrong results, of course */
#else /* RSB_INHIBIT_REDUCE */
	if(rsb_do_mv_lock_is_used(lock))
	{
		/* a vector still locked at teardown time is an internal logic error */
		errval = RSB_ERR_INTERNAL_ERROR;
		RSB_PERR_GOTO(err,"no vector should not be in use before reducing!");
	}
#if 0
	/* this approach is likely to be faster for high nnz/row cases */
	if(RSB_LOUD_MVR_TESTING)
		RSB_INFO("summing up (%d) vectors to the master (strided %d)\n",lock->nv,lock->incov);
	if(RSB_LOUD_MVR_TESTING)
		RSB_INFO("on master vector:\n"),
		RSB_DO_ERROR_CUMULATE(errval,rsb__do_print_some_vector_stats(lock->ov,lock->typecode,lock->itl));
	for(nvi=0;nvi<lock->nv;++nvi)
	{
		if(RSB_LOUD_MVR_TESTING)
			RSB_INFO("on vector %d:\n",nvi),
			RSB_DO_ERROR_CUMULATE(errval,rsb__do_print_some_vector_stats(lock->mv[nvi],lock->typecode,lock->itl));
		rsb__vectors_left_sum_reduce_and_zero(lock->ov,lock->mv[nvi],lock->typecode,lock->itl,lock->incov,0);
	}
	if(RSB_LOUD_MVR_TESTING)
		RSB_INFO("\n");
#else
#if 1
	/* parallel reduce: each participating thread repeatedly picks a
	 * tainted interval of some temporary vector (under a critical
	 * section) and folds it into the master vector */
#pragma omp parallel shared(tainted) RSB_NTC
	{
		rsb_thr_t th_id = omp_get_thread_num();
		rsb_coo_idx_t oincy=lock->incov,rh,r0;
		rsb_char_t *ov=NULL;
		extern struct rsb_session_handle_t rsb_global_session_handle;

		if(th_id>=lock->nv)
			goto skip;
		if(th_id >= rsb_global_session_handle.rsb_want_threads)
			goto skip;

		while(tainted)
		{

			if(RSB_LOUD_MVR_TESTING)
			{
				if(lock->nv)
					RSB_INFO("taint maps:\n");
				for(nvi=0;nvi<lock->nv;++nvi)
					rsb__do_dump_bitmap(lock->locks[nvi].tmap,1,lock->locks[nvi].bsz),RSB_INFO(" (%d)\n",rsb_do_mv_lock_is_tainted(lock));
				if(lock->nv)
					RSB_INFO("use maps:\n");
				for(nvi=0;nvi<lock->nv;++nvi)
					rsb__do_dump_bitmap(lock->locks[nvi].bmap,1,lock->locks[nvi].bsz),RSB_INFO(" (%d)\n",rsb_do_mv_lock_is_tainted(lock));
			}

			ov=lock->ov;
#pragma omp critical (rsb_lock_crs)
			{ rsb__do_pick_candidate_interval_for_reduce(lock,th_id,&ov,&r0,&rh); }

			/* ov unchanged or equal to the master vector means nothing was picked */
			if(ov && ov!=lock->ov)
			{
				if(RSB_LOUD_MVR_TESTING)
					RSB_INFO("%d .. %d (incov = %d)\n",r0,rh,oincy);
				rsb__vectors_left_sum_reduce_and_zero(lock->ov,ov,lock->typecode,rh,oincy,r0);/*wrong ?*/
#if 0
				rsb__vectors_left_sum_reduce_and_zero(lock->ov,ov,lock->typecode,lock->itl,oincy,0);/*~works*/
				rsb__vectors_left_sum_reduce_and_zero(lock->ov,ov,lock->typecode,lock->itl,lock->incov,0);/*~works*/
#endif
#pragma omp critical (rsb_lock_crs)
				{ rsb__do_release_candidate_interval_for_reduce(lock,th_id,ov,r0,rh); }
			}
#pragma omp critical (rsb_lock_crs)
			{ tainted = rsb_do_mv_lock_is_tainted(lock); }
		}
	skip:
#pragma omp barrier
		RSB_NULL_STATEMENT_FOR_COMPILER_HAPPINESS;
	}
#else
	if(RSB_LOUD_MVR_TESTING)
	{
		if(lock->nv)
			RSB_INFO("taint maps:\n");
		for(nvi=0;nvi<lock->nv;++nvi)
			rsb__do_dump_bitmap(lock->locks[nvi].tmap,1,lock->locks[nvi].bsz),RSB_INFO("\n");
	}
	/* serial approach, for debugging purposes (very slow) ; it should be used to debug the rest */
	for(nvi=0;nvi<lock->nv;++nvi)
		rsb__vectors_left_sum_reduce_and_zero(lock->ov,lock->mv[nvi],lock->typecode,lock->itl,lock->incov,0);
#endif
#endif
#endif /* RSB_INHIBIT_REDUCE */
#endif /* RSB_INHIBIT_MULTIVECTOR */
	goto nosync;
nosync:
	if(!lock)
	{
		errval = RSB_ERR_BADARGS;
		RSB_PERR_GOTO(err,RSB_ERRM_ES);
	}
	for(nvi=0;nvi<lock->nv;++nvi)
		RSB_CONDITIONAL_FREE(lock->mv[nvi]);

	/* NOTE(review): this countdown starts at nvi==lock->nv, one past the
	 * last initialized interval lock; locks[] was zeroed at init time, so
	 * freeing the extra entry is a no-op — confirm this is intended. */
	for(;nvi>=0;--nvi)
		rsb_do_btils_free(&(lock->locks[nvi]));

	RSB_DO_ERROR_CUMULATE(errval,rsb__do_lock_free(&(lock->olock)));
err:
	RSB_DO_ERR_RETURN(errval)
}
861
rsb_err_t rsb__do_mv_lock_init(struct rsb_mv_lock_t *lock, rsb_int_t num_threads, rsb_submatrix_idx_t subms, const struct rsb_mtx_t * mtxAp, enum rsb_op_flags_t op_flags, rsb_trans_t transA, rsb_char_t * ov, rsb_coo_idx_t incov)
{
	/* *
	 * \ingroup gr_internals
	 * Initializes a multi-vector lock: the underlying rows lock, up to
	 * num_threads-1 zero-filled temporary result vectors (one interval
	 * lock each) and the per-thread bookkeeping markers.
	 * ov/incov are the master output vector and its stride.
	 * On failure every partially acquired resource is released and the
	 * error is returned.
	 * */
	rsb_coo_idx_t nvi,nlevels;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_int_t th_id=0;
	rsb_coo_idx_t tn;

	if(!lock || !mtxAp)
		return RSB_ERR_BADARGS;

	RSB_BZERO_P(lock);
	errval = rsb__do_lock_init(&(lock->olock),num_threads,subms,mtxAp,op_flags);
	if(RSB_SOME_ERROR(errval))
	{
		RSB_PERR_GOTO(err0,RSB_ERRM_ES);
	}
	/* choose how many temporary vectors to use */
	/* FIXME: we need a policy for this */
#if RSB_INHIBIT_MULTIVECTOR
	lock->nv=0; /* FIXME: for debugging purposes */
#else
#if 0
	lock->nv = RSB_MIN(num_threads-1,((mtxAp->nnz+1)/(4*mtxAp->nr+1)));
	lock->nv = RSB_MIN(num_threads-1,1);/*FIXME: temporary */
	lock->nv = RSB_MIN(num_threads-1,rsb__submatrices(mtxAp));/*FIXME: temporary */
	lock->nv=1; /* FIXME: for debugging purposes */
#endif
	lock->nv = RSB_MIN(num_threads-1,mtxAp->all_leaf_matrices_n-1);/* FIXME: temporary */
#endif /* RSB_INHIBIT_MULTIVECTOR */
	if(RSB_LOUD_MVR_TESTING)
		RSB_INFO("Will use %d temporary vectors for %d threads\n",lock->nv,num_threads);

	RSB_ASSERT(lock->nv<RSB_CONST_MAX_SUPPORTED_CORES);
	lock->el_size=mtxAp->el_size;
	lock->typecode=mtxAp->typecode;
	lock->itl = rsb__do_get_rows_of(mtxAp,transA);
	lock->ov=ov;
	lock->incov=incov;
	lock->transA=transA;
	nlevels = rsb__get_recursive_matrix_depth(mtxAp);
	/* invalidate the per-thread interval markers */
	for(tn=0;tn<RSB_CONST_MAX_SUPPORTED_CORES; ++tn)
		lock->it[tn]=
		lock->in[tn]=RSB_MARKER_COO_VALUE;
	for(nvi=0;nvi<lock->nv;++nvi)
		if((errval = rsb_do_btils_init(&(lock->locks[nvi]),lock->itl,nlevels))!=RSB_ERR_NO_ERROR)
		{
			RSB_PERR_GOTO(err1,RSB_ERRM_ES);
		}
	/* time to allocate the temporary vectors */
	for(nvi=0;nvi<lock->nv;++nvi)
		if((lock->mv[nvi]=rsb__calloc(lock->el_size*lock->itl))==NULL)
		{
			RSB_PERR_GOTO(err2,RSB_ERRM_ES);
		}
	for(th_id=0;th_id<num_threads;++th_id)
		lock->last_subm[th_id]=RSB_SUBM_IDX_MARKER;
	/* the multivector lock is allocated. nice! */

	return RSB_ERR_NO_ERROR;
err2:
	/* rollback: the whole struct was zeroed above, so freeing entries that
	 * were never allocated/initialized is safe here */
	for(nvi=0;nvi<lock->nv;++nvi)
		RSB_CONDITIONAL_FREE(lock->mv[nvi]);
err1:
	for(nvi=0;nvi<lock->nv;++nvi)
		rsb_do_btils_free(&(lock->locks[nvi]));
	RSB_DO_ERROR_CUMULATE(errval,rsb__do_lock_free(&(lock->olock)));
err0:
	RSB_DO_ERR_RETURN(errval)
}
933
rsb_bool_t rsb__do_mv_lock_get(struct rsb_mv_lock_t *lock ,rsb_thr_t th_id, rsb_coo_idx_t roff, rsb_coo_idx_t m, rsb_coo_idx_t coff, rsb_coo_idx_t k, rsb_submatrix_idx_t subm, rsb_trans_t transA, rsb_char_t **ov, rsb_coo_idx_t *incov)
{
/* *
 * \ingroup gr_internals
 *
 * Try to lock submatrix \a subm (rows [roff,roff+m), columns [coff,coff+k))
 * for thread \a th_id, and select the output vector it shall accumulate into.
 *
 * Return values:
 *  - RSB_BOOL_TRUE        : lock acquired; *ov / *incov are set either to the
 *                           master vector (lock->ov, stride lock->incov) or to
 *                           one of the temporary vectors (unit stride);
 *  - RSB_BOOL_ALMOST_TRUE : nothing acquired, but loop detection has been armed
 *                           (lock->last_subm[th_id]=subm): the caller should retry;
 *  - RSB_BOOL_FALSE       : ov is NULL, the submatrix is already locked/done,
 *                           or no vector is currently available.
 * */
rsb_coo_idx_t nvi = RSB_MARKER_COO_VALUE; /* index of the temporary vector acquired, if any; marker means "master vector" */
rsb_bool_t was_looping=(lock->last_subm[th_id]==subm); /* is this thread revisiting the same submatrix without progress ? */
rsb_coo_idx_t i,j;
if(!ov)
return RSB_BOOL_FALSE;
if(rsb_do_lock_check_if_matrix_done(&(lock->olock),subm))
{
if(RSB_LOUD_MVL_TESTING)
RSB_INFO("matrix %d is already locked (for thread %d)\n",subm,th_id);
/* if the thread was looping on this mtxAp, there's no reason to do so anymore (mtxAp locked or done) */
if(was_looping)
lock->last_subm[th_id]=RSB_SUBM_IDX_MARKER;

return RSB_BOOL_FALSE;/* nothing to do: matrix done */
}
/* first, we try to get a lock on the master vector */
if(rsb__do_lock_get(&(lock->olock),th_id,roff,m,coff,k,subm,transA))
{
if(RSB_LOUD_MVL_TESTING)
RSB_INFO("locking matrix %d [%d...%d) to thread %d on master vector\n",subm,roff,roff+m,th_id);
goto found;
}
/* if the master vector was not available, we check if this thread was in a loop on this matrix */
if(!was_looping)
{
/* it was not looping on this submatrix */
if(lock->last_subm[th_id]==RSB_SUBM_IDX_MARKER)
{
/* it was not looping at all;
 * now, if the thread will be back here with the value unchanged, the loop will be detected */
lock->last_subm[th_id]=subm;
if(RSB_LOUD_MVL_TESTING)
RSB_INFO("not locking matrix %d to thread %d : waiting for a loop\n",subm,th_id);
return RSB_BOOL_ALMOST_TRUE;
}
else
;/* the thread is looping on another submatrix: let it loop there, then */
return RSB_BOOL_FALSE;
}

/* the thread was looping, and then it has the right to use a temporary vector (if any) */
if(RSB_DOES_TRANSPOSE(transA))
{ RSB_SWAP(rsb_coo_idx_t,k,m);RSB_SWAP(rsb_coo_idx_t,coff,roff); } /* FIXME: a dirty trick */
if((lock->olock.want_symlock == RSB_BOOL_TRUE))
{
/* symmetric case: both the row and the column interval must be free on the candidate vector */
for(nvi=0;nvi<lock->nv;++nvi)
if(rsb_do_btils_lock_get_sym(&(lock->locks[nvi]),roff,m+roff,coff,k+coff,transA,&i,&j))
{
lock->it[th_id]=j,lock->in[th_id]=i;
goto found;
}
}
else
{
/* unsymmetric case: only the row interval is locked on the candidate vector */
for(nvi=0;nvi<lock->nv;++nvi)
if(rsb_do_btils_lock_get(&(lock->locks[nvi]),roff,m+roff,transA,&i,&j))/* FIXME */
{
lock->in[th_id]=i,lock->it[th_id]=RSB_MARKER_COO_VALUE;
goto found;
}
}
/* TODO: are we sure that pushing the thread for looping is always the best thing ? */
/* TODO: implement here the task of picking up some vector and "reducing" it (not here, but in a "returned" signalling)! */
return RSB_BOOL_FALSE;
found:
/* found a temporary vector to perform computation */
if(RSB_LOUD_MVL_TESTING)
if(nvi != RSB_MARKER_COO_VALUE)
RSB_INFO("locking interval %d .. %d on vector %d to thread %d\n",roff,roff+m,nvi,th_id);

lock->last_subm[th_id]=RSB_SUBM_IDX_MARKER; /* any armed loop detection is cleared on success */
if(nvi == RSB_MARKER_COO_VALUE)
*ov=lock->ov, /* we'll work on the master vector */
*incov=lock->incov; /* unchanged */
else
*ov = RSB_MV_OFFSET(lock,nvi,0), /* we'll work on an auxiliary vector */
*incov=1;
/* mark the submatrix as taken in the outer lock's bitmap */
RSB_BITVECTOR_SET(lock->olock.bmap,lock->olock.subms,subm);
return RSB_BOOL_TRUE;
}
1019
rsb__do_release_candidate_interval_for_reduce(struct rsb_mv_lock_t * lock,rsb_thr_t th_id,rsb_char_t * ov,rsb_coo_idx_t roff,rsb_coo_idx_t m)1020 rsb_err_t rsb__do_release_candidate_interval_for_reduce(struct rsb_mv_lock_t *lock, rsb_thr_t th_id, rsb_char_t *ov, rsb_coo_idx_t roff, rsb_coo_idx_t m)
1021 {
1022 rsb_err_t errval = RSB_ERR_NO_ERROR;
1023 /* retrieve working interval */
1024 /* ... */
1025 rsb_coo_idx_t m0=0,m1=0;
1026 RSB_GET_LOCK_INTERVAL_W(&(lock->olock),th_id,m0,m1);
1027 rsb__do_lock_release(&(lock->olock),th_id);
1028 rsb__do_mv_lock_release(lock,th_id,ov);
1029 if(RSB_LOUD_MVR_TESTING)
1030 RSB_INFO("releasing reduce interval %d .. %d from thread %d\n",m0,m1,th_id);
1031 RSB_DO_ERR_RETURN(errval)
1032 }
1033
rsb_err_t rsb__do_pick_candidate_interval_for_reduce(struct rsb_mv_lock_t *lock, rsb_thr_t th_id, rsb_char_t ** ov, rsb_coo_idx_t * roff, rsb_coo_idx_t * m)
{
/*
	pick an interval which is free on both the master vector AND some vector v_i, and candidate for reducing
	we begin with the last temporary vector first

	On success: *ov points into the chosen temporary vector, *roff/*m give
	the interval's offset and length, the interval is marked as locked
	(and un-tainted) on the temporary vector and as locked on the master
	vector for th_id.  If no candidate is found, the outputs are left
	untouched and RSB_ERR_NO_ERROR is still returned: callers detect
	"no candidate" by *ov remaining unset.
*/
rsb_coo_idx_t nvi; /* temporary vector index, scanned downwards */
rsb_coo_idx_t i;   /* interval (bit) index within a vector's lock */
rsb_err_t errval = RSB_ERR_NO_ERROR;

if(!lock)
goto err;
/* we start looking for a vector to reduce with the last one */
for(nvi=lock->nv-1;nvi>=0;--nvi)
{
struct rsb_bti_lock_struct * vlock=&(lock->locks[nvi]);
if(RSB_LOUD_MVR_TESTING)
RSB_INFO("looking for tainted subvectors in temporary vector %d\n",nvi),
RSB_INFO("taint map: "),rsb__do_dump_bitmap(vlock->tmap,1,vlock->bsz),RSB_INFO("\n"),
RSB_INFO("use map: "),rsb__do_dump_bitmap(vlock->bmap,1,vlock->bsz),RSB_INFO("\n");
for(i=0;i<vlock->bsz;++i)
{
/* candidate = tainted (holds unreduced data) AND not currently locked */
if(RSB_BITVECTOR_GET(vlock->tmap,vlock->bsz,i) && rsb_do_btils_lock_probe_inner(vlock,i))
{
/* let's see if the master vector has available subvector i (TODO) */
/* first step is to obtain the bounds of the lock */
rsb_coo_idx_t m0,m1;
rsb_bool_t goir = RSB_BOOL_FALSE;

/* NOTE(review): errval is assigned here but not checked before m0/m1
 * are used below — confirm this call cannot fail for a valid i */
errval = rsb_do_get_interval_info_from_btils_lock(vlock,i,&m0,&m1);
if(RSB_LOUD_MVR_TESTING)
RSB_INFO("temporary vector %d is tainted at interval %d\n",nvi,i);

if(RSB_LOUD_MVR_TESTING)
RSB_INFO("let's see if the master vector has available subvector %d at [%d .. %d]... ",nvi,m0,m1);
goir = rsb_do_lock_check_interval(&(lock->olock),th_id,m0,m1-m0,m0,m1-m0,lock->transA);
if(RSB_LOUD_MVR_TESTING)
if(!goir)
RSB_INFO("no\n");
if(goir)
{
if(RSB_LOUD_MVR_TESTING)
RSB_INFO("yes. will lock now [%d .. %d].\n",m0,m1);
/* The interval is free on both subvectors.
 * We lock the interval on both vectors, then.
 * */
/* mark the interval as not tainted anymore, but locked, on nvi */
RSB_BITVECTOR_SET(vlock->bmap,vlock->bsz,i);
RSB_BITVECTOR_UNSET(vlock->tmap,vlock->bsz,i);
/* mark the interval as locked, on the master vector */
RSB_DO_LOCK_INTERVAL(&(lock->olock),th_id,m0,m1-m0);/* FIXME */

lock->it[th_id]=i;/* FIXME */
/* RSB_BITVECTOR_SET(lock->ir,RSB_CONST_MAX_SUPPORTED_CORES,th_id); */

/* let's give the vector info */
*ov = RSB_MV_OFFSET(lock,nvi,0);
*roff=m0;
*m=m1-m0;
goto done;
}
}
#if 0
else
if(RSB_LOUD_MVR_TESTING)
RSB_INFO("interval %d in vector %d is not available\n",i,nvi);
#endif
}
}
if(RSB_LOUD_MVR_TESTING)
RSB_INFO("there are no available taint vectors.\n");
/* no tainted subvectors found, or no free ones found. */
/* in this case, we could look for a common subvector to both some v_i and v_j, i<j, and reduce it into v_i */
goto done;
err:
errval = RSB_ERR_BADARGS;
done:
return errval;
}
1113
rsb_do_perform_partial_reduce(struct rsb_mv_lock_t * lock,rsb_thr_t th_id,rsb_trans_t transA,rsb_coo_idx_t incv)1114 rsb_err_t rsb_do_perform_partial_reduce(struct rsb_mv_lock_t *lock, rsb_thr_t th_id, rsb_trans_t transA, rsb_coo_idx_t incv)
1115 {
1116 /* FIXME: this is only an example routine */
1117 rsb_char_t * ov=NULL;
1118 rsb_coo_idx_t roff, m;
1119
1120 rsb__do_pick_candidate_interval_for_reduce(lock,th_id,&ov,&roff,&m);
1121
1122 if(!ov)
1123 goto done;
1124 RSB_ASSERT(lock->ov);
1125 if(RSB_LOUD_BTILS_TESTING)
1126 RSB_INFO("on thread %d about to reduce %d .. %d\n",th_id,roff,m+roff);
1127 rsb__vectors_left_sum_reduce_and_zero(lock->ov,ov,lock->typecode,m,incv,roff);
1128 /* perform reduce here */
1129 rsb__do_release_candidate_interval_for_reduce(lock,th_id,ov,roff,m);
1130 /*
1131 (with no symmetry or transposition, here)
1132 lock it for both vectors
1133 reduce the corresponding subvector (via sum), and zero it on the v_i vector
1134 update v_i's taint vector accordingly
1135 release the lock
1136 */
1137 done:
1138 return RSB_ERR_NO_ERROR;
1139 }
1140
1141 #if RSB_WANT_DO_LOCK_TEST
rsb_err_t rsb__do_lock_test()
{
/**
 * \ingroup gr_internals
 * FIXME: NEW, UNFINISHED
 *
 * Self-test for the binary tree interval lock (rsb_bti_lock_struct) and
 * for the multi-vector lock (rsb_mv_lock_t).  Compiled only when
 * RSB_WANT_DO_LOCK_TEST is enabled.
 **/
rsb_err_t errval = RSB_ERR_NO_ERROR;
struct rsb_bti_lock_struct lock;
rsb_coo_idx_t itl=8,nlevels = RSB_UPPER_BOUNDING_LOG2(itl);
rsb_int in,it,i;
rsb_trans_t transA = RSB_TRANSPOSITION_N;

/* first battery: 8-interval tree; overlapping gets must fail, disjoint ones succeed */
RSB_ASSERT(nlevels==3);
if((errval = rsb_do_btils_init(&lock,itl,nlevels))!=RSB_ERR_NO_ERROR)
{
RSB_PERR_GOTO(err,RSB_ERRM_ES);
}
RSB_ASSERT( rsb_do_btils_lock_get(&lock,2,4,transA,&in,&it));
RSB_ASSERT( rsb_do_btils_lock_get(&lock,4,8,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,0,4,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,0,8,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,2,3,transA,&in,&it));
RSB_ASSERT( rsb_do_btils_lock_get(&lock,0,1,transA,&in,&it));
RSB_ASSERT( rsb_do_btils_lock_get(&lock,1,2,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,0,2,transA,&in,&it));
/* releasing one of the two sub-intervals is not enough to lock their union */
rsb_do_btils_lock_release(&lock,1,2);
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,0,2,transA,&in,&it));
rsb_do_btils_lock_release(&lock,0,1);
RSB_ASSERT( rsb_do_btils_lock_get(&lock,0,2,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,2,3,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,3,4,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,4,5,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,6,7,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,7,8,transA,&in,&it));
/* releasing [4,8) frees its disjoint sub-intervals */
rsb_do_btils_lock_release(&lock,4,8);
RSB_ASSERT( rsb_do_btils_lock_get(&lock,4,5,transA,&in,&it));
RSB_ASSERT( rsb_do_btils_lock_get(&lock,6,7,transA,&in,&it));
RSB_ASSERT( rsb_do_btils_lock_get(&lock,7,8,transA,&in,&it));
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,7,8,transA,&in,&it));
rsb_do_btils_free(&lock);

/* second battery: non-power-of-two interval count (10) */
itl=10,nlevels = RSB_UPPER_BOUNDING_LOG2(itl);/* 4 */
RSB_ASSERT(nlevels==4);
if((errval = rsb_do_btils_init(&lock,itl,nlevels))!=RSB_ERR_NO_ERROR)
{
RSB_PERR_GOTO(err,RSB_ERRM_ES);
}
RSB_ASSERT( rsb_do_btils_lock_get(&lock,0,itl,transA,&in,&it));
for(i=0;i<itl;++i)
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,i,i+1,transA,&in,&it));
for(i=0;i<RSB_MIDDLE(itl);++i)
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,2*i,2*i+1,transA,&in,&it));
rsb_do_btils_lock_release(&lock,0,itl);
for(i=0;i<RSB_MIDDLE(itl);++i)
RSB_ASSERT( rsb_do_btils_lock_get(&lock,2*i,2*i+1,transA,&in,&it)),
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,2*i,2*i+1,transA,&in,&it));
for(i=0;i<RSB_MIDDLE(itl);++i)
rsb_do_btils_lock_release(&lock,2*i,2*i+1);
RSB_ASSERT( rsb_do_btils_lock_get(&lock,0,RSB_MIDDLE(itl),transA,&in,&it)),
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,0,RSB_MIDDLE(itl),transA,&in,&it)),
RSB_ASSERT(!rsb_do_btils_lock_get(&lock,0,RSB_MIDDLE(itl),transA,&in,&it)),
rsb_do_btils_free(&lock);

/*
 * TODO:
 * need symmetry and transposition support.
 * need routines for reducing the temporary vectors after 'failed' double loops
 * */
RSB_INFO("binary tree lock test ok\n");
/* third battery: multi-vector lock on a dense lower-triangular matrix */
{
/* NOTE(review): this inner errval shadows the outer one — verify that
 * errors from this scope are meant to be reported only via the message below */
rsb_err_t errval = RSB_ERR_NO_ERROR;
struct rsb_mv_lock_t lock;
rsb_int_t num_threads=4,th_id=0;
struct rsb_mtx_t *mtxAp=NULL;
struct rsb_mtx_t *submatrix=NULL;
enum rsb_op_flags_t op_flags = RSB_OP_FLAG_DEFAULT;
rsb_submatrix_idx_t si=0;
rsb_char_t * y=NULL,*oy=NULL,*oY=NULL,*oX=NULL;
rsb_submatrix_idx_t subms=0;
rsb_coo_idx_t incv=1;
mtxAp = rsb__generate_dense_lower_triangular(2000,NULL,RSB_NUMERICAL_TYPE_DEFAULT);
if(!mtxAp)
{ RSB_PERR_GOTO(erri,RSB_ERRM_ES); }
subms=mtxAp->all_leaf_matrices_n;
y = rsb__calloc(mtxAp->el_size*mtxAp->nr*incv);
if(!y)
{ RSB_PERR_GOTO(erri,RSB_ERRM_ES); }
if((errval = rsb__do_mv_lock_init(&lock,num_threads,subms,mtxAp,op_flags,transA,y,incv))!=RSB_ERR_NO_ERROR)
{ RSB_PERR_GOTO(erri,RSB_ERRM_ES); }

/* all gets below target the same row interval of the first leaf, so a second
 * thread only succeeds after its first (loop-arming) attempt has failed */
submatrix=mtxAp->all_leaf_matrices[si].mtxlp;
RSB_ASSERT(rsb__do_mv_lock_get(&lock,th_id,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si,transA,&oy,&incv));
RSB_ASSERT(!rsb__do_mv_lock_get(&lock,th_id+1,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si+1,transA,&oY,&incv));
RSB_ASSERT( rsb__do_mv_lock_get(&lock,th_id+1,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si+1,transA,&oY,&incv));
RSB_ASSERT(!rsb__do_mv_lock_get(&lock,th_id,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si,transA,&oy,&incv));
RSB_ASSERT(!rsb__do_mv_lock_get(&lock,th_id+2,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si+2,transA,&oX,&incv));
RSB_ASSERT( rsb__do_mv_lock_get(&lock,th_id+2,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si+2,transA,&oX,&incv));
RSB_ASSERT(!rsb__do_mv_lock_get(&lock,th_id+3,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si+3,transA,&oy,&incv));
RSB_ASSERT(!rsb__do_mv_lock_get(&lock,th_id+3,submatrix->roff,submatrix->nr,submatrix->coff,submatrix->nc,si+3,transA,&oy,&incv));
/* NOTE(review): the (thread, vector) pairing below does not match the
 * acquisitions above (oY was obtained by th_id+1, oX by th_id+2) —
 * confirm the crossed pairing in the next two releases is intentional */
RSB_ASSERT(!rsb__do_mv_lock_release(&lock,th_id,oy));
RSB_ASSERT(!rsb__do_mv_lock_release(&lock,th_id+2,oY));
RSB_ASSERT(!rsb__do_mv_lock_release(&lock,th_id+1,oX));
RSB_ASSERT(!rsb__do_mv_lock_release(&lock,th_id,oy)); /* harmless duplicate */
RSB_ASSERT(!rsb__do_mv_lock_release(&lock,th_id+1,oX)); /* harmless duplicate */

rsb_do_perform_partial_reduce(&lock,th_id,transA,incv);
rsb_do_perform_partial_reduce(&lock,th_id+1,transA,incv);

/*
 The following idea was inspired by Frigo's 'reducers & hyperobjects' paper.
 To support it, we could extend the rsb_bool_t to handle a trivalent logic:
 RSB_BOOL_FALSE=0, RSB_BOOL_TRUE=1, RSB_BOOL_ALMOST=2
 When encountering RSB_BOOL_ALMOST, the thread would perform the reducing strategy.
 After a detected loop, the lock (which could effectively turned out into a scheduler)
 would "propose" the thread to perform a "partial reduce".

 * */
/* TODO: this way of handling things forces 'incx' to be 1, then */
RSB_INFO("FIXME: missing handling of after-reduce release.\n");
goto oki;
erri:
RSB_INFO("binary tree based multi-lock test problems..\n");
oki:
RSB_MTX_FREE(mtxAp);
RSB_CONDITIONAL_FREE(y);
rsb__do_mv_lock_free(&lock);
}
goto ok;
ok:
RSB_INFO("binary tree based multi-lock test ok\n");
return RSB_ERR_NO_ERROR;
err:
RSB_DO_ERR_RETURN(errval)
}
1276 #endif /* RSB_WANT_DO_LOCK_TEST */
1277
1278 /* END EXPERIMENTAL CODE */
1279 /* @endcond */
1280