1 /*
2
3 Copyright (C) 2008-2021 Michele Martone
4
5 This file is part of librsb.
6
7 librsb is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 librsb is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with librsb; see the file COPYING.
19 If not, see <http://www.gnu.org/licenses/>.
20
21 */
22 /* @cond INNERDOC */
23 /*!
24 * @file
25 * @author Michele Martone
26 * @brief
27 * This source file contains parallel sorting functions.
28 * */
29
30 #include "rsb_common.h"
31
32 #define RSB_DO_WANT_PSORT_VERBOSE 0 /* set this to >0 to print parallel sort statistics */
33 #define RSB_DO_WANT_PSORT_TIMING (RSB_DO_WANT_PSORT_VERBOSE+0) /* set this to 1 to print some statistics */
34 #define RSB_DO_WANT_PSORT_FASTER_BUT_RISKY 0 /* FIXME: STILL UNFINISHED */
35 #define RSB_WANT_SORT_PARALLEL_BUT_SLOW 1
36
37 RSB_INTERNALS_COMMON_HEAD_DECLS
38
rsb__util_sort_row_major_parallel(void * VA,rsb_coo_idx_t * IA,rsb_coo_idx_t * JA,rsb_nnz_idx_t nnz,rsb_coo_idx_t m,rsb_coo_idx_t k,rsb_type_t typecode,rsb_flags_t flags)39 rsb_err_t rsb__util_sort_row_major_parallel(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t m, rsb_coo_idx_t k, rsb_type_t typecode, rsb_flags_t flags)
40 {
41 /**
42 * \ingroup gr_util
43 * TODO: should describe somewhere our technique: this is a mixed counting sort + merge sort.
44 */
45
46 rsb_err_t errval = RSB_ERR_NO_ERROR;
47 int cfi;
48 size_t el_size;
49 float cfa[] = {2}; // will use threads count as a reference
50 // float cfa[] = {1}; // will use cache size as a reference
51 // float cfa[] = {-1}; // will use as much memory as possible
52 const long wet = rsb_get_num_threads(); /* want executing threads */
53
54 if(nnz<RSB_MIN_THREAD_SORT_NNZ*wet)
55 {
56 // FIXME: it is known that very small matrices (e.g.: 2x2 from `make tests`) are not handled, here.
57 // however, this should be handled in a better way than this :)
58 return rsb__util_sort_row_major_inner(VA,IA,JA,nnz,m,k,typecode,flags);
59 }
60
61 if(!IA || !JA || !VA)
62 {
63 errval = RSB_ERR_BADARGS;
64 RSB_PERR_GOTO(err,RSB_ERRM_ES);
65 }
66
67 el_size = RSB_SIZEOF(typecode);
68
69 for(cfi=0;cfi<sizeof(cfa)/sizeof(float);++cfi)
70 {
71 void *W = NULL;
72 rsb_char_t *IW = NULL;
73 int ti;
74 long cs,bs,tc,ns,tcs;
75 rsb_nnz_idx_t cnnz = 0;
76 size_t bnnz = 0,fsm = rsb__sys_free_system_memory();
77 size_t wb = 0;
78
79 #if RSB_DO_WANT_PSORT_TIMING
80 rsb_time_t dt,st,mt,tt;
81 #endif /* RSB_DO_WANT_PSORT_TIMING */
82 // compute how many bytes are necessary for an element
83 ns=2*sizeof(rsb_coo_idx_t)+el_size;
84 // compute how many bytes are necessary for the whole processing
85 bs=nnz*ns;
86 if(cfa[cfi]>1)
87 tcs=bs+(wet*ns);
88 else if(cfa[cfi]>0)
89 tcs = rsb__get_lastlevel_c_size()*cfa[cfi]*wet;/* FIXME: '*wet' is a hack just for benchmark-related issues */
90 else
91 tcs=fsm/2; /* could be 0 */
92
93 if(tcs<1)
94 tcs=bs; /* could happen, for an interfacing problem */
95 else
96 if(fsm>0)
97 {
98 tcs = RSB_MIN(fsm,tcs);
99 }
100
101 // prepare a buffer
102 W = rsb__malloc(tcs);
103 if(!W)
104 {
105 errval = RSB_ERR_ENOMEM;
106 RSB_PERR_GOTO(erri,RSB_ERRM_ES)
107 }
108 cs=tcs/wet;
109 //RSB_INFO("cache is %d bytes\n",cs);
110
111 // compute the nnz fitting in the buffer
112 cnnz=cs/ns;
113 // compute the total count of necessary passes
114 //tc=(bs+cs-1)/cs;
115 tc=(nnz+(cnnz-1))/(cnnz);
116
117 wb = RSB_DO_REQUIRE_BYTES_FOR_INDEX_BASED_SORT(cnnz,m,k,1,1);
118 IW = rsb__malloc(wb*wet);
119 if(!IW)
120 {
121 errval = RSB_ERR_ENOMEM;
122 RSB_PERR_GOTO(erri,RSB_ERRM_ES);
123 }
124
125 //RSB_INFO("there are %d nnz (%d bytes), %d times the cache (%z bytes), %d nnz per cache\n",nnz,bs,tc,cs,cnnz);
126 #if RSB_DO_WANT_PSORT_TIMING
127 dt = rsb_time();
128 st=-dt;
129 #endif /* RSB_DO_WANT_PSORT_TIMING */
130
131 // this phase is potentially parallel, and is the slower one.
132 // NOTE:before parallelization, one should avoid allocations during sort, or serialize in them in some way!
133 #pragma omp parallel for schedule(static,1) shared(IA,JA,VA) RSB_NTC
134 for(ti=0;ti<tc;++ti)
135 {
136 size_t fnnz=ti*cnnz;
137 rsb_nnz_idx_t bnnz=(ti==tc-1)?(nnz-fnnz):cnnz;
138 // RSB_INFO("s:%d..%d (bnnz=%d)\n",fnnz,fnnz+bnnz-1,bnnz);
139 rsb__util_sort_row_major_buffered(((rsb_byte_t*)VA)+el_size*fnnz,IA+fnnz,JA+fnnz,bnnz,m,k,typecode,flags,IW+wb*ti,wb);
140 RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(VA+fnnz,IA+fnnz,JA+fnnz,bnnz,typecode,NULL,flags));
141 }
142
143 #if RSB_DO_WANT_PSORT_TIMING
144 dt = rsb_time();
145 st+=dt;
146 #endif /* RSB_DO_WANT_PSORT_TIMING */
147 // this phase is potentially parallel, too
148 for(bnnz=cnnz;bnnz<nnz;)
149 {
150 // size_t fnnz;
151 int fi, fn = ((nnz-bnnz)+(2*bnnz-1))/(2*bnnz);
152 #pragma omp parallel for schedule(static,1) shared(IA,JA,VA) RSB_NTC
153 // for(fnnz=0;fnnz<nnz-bnnz;fnnz+=2*bnnz)
154 for(fi=0;fi<fn;++fi)
155 {
156 #if RSB_WANT_OMP_RECURSIVE_KERNELS
157 rsb_char_t * lW=((rsb_char_t*)W)+cs*omp_get_thread_num();
158 #else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
159 rsb_char_t * lW=((rsb_char_t*)W)+cs*0;
160 #endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
161 size_t fnnz=fi*2*bnnz;
162 size_t lnnz=(fnnz+2*bnnz>nnz)?(nnz-(fnnz+bnnz)):bnnz;
163 void *fVA=((rsb_char_t*)VA)+el_size*fnnz;
164 void *fIA=IA+fnnz;
165 void *fJA=JA+fnnz;
166 #if RSB_PS_ASSERT
167 void *bVA=((rsb_char_t*)VA)+el_size*(fnnz+bnnz);
168 void *bIA=IA+fnnz+bnnz;
169 void *bJA=JA+fnnz+bnnz;
170 #endif /* RSB_PS_ASSERT */
171 //RSB_INFO("m:%d..%d %d..%d (%d) (bnnz=%d) (lnnz=%d)\n",fnnz,fnnz+bnnz-1,fnnz+bnnz,fnnz+bnnz+lnnz-1,cs,bnnz,lnnz);
172 // RSB_INFO("sentinel:%x %d %d\n",IA+fnnz+bnnz+lnnz,IA[fnnz+bnnz+lnnz],JA[fnnz+bnnz+lnnz]);
173 RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(fVA,fIA,fJA,bnnz,typecode,NULL,flags));
174 RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(bVA,bIA,bJA,lnnz,typecode,NULL,flags));
175 rsb__do_util_merge_sorted_subarrays_in_place(fVA,fIA,fJA,lW,bnnz,lnnz,cs,flags,typecode);
176 // RSB_INFO("sentinel:%x %d %d\n",IA+fnnz+bnnz+lnnz,IA[fnnz+bnnz+lnnz],JA[fnnz+bnnz+lnnz]);
177 RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(fVA,fIA,fJA,bnnz,typecode,NULL,flags));
178 RSB_PS_ASSERT(!rsb__util_is_sorted_coo_as_row_major(fVA,fIA,fJA,bnnz+lnnz,typecode,NULL,flags));
179 }
180 #pragma omp barrier
181 bnnz *= 2;
182 }
183 #if RSB_DO_WANT_PSORT_TIMING
184 mt = - dt;
185 dt = rsb_time();
186 mt += dt;
187 tt = mt + st;
188 RSB_INFO("using %zd partitions, (sort=%.5lg+merge=%.5lg)=%.5lg, on %d threads\n",(size_t)tc,st,mt,tt,wet);
189 #endif /* RSB_DO_WANT_PSORT_TIMING */
190
191 // assert(!rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,nnz,typecode,NULL,flags));
192 // if(rsb__util_is_sorted_coo_as_row_major(VA,IA,JA,nnz,typecode,NULL,flags))
193 // RSB_PERR_GOTO(err,RSB_ERRM_EM);
194
195 erri:
196 RSB_CONDITIONAL_FREE(W);
197 RSB_CONDITIONAL_FREE(IW);
198 }
199 err:
200 RSB_DO_ERR_RETURN(errval)
201 }
202
rsb__util_sort_row_major_inner(void * RSB_RESTRICT VA,rsb_coo_idx_t * RSB_RESTRICT IA,rsb_coo_idx_t * RSB_RESTRICT JA,const rsb_nnz_idx_t nnz,const rsb_coo_idx_t m,const rsb_coo_idx_t k,const rsb_type_t typecode,const rsb_flags_t flags)203 rsb_err_t rsb__util_sort_row_major_inner(void * RSB_RESTRICT VA, rsb_coo_idx_t * RSB_RESTRICT IA, rsb_coo_idx_t * RSB_RESTRICT JA, const rsb_nnz_idx_t nnz, const rsb_coo_idx_t m, const rsb_coo_idx_t k, const rsb_type_t typecode , const rsb_flags_t flags /*, void * WA, size_t wb */)
204 {
205 rsb_err_t errval = RSB_ERR_NO_ERROR;
206 #if RSB_WANT_SORT_PARALLEL_BUT_SLOW
207 if( rsb_global_session_handle.asm_sort_method > 0 )
208 /* parallel and scaling but slow */
209 errval = rsb__util_sort_row_major_parallel(VA,IA,JA,nnz,m,k,typecode,flags);
210 else
211 #else /* RSB_WANT_SORT_PARALLEL_BUT_SLOW */
212 #endif /* RSB_WANT_SORT_PARALLEL_BUT_SLOW */
213 /* not so parallel nor scaling but fast */
214 errval = rsb_util_sort_row_major_bucket_based_parallel(VA,IA,JA,nnz,m,k,typecode,flags);
215 return errval;
216 }
217
rsb_err_t rsb_util_sort_row_major_bucket_based_parallel(void * RSB_RESTRICT VA, rsb_coo_idx_t * RSB_RESTRICT IA, rsb_coo_idx_t * RSB_RESTRICT JA, const rsb_nnz_idx_t nnz, const rsb_coo_idx_t m, const rsb_coo_idx_t k, const rsb_type_t typecode , const rsb_flags_t flags /*, void * WA, size_t wb */)
{
	/**
	 \ingroup gr_internals
	 Bucket (row) based parallel sort of the COO triplets:
	 1) count nonzeroes per row into PA; 2) prefix-sum PA into row pointers;
	 3) scatter the triplets row-wise into the work arrays (iWA,jWA,vWA);
	 4) sort each row's column indices, rows processed in parallel;
	 5) copy the sorted arrays back over (IA,JA,VA), in parallel.
	 FIXME: EXPERIMENTAL, DOCUMENT ME
	 FIXME: shall stand duplicates, and so consequently e.g. m*n<nnz (it is actual input, e.g. out of the sparse matrices' sum).
	 */
	int psc = RSB_PSORT_CHUNK;	/* OpenMP static chunk size for the per-row sort loop */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	// rsb_nnz_idx_t frnz = 0;
	rsb_nnz_idx_t mnzpr = 0;	/* maximal nonzeroes on a single row */
	rsb_nnz_idx_t *PA = NULL;	/* row pointer array, m+2 entries (shifted by one with a trick, below) */
	void *WA = NULL;		/* scatter work area, partitioned as iWA | jWA | vWA */
	rsb_coo_idx_t *iWA = NULL;	/* scattered row indices, nnz entries */
	rsb_coo_idx_t *jWA = NULL;	/* scattered column indices, nnz entries */
	rsb_coo_idx_t *nWA = NULL;	/* per-thread workspace for rsb_do_msort_up/rsb_ip_reord; NOTE(review): allocated with sizeof(rsb_nnz_idx_t) — confirm the two index types match */
	void *vWA = NULL;		/* scattered values, nnz*el_size bytes */
	rsb_nnz_idx_t n = 0;
	size_t el_size = RSB_SIZEOF(typecode);
	//struct rsb_mtx_partitioning_info_t pinfop;
	/* const long wet = rsb_get_num_threads();*/ /* want executing threads */
	const long wet = rsb__set_num_threads(RSB_THREADS_GET_MAX_SYS); /* want executing threads; FIXME: it seems there is a severe bug with the definition of RSB_NTC */
#if RSB_DO_WANT_PSORT_TIMING
	rsb_time_t dt,pt,st,mt,ct;
	rsb_time_t tt = - rsb_time();
#endif /* RSB_DO_WANT_PSORT_TIMING */
	rsb_int_t ei = RSB_DO_FLAG_HAS(flags,RSB_FLAG_FORTRAN_INDICES_INTERFACE) ? 1 : 0;	/* 1 iff indices are one-based (Fortran) */

	if(wet==0)
	{
		errval = RSB_ERR_GENERIC_ERROR;
		RSB_PERR_GOTO(ret,"%s\n","No threads detected! Are you sure to have initialized the library?\n"); // RSB_ERRM_FLI
	}

	if(RSB_MATRIX_UNSUPPORTED_TYPE(typecode))
	{
		errval = RSB_ERR_UNSUPPORTED_TYPE;
		RSB_PERR_GOTO(ret,"\n");
	}

	/* zero or one nonzero: nothing to sort */
	if(nnz<2)
		goto ret;

	/* guard the two allocation sizes below against size_t overflow */
	if(RSB_MUL_OVERFLOW(sizeof(rsb_nnz_idx_t),(m+2),size_t,rsb_non_overflowing_t)
	|| RSB_MUL_OVERFLOW(sizeof(rsb_nnz_idx_t)*2+el_size,(nnz),size_t,rsb_non_overflowing_t))
	{
		errval = RSB_ERR_LIMITS;
		RSB_PERR_GOTO(err,"sorry, allocating that much memory would cause overflows\n");
	}
	PA = rsb__calloc(sizeof(rsb_nnz_idx_t)*(m+2));
	// WA = rsb__calloc(RSB_MAX(sizeof(rsb_coo_idx_t),el_size)*(nnz+1));
	// WA = rsb__calloc((2+3*sizeof(rsb_coo_idx_t)+el_size)*nnz);
	// WA = rsb__calloc((2*sizeof(rsb_coo_idx_t)+el_size)*nnz);
	WA = rsb__calloc_parallel((sizeof(rsb_coo_idx_t)*2+el_size)*nnz); // NEW 20101201

	if(!PA || !WA)
	{
		errval = RSB_ERR_ENOMEM;
		RSB_PERR_GOTO(err,"after calloc, pa=%p, wa=%p\n",PA,WA);
	}
	/* carve the three scatter arrays out of the single WA allocation */
	iWA=((rsb_coo_idx_t*) WA);
	jWA=((rsb_coo_idx_t*)iWA)+nnz;
	vWA=((rsb_coo_idx_t*)jWA)+nnz;
	// nWA=((rsb_char_t*)vWA)+(el_size*nnz);

	/* saving one head element with a trick */
	++PA;
#if RSB_DO_WANT_PSORT_TIMING
	dt = rsb_time();
	mt=-dt;
#endif /* RSB_DO_WANT_PSORT_TIMING */

#if RSB_DO_WANT_PSORT_FASTER_BUT_RISKY
# if 1
	/* FIXME: unfinished and incorrect code */
	/* NOTE(review): unsynchronized concurrent increments of PA[] — racy as the FIXME says */
#pragma omp parallel for schedule(static,psc) shared(PA,IA) RSB_NTC
	for(n=0;n<nnz;++n)
		PA[IA[n]+1-ei]++;
# else
	/* actually, this code is VERY SLOW :) */
#pragma omp parallel reduction(|:errval) shared(PA,IA)
	{
		rsb_nnz_idx_t n;
		rsb_thr_t th_id = omp_get_thread_num();
		rsb_thr_t tn = omp_get_num_threads();

		/* each thread only counts the rows it owns (modulo distribution) */
		for(n=0;RSB_LIKELY(n<nnz);++n)
			if(IA[n]%tn==th_id)
				PA[IA[n]+1-ei]++;
	}
#pragma omp barrier
#endif
#else /* RSB_DO_WANT_PSORT_FASTER_BUT_RISKY */
	/* setting PA[i] to contain the count of elements on row i */
	for(n=0;RSB_LIKELY(n<nnz);++n)
	{
		RSB_ASSERT(IA[n]>=0);
		RSB_ASSERT(IA[n]<=m);
		PA[IA[n]+1-ei]++;
#if RSB_DO_WANT_PSORT_VERBOSE>1
		RSB_INFO("PA[m] = %d\n",PA[m]);
		RSB_INFO("IA[%d] = %d PA[%d] = %d\n",n,IA[n],IA[n]+1-ei,PA[IA[n]+1-ei]);
#endif /* RSB_DO_WANT_PSORT_VERBOSE */
	}
#endif /* RSB_DO_WANT_PSORT_FASTER_BUT_RISKY */
	/* setting PA[i] to contain the count of elements before row i */
	for(n=0;RSB_LIKELY(n<m);++n)
	{
		PA[n+1] += PA[n];
#if RSB_DO_WANT_PSORT_VERBOSE>1
		RSB_INFO("PA[%d] = %d\n",n,PA[n]);
#endif /* RSB_DO_WANT_PSORT_VERBOSE */
	}
#if RSB_DO_WANT_PSORT_VERBOSE>1
	RSB_INFO("PA[%d] = %d\n",n,PA[n]);
#endif /* RSB_DO_WANT_PSORT_VERBOSE */

#if RSB_DO_WANT_PSORT_TIMING
	dt = rsb_time();
	mt+=dt;
	pt=-dt;
#endif /* RSB_DO_WANT_PSORT_TIMING */
	/* shuffling elements on the basis of their row
	 * FIXME : this is the slowest part of this code
	 * its performance is largely dependent on cache lenghts and latencies.. */
	/* FIXME : parallelization of this is challenging */
	rsb_util_do_scatter_rows(vWA,iWA,jWA,VA,IA,JA,PA-ei,nnz,typecode);
	--PA; /* PA has been modified. */
#if RSB_DO_WANT_PSORT_TIMING
	dt = rsb_time();
	pt+=dt;
	st=-dt;
#endif /* RSB_DO_WANT_PSORT_TIMING */
	// RSB_COA_MEMCPY(IA,iWA,0,0,nnz);
	// RSB_COA_MEMCPY(JA,jWA,0,0,nnz);
	// RSB_A_MEMCPY(VA,vWA,0,0,nnz,el_size);
	// SB_COA_MEMCPY_parallel(IA,iWA,0,0,nnz);
	// RSB_COA_MEMCPY_parallel(JA,jWA,0,0,nnz);
	// RSB_A_MEMCPY_parallel(VA,vWA,0,0,nnz,el_size);
	/* restore the row pointers with a trick */

	RSB_ASSERT(PA[m]==nnz);

	/* compute mnzpr, the maximal row population, to size nWA below */
	/* TODO: parallelization of this ? FIXME: is this necessary ? */
	for(n=0;n<m;++n)
		mnzpr = RSB_MAX(mnzpr,PA[n+1]-PA[n]);

	nWA = rsb__malloc(sizeof(rsb_nnz_idx_t)*(mnzpr+2)*wet);/* rsb__malloc is evil inside openmp */
	if(!nWA) { RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_ENOMEM);RSB_PERR_GOTO(err,RSB_ERRM_ES); }

	psc = RSB_MIN(psc,m);	/* chunk size must not exceed the row count */
	/* the rows are ready to be sorted (FIXME: this is slow, and could be optimized very much) */
	// #pragma omp parallel for reduction(|:errval)
	// #pragma omp parallel for
	// #pragma omp parallel for schedule(static,10)
	// #pragma omp parallel for schedule(static,1)
	// #pragma omp parallel for schedule(static,psc) shared(iWA,jWA,vWA,nWA,PA) num_threads(wet)
#pragma omp parallel for schedule(static,psc) shared(iWA,jWA,vWA,nWA,PA) RSB_NTC
	for(n=0;n<m;++n)
	{
		rsb_nnz_idx_t nnz1,nnz0;	/* this row's nonzeroes are [nnz0,nnz1) */
#if 1
#if RSB_WANT_OMP_RECURSIVE_KERNELS
		rsb_thread_t th_id = omp_get_thread_num();
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		rsb_thread_t th_id=0;
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		rsb_nnz_idx_t tnoff=th_id*(mnzpr+2);	/* this thread's offset into nWA */
		nnz1=PA[n+1];
		nnz0=PA[n];
#if RSB_DO_WANT_PSORT_VERBOSE
		RSB_INFO("psort row %d/%d: nonzeros [%d .. %d/%d] on thread %d\n",(int)n,m,(int)nnz0,(int)nnz1,(int)nnz,(int)th_id);
#endif /* RSB_DO_WANT_PSORT_VERBOSE */
		if(nnz1-nnz0<2)
			continue;/* skip empty line. TODO: could implement with macro sorting algorithms for few nnz */
		/* sort the row's column indices, then apply the resulting permutation in place */
		if(!RSB_SOME_ERROR(rsb_do_msort_up(nnz1-nnz0,jWA+nnz0,nWA+tnoff)))
			rsb_ip_reord(nnz1-nnz0,((rsb_char_t*)vWA)+el_size*nnz0,iWA+nnz0,jWA+nnz0,nWA+tnoff,typecode);
#else
		nnz1=PA[n+1];
		nnz0=PA[n];
		rsb__do_util_sortcoo(vWA+nnz0,iWA+nnz0,jWA+nnz0,m,k,nnz1-nnz0,typecode,NULL,flags,NULL,0);
#endif
	}

#if RSB_DO_WANT_PSORT_TIMING
	dt = rsb_time();
	st+=dt;
	ct=-dt;
#endif /* RSB_DO_WANT_PSORT_TIMING */

	/* copy the sorted arrays back onto the caller's arrays */
	RSB_COA_MEMCPY_parallel(IA,iWA,0,0,nnz);
	RSB_COA_MEMCPY_parallel(JA,jWA,0,0,nnz);
	RSB_A_MEMCPY_parallel(VA,vWA,0,0,nnz,el_size);
#if RSB_DO_WANT_PSORT_TIMING
	dt = rsb_time();
	ct+=dt;
#endif /* RSB_DO_WANT_PSORT_TIMING */
err:
	RSB_CONDITIONAL_FREE(PA);
	RSB_CONDITIONAL_FREE(WA);
	RSB_CONDITIONAL_FREE(nWA);
#if RSB_DO_WANT_PSORT_TIMING
	dt = rsb_time();
	tt+=dt;
	RSB_INFO("pt:%lg st:%lg tt:%lg mt:%lg ct:%lg\n",pt,st,tt,mt,ct);
#endif /* RSB_DO_WANT_PSORT_TIMING */
ret:
	RSB_DO_ERR_RETURN(errval)
}
427 /* @endcond */
428