/*

Copyright (C) 2008-2021 Michele Martone

This file is part of librsb.

librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.

*/
/* @cond INNERDOC */
/*!
 * @file
 * @author Michele Martone
 * @brief System and standard library related functions.
 * */

#include <unistd.h> /* sysconf */
#include "rsb_internals.h"
#include "rsb.h"
#ifdef RSB_HAVE_LIMITS_H
#include <limits.h> /* CHAR_BIT */
#endif /* RSB_HAVE_LIMITS_H */
#include <assert.h> /* assert */
#ifdef RSB_HAVE_STDLIB_H
#include <stdlib.h> /* posix_memalign */
#endif /* RSB_HAVE_STDLIB_H */
#ifdef RSB_HAVE_MALLOC_H
#include <malloc.h> /* posix_memalign */
#endif /* RSB_HAVE_MALLOC_H */
#ifdef RSB_HAVE_SYS_SYSTEMCFG_H
#include <sys/systemcfg.h> /* for _H_SYSTEMCFG */
#endif /* RSB_HAVE_SYS_SYSTEMCFG_H */
#ifdef RSB_HAVE_SYS_MMAN_H
#if RSB_HAVE_SYS_MMAN_H
#include <sys/mman.h> /* for mlockall */
#endif /* RSB_HAVE_SYS_MMAN_H */
#endif /* RSB_HAVE_SYS_MMAN_H */
#if defined(RSB_HAVE_DMALLOC_H) && defined(RSB_WANT_DMALLOC) && (RSB_WANT_DMALLOC!=0)
#include <dmalloc.h> /* a debug library */
#endif /* defined(RSB_HAVE_DMALLOC_H) && defined(RSB_WANT_DMALLOC) && (RSB_WANT_DMALLOC!=0) */
#if defined(RSB_HAVE_DUMA_H) && defined(RSB_WANT_DUMA) && (RSB_WANT_DUMA!=0)
#include <duma.h> /* a debug library */
#endif /* defined(RSB_HAVE_DUMA_H) && defined(RSB_WANT_DUMA) && (RSB_WANT_DUMA!=0) */
/* #include <stdio.h> */ /* fileno */
#if RSB_WITH_HWLOC
#include <hwloc.h>
#endif /* RSB_WITH_HWLOC */

/* set the following to 1 to enable random malloc fault injection (and get some real fun) */
#define RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION 0
#if RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION
/* FIXME: need a true random number generator interface */
#define RSB_SHOULD_RANDOMLY_FAIL (rsb__rand_coo_index(1349)==42)
#else /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */
#define RSB_SHOULD_RANDOMLY_FAIL 0
#endif /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */

#define RSB_SUSPECT_ALLOCATION_SIZE (1024*1024*16) /* i.e. of notable size. */
#define RSB_MEM_DEBUG 0 /* verbosity */
#define RSB_MEM_DEBUG_REALLOC 0 /* verbosity */
#define RSB_CHEAP_DEBUG 0 /* cheap extra checks on suspect memory wrapper related values */
#define RSB_DEBUG_MARKER_AFTER_FREE 0 /* double free protection */
#define RSB_DEBUG_SHRED_AFTER_FREE 0 /* protection against re-use of freed areas */
#define RSB_SHRED_BYTE /* 0xFF */ 0xF0 /* byte value to use when shredding memory */
#define RSB_SHRED_WORD ( RSB_SHRED_BYTE | ( RSB_SHRED_BYTE<<8 ) | ( RSB_SHRED_BYTE<<16 ) | ( RSB_SHRED_BYTE<<24 ) )
#define RSB_FREE_MARKER 0xDEADBEEF /* ( 3735928559) */ /* a marker for detecting double free */
#define RSB_OVW_MARKER 0xBEEFBABE /* (-1091585346) */ /* a marker for detecting accidental overwrites */
#define RSB_MW_ODMO ( 0) /* memory wrapper overwrite detection marker offset (set 0 to deactivate, set to a non-*MW* overlapping value to activate) */
#define RSB_MW_SHMO (-1) /* memory wrapper shift marker offset */
#define RSB_MW_SZMO (-2) /* memory wrapper size marker offset */
#define RSB_MW_ESLC (-RSB_MIN(RSB_MIN(RSB_MW_ODMO,RSB_MW_ODMO),RSB_MIN(RSB_MW_SZMO,RSB_MW_SHMO))) /* memory wrapper extra 'sizeof' locations count */
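
/*
 * Illustrative sketch (an assumption-laden reading of the macros above, not
 * normative documentation): with RSB_MW_ODMO deactivated (0), RSB_MW_ESLC is 2,
 * and an area handed to the caller as p is laid out as:
 *
 *   base = malloc(size+extra)
 *   base ... [RSB_MW_ESLC size_t slots][shift padding bytes] p ... p+size-1
 *   ((size_t*)p)[RSB_MW_SZMO] == size   (requested size)
 *   ((size_t*)p)[RSB_MW_SHMO] == shift  (alignment padding applied)
 *
 * so that rsb_aligned_free() can recover the base pointer as
 *   base == ((size_t*)((char*)p - shift)) - RSB_MW_ESLC .
 */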

#define RSB_MD_ASSERT RSB_ASSERT /* memory debug assert macro */

#if RSB_DEBUG_SHRED_AFTER_FREE
#include <string.h> /* memset */
#endif

#ifdef RSB_HAVE_SYS_RESOURCE_H
#define RSB_USE_RUSAGE 1
#else
#define RSB_USE_RUSAGE 0
#endif

#if RSB_USE_RUSAGE
#include <sys/resource.h> /* getrusage */
#endif

RSB_INTERNALS_COMMON_HEAD_DECLS

#if RSB_WANT_ALLOCATOR_LIMITS
#define RSB_ALLOC_MEMAAA_LIMIT rsb_global_session_handle.memory_count_max
#define RSB_ALLOC_MEMAAC_LIMIT rsb_global_session_handle.allocations_count_max
#define RSB_ALLOC_LIMITS_TRESPASSED(AAA,AAC) RSB_UNLIKELY( ( ( RSB_ALLOC_MEMAAA_LIMIT > 0 && rsb_global_session_handle.allocated_memory+(AAA) >= RSB_ALLOC_MEMAAA_LIMIT ) || ( RSB_ALLOC_MEMAAC_LIMIT > 0 && rsb_global_session_handle.allocations_count+(AAC) > RSB_ALLOC_MEMAAC_LIMIT ) ) ? 1 : 0 )
#else /* RSB_WANT_ALLOCATOR_LIMITS */
#define RSB_ALLOC_LIMITS_TRESPASSED(AAA,AAC) 0
#endif /* RSB_WANT_ALLOCATOR_LIMITS */
#define RSB_ZERO_BYTE_ALLOC_CHECK 0

#define RSB_TIME_SET_THREADS 0

void rsb__g_rsb_memory_counter_init(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Memory counter reset.
	 */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	rsb_global_session_handle.allocated_memory=0;
	rsb_global_session_handle.allocations_count=0;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

size_t rsb__get_g_rsb_memory_count(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * A mere accessor function.
	 */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	return rsb_global_session_handle.allocated_memory;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return 0;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

size_t rsb__get_g_rsb_allocations_count(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * A mere accessor function.
	 */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	return rsb_global_session_handle.allocations_count;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return 0;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

#if 1

static void * rsb_aligned_free(void *p)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \param p a generic pointer allocated with rsb__aligned_malloc
	 * \return the input pointer in case of correct operation, NULL in case of error
	 *
	 * Frees a memory area previously allocated with rsb__aligned_malloc,
	 * and returns the pointer in case of a successful free,
	 * or NULL in case of a suspected error.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	size_t size;
	size_t shift;
	if( p == NULL )
		return p;
#if RSB_DEBUG_SHRED_AFTER_FREE
	if( (((rsb_int_t)p) & RSB_SHRED_WORD ) == RSB_SHRED_WORD ) /* shred-area read pointer detection */
	{
		RSB_STDERR("Warning: it is likely that pointer %p is invalid and was read from a previously freed area. Expect a crash now.\n",p);
		RSB_MD_ASSERT(0);
	}
#endif /* RSB_DEBUG_SHRED_AFTER_FREE */
	size = ((size_t*)p)[RSB_MW_SZMO];
#if RSB_MEM_DEBUG
	RSB_STDERR("freeing %zu bytes at %p (in hex:0x%0zx bytes)\n",size,p,size);
#endif
	shift = ((size_t*)p)[RSB_MW_SHMO];
#if RSB_DEBUG_MARKER_AFTER_FREE
	if(size == RSB_FREE_MARKER || shift == RSB_FREE_MARKER)
	{
		RSB_STDERR("Warning: it is almost certain that memory at %p has been already deallocated! Expect a crash now.\n",p);
		RSB_MD_ASSERT(0);
	}
	((size_t*)p)[RSB_MW_SZMO] = RSB_FREE_MARKER;
	((size_t*)p)[RSB_MW_SHMO] = RSB_FREE_MARKER;
#endif /* RSB_DEBUG_MARKER_AFTER_FREE */
	if( RSB_MW_ODMO && ((size_t*)p)[RSB_MW_ODMO] != RSB_OVW_MARKER )
	{
		RSB_STDERR("Warning: memory at %p has been overwritten (marker value is 0x%zx instead of 0x%x) ! Expect crashes.\n",p,((size_t*)p)[RSB_MW_ODMO],RSB_OVW_MARKER );
		RSB_MD_ASSERT(0);
	}
#if RSB_DEBUG_SHRED_AFTER_FREE
	if( ( size >= sizeof(rsb_int_t) ) && ( *(rsb_int_t*)p == RSB_SHRED_WORD ) ) /* shredded area re-free detection */
	{
		RSB_STDERR("Warning: it is possible that the %zu-byte area at %p was recently freed (points to {0x%x, ...). May expect a crash now.\n",size,p,*(rsb_int_t*)p);
		/* RSB_MD_ASSERT(0); */
	}
	memset(p, RSB_SHRED_BYTE, size);
#endif /* RSB_DEBUG_SHRED_AFTER_FREE */
	p = (( char *)p)-(shift);
	p = ((size_t*)p)-RSB_MW_ESLC; /* we step back over the marker slots */
#pragma omp atomic
	rsb_global_session_handle.allocated_memory -= size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count--;
	free(p);
	return p;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	free(p);
	return p;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__realloc(void *rsb_data, size_t size)
{
#ifdef RSB_WANT_DOUBLE_ALIGNED
	return rsb__do_realloc(rsb_data,size,sizeof(double)*2);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	return rsb__do_realloc(rsb_data,size,1);
#endif /* RSB_WANT_DOUBLE_ALIGNED */
}

void * rsb__do_realloc(void *rsb_data, size_t size, size_t alignment)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Reallocates an area previously allocated through the wrapper.
	 * Note that, unlike realloc(), a NULL rsb_data yields NULL (no allocation is performed).
	 * TODO : is there a way to make the alignment survive the reallocation ?
	 * */
	void * p = rsb_data;
	size_t extra = 0;
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	size_t osize;
	size_t shift;
	extra = sizeof(size_t)*RSB_MW_ESLC+alignment;

	if(p==NULL)
		return p;
	osize = ((size_t*)p)[RSB_MW_SZMO];
#if ( RSB_MEM_DEBUG || RSB_MEM_DEBUG_REALLOC )
	RSB_STDERR("reallocating from %zu to %zu bytes (%+zd) at 0x%p (in hex: to 0x%0zx bytes)\n",osize,size,(size-osize),p,size);
#endif
	shift = ((size_t*)p)[RSB_MW_SHMO];
	p = (( char *)p)-(shift);
	p = ((size_t*)p)-RSB_MW_ESLC; /* we step back over the marker slots */
	/* no free was performed, since extra>0 */
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	if(size==0) /* a free shall be performed */
		extra=0;

#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	if(size==0 && p) /* a free shall be performed */
	{
#pragma omp atomic
		rsb_global_session_handle.allocations_count--;
	}
	if(size>osize)
#pragma omp atomic
		rsb_global_session_handle.allocated_memory+=size-osize;
	else
		if(p) /* a free shall be performed */
#pragma omp atomic
			rsb_global_session_handle.allocated_memory-=osize-size;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	p = realloc(p,size+extra); /* if freeing, either p or NULL will be returned */

	if(!p)
		return p; /* failure */

#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
#if 1
	/* restore the allocation info (potentially losing alignment, because we have to keep the same shift!) */
	p = ((size_t*)p)+RSB_MW_ESLC; /* we make room for the markers */
	p = (( char *)p)+(shift);
	/* to restore alignment, one should perform a memmove */
	((size_t*)p)[RSB_MW_SHMO] = shift;
	((size_t*)p)[RSB_MW_SZMO] = size;
	if( RSB_MW_ODMO ) ((size_t*)p)[RSB_MW_ODMO] = RSB_OVW_MARKER;
#else
	/* buggy way */

	{
		size_t off;
		off=((size_t)(((size_t*)p)+2))%(alignment); /* to the return address from ((size_t*)p)+2 */
		shift = (alignment-off);
		p = ((size_t*)p)+2; /* we make room for two markers */
		p = (( char *)p)+(shift);
		((size_t*)p)[RSB_MW_SHMO] = shift;
		((size_t*)p)[RSB_MW_SZMO] = size;
		if( RSB_MW_ODMO ) ((size_t*)p)[RSB_MW_ODMO] = RSB_OVW_MARKER;
	}
#endif
	return p;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return p; /* success */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}
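
/*
 * Illustrative sketch (not normative): unlike realloc(), rsb__realloc(NULL,n)
 * performs no allocation and returns NULL, so a typical grow-in-place use is:
 *
 *   void * p = rsb__malloc(100);
 *   if(p)
 *   	p = rsb__realloc(p, 200); // may lose, but still tracks, the original alignment
 *   if(p)
 *   	rsb__free(p);
 */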

void * rsb__aligned_malloc(size_t size, size_t alignment)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Allocates size bytes, plus bookkeeping space used to keep track of the allocated chunk size.
	 * \param size is the amount of bytes to allocate
	 *
	 * The returned area will be alignment bytes aligned.
	 * This area should be deallocated with rsb_aligned_free.
	 *
	 * Note that although the address will be aligned as asked,
	 * there is a 100% alignment and contiguity guarantee only
	 * on machines with no virtual memory (on Linux there seems to be no user space contiguous
	 * memory allocator like the kernel's vmalloc).
	 *
	 * If none of RSB_DISABLE_ALLOCATOR_WRAPPER, RSB_HAVE_POSIX_MEMALIGN and RSB_HAVE_MEMALIGN is available, malloc() will be used.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	void * p;
	size_t extra = sizeof(size_t)*RSB_MW_ESLC+alignment;
	size_t off;
	size_t shift;

	if(RSB_ALLOC_LIMITS_TRESPASSED(size+extra,1))
	{
		rsb__print_memory_allocation_info();
		return NULL;
	}

#if RSB_ZERO_BYTE_ALLOC_CHECK
	if(size == 0 && extra == 0)
	{
		RSB_ERROR(RSB_ERRM_ZSM);
	}
#endif
	p = malloc( size + extra );
	if(!p)
		return p; /* failure */
	RSB_DEBUG_ASSERT(rsb_global_session_handle.allocated_memory <=(rsb_global_session_handle.allocated_memory+size ));
	/* the following could trigger during very very long/big runs, so ... */
	RSB_DEBUG_ASSERT((rsb_global_session_handle.allocations_count < (rsb_global_session_handle.allocations_count+1))
	/* ... this line should fix those cases */
		|| (rsb_global_session_handle.allocations_count+1)==0);

#pragma omp atomic
	rsb_global_session_handle.allocated_memory+=size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count++;
	off=((size_t)(((size_t*)p)+RSB_MW_ESLC))%(alignment); /* to the return address from ((size_t*)p)+RSB_MW_ESLC */
	shift = (alignment-off);
	p = ((size_t*)p)+RSB_MW_ESLC; /* we make room for the markers */
	p = (( char *)p)+(shift);
	((size_t*)p)[RSB_MW_SHMO] = shift;
	((size_t*)p)[RSB_MW_SZMO] = size;
	if( RSB_MW_ODMO ) ((size_t*)p)[RSB_MW_ODMO] = RSB_OVW_MARKER;
	return p;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
#if RSB_HAVE_POSIX_MEMALIGN
	void * p = NULL;
	size_t ca = sizeof(void*); /* corrected alignment */
	while(ca<alignment)
		ca*=2;
	alignment = ca; /* "The address of the allocated memory will be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *)." */
	if(posix_memalign(&p, alignment, size))
		return NULL; /* failure (p's value would be unspecified) */
	else
		return p; /* success */
#elif RSB_HAVE_MEMALIGN
	return memalign( alignment, size);
#else
	return malloc(size); /* no platform support for aligned alloc */
#endif
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}
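
/*
 * Illustrative pairing (a sketch, under the wrapper's own conventions): areas
 * from rsb__aligned_malloc() must be released via rsb__free() (which calls
 * rsb_aligned_free()), never via plain free(), or the marker slots documented
 * above would be bypassed:
 *
 *   double * v = rsb__aligned_malloc(n * sizeof(double), 2 * sizeof(double));
 *   if(v) { ... v is 2*sizeof(double)-aligned here ... ; rsb__free(v); }
 */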

void * rsb__free(void *p)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * see rsb_aligned_free
	 * */
#if RSB_MEM_DEBUG
	RSB_STDERR("freeing %p\n",p);
#endif
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	return rsb_aligned_free(p);
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	free(p);
	return p;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__malloc(size_t size)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * see rsb__aligned_malloc
	 * */
	void * data=NULL;

#if RSB_ZERO_BYTE_ALLOC_CHECK
	if(size == 0)
	{
		RSB_ERROR(RSB_ERRM_ZSM);
	}
#endif
	if(size >= RSB_MAX_ALLOCATABLE_MEMORY_CHUNK)
	{
#if RSB_MEM_DEBUG
		RSB_STDERR("cannot allocate %zu bytes since it is more than the maximum allowed %zu\n",size,RSB_MAX_ALLOCATABLE_MEMORY_CHUNK);
#endif
		return data;
	}
#ifdef RSB_WANT_DOUBLE_ALIGNED
	data = rsb__aligned_malloc(size,sizeof(double)*2);
#else /* RSB_WANT_DOUBLE_ALIGNED */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	data = rsb__aligned_malloc(size,1);
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	data = malloc(size);
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
#endif /* RSB_WANT_DOUBLE_ALIGNED */
#if RSB_DEBUG_SHRED_AFTER_FREE
	/* prevent a previously shredded, freed, and now re-allocated *data from causing a false alarm if unused before the next free */
	if(data && size>0)
		memset(data, ~RSB_SHRED_BYTE, size);
#endif /* RSB_DEBUG_SHRED_AFTER_FREE */
#if RSB_MEM_DEBUG
	RSB_STDERR("allocated %zu bytes to %p (in hex:0x%0zx bytes)\n",size,data,size);
#endif /* RSB_MEM_DEBUG */

#if RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION
	if(data && RSB_SHOULD_RANDOMLY_FAIL)
	{
		rsb__free(data);
		data=NULL;
	}
#endif /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */
	return data;
}
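
/*
 * Illustrative sketch: allocations through the wrapper feed the global
 * counters, which rsb__print_memory_allocation_info() reports; e.g.:
 *
 *   void * q = rsb__malloc(1000);          // allocated_memory += 1000
 *   rsb__print_memory_allocation_info();   // prints both counters
 *   if(q) rsb__free(q);                    // allocated_memory -= 1000
 */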

#else
/* BEGIN OF DEAD CODE */
void * rsb__free(void *rsb_data)
{
	/*!
	 * TODO: DELETE THIS DEAD CODE
	 *
	 * \param rsb_data a generic pointer allocated with rsb__malloc
	 * \return the input pointer in case of correct operation, NULL in case of error
	 *
	 * Deletes a memory area previously allocated with rsb__malloc,
	 * and returns the pointer in case of a successful free,
	 * or NULL in case of a suspected error.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	size_t size;
	if(!rsb_data)
		return rsb_data;

#ifdef RSB_WANT_DOUBLE_ALIGNED
	/*!
	 * This is a trick : we take the offset written in the byte just
	 * behind the allocated area and use it to restore the original
	 * [m|c]alloc-ated area.
	 * */
	size_t off = ((unsigned rsb_byte_t*)rsb_data)[RSB_MW_SHMO];
	/* we decode back the whole allocated area size */
	size=((size_t*)((unsigned rsb_byte_t*)(rsb_data)-(0x10-off)))[RSB_MW_SHMO];
	/*
	RSB_STDERR("freeing address after offset %d ... \n",off);
	RSB_STDERR("freeing %d bytes ... \n",size);
	*/
#else /* RSB_WANT_DOUBLE_ALIGNED */
	/* we decode back the whole allocated area size */
	size=*(((size_t*)(rsb_data))+RSB_MW_SHMO);
#endif /* RSB_WANT_DOUBLE_ALIGNED */

#if RSB_CHEAP_DEBUG
	if( size > RSB_SUSPECT_ALLOCATION_SIZE ) /* this is a BAD sign and a warning should be issued */
	{
		RSB_ERROR("WARNING : pointer %p[%d] contains (has been overwritten with ?) a suspect value : 0x%zx==%zu", rsb_data,RSB_MW_SHMO,size,size );
		return NULL;
	}
#endif /* RSB_CHEAP_DEBUG */
	/* We update the global memory bookkeeping counter */
#pragma omp atomic
	rsb_global_session_handle.allocated_memory-=size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count--;
#ifdef RSB_WANT_DOUBLE_ALIGNED
	/* We use the offset to restore the original pointer to free. */
	free((size_t*)((unsigned rsb_byte_t*)(rsb_data)-(0x10-off))+RSB_MW_SHMO);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	free(((size_t*)(rsb_data))+RSB_MW_SHMO);
#endif /* RSB_WANT_DOUBLE_ALIGNED */
	return rsb_data;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	/* RSB_DISABLE_ALLOCATOR_WRAPPER defined */
	free(rsb_data);
	return rsb_data;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__malloc(size_t size)
{
	/*!
	 * TODO: DELETE THIS DEAD CODE
	 *
	 * (c)allocates size bytes plus bookkeeping space, to keep track of the allocated chunk size
	 * \param size is the amount of bytes to allocate
	 *
	 * If RSB_WANT_DOUBLE_ALIGNED is defined, the returned area will be double (64 bit) aligned.
	 * This area should be deallocated with rsb__free.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	void * p;
#ifdef RSB_HAVE_POSIX_MEMALIGN
	/* we could integrate/replace with posix_memalign, memalign, or continue using our custom code */
#endif /* RSB_HAVE_POSIX_MEMALIGN */
#ifdef RSB_WANT_DOUBLE_ALIGNED
	/*
	 * This is an explicit trick to give the user a double aligned memory area.
	 * To achieve this, we allocate one extra double element, and a byte (four, really, for congruency reasons)
	 * and write in one of them the shift amount.
	 *
	 * Of course, we count on sizeof(size_t)>sizeof(char).
	 * */
	size_t extra = sizeof(double)+sizeof(size_t)*2;
#else /* RSB_WANT_DOUBLE_ALIGNED */
	/*
	 * We allocate one size_t element for storing allocation information (for explicit memory leak checking).
	 * */
	size_t extra = sizeof(size_t);
#endif /* RSB_WANT_DOUBLE_ALIGNED */
	if(RSB_ALLOC_LIMITS_TRESPASSED(size,1))
	{
		rsb__print_memory_allocation_info();
		return NULL;
	}
	p = calloc( size + extra, 1 );
	if(!p)
		return p;
	*(size_t*)p=size; /* note : not size + extra */
#pragma omp atomic
	rsb_global_session_handle.allocated_memory+=size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count++;
#ifdef RSB_WANT_DOUBLE_ALIGNED
	/*
	 * WARNING : We determine the current 64 bit alignment of p, so we are really
	 * interested in its last 4 bits.
	 * DANGER : is this portable ?
	 * */
	size_t off=(((size_t)p)+sizeof(size_t))&0xF; /* can be 0 ... F */
	/*
	RSB_STDERR("allocated totally %d bytes \n",size+extra);
	RSB_STDERR("allocated %d ... \n",size);
	RSB_STDERR("allocation offset %d ... \n",off);
	*/
	((unsigned rsb_byte_t*)(p))[sizeof(size_t)+((0x10-off)+RSB_MW_SHMO)]=(unsigned char)off; /* will be used to compute back the base pointer allocated */
	return ((unsigned rsb_byte_t*)p)+(0x10-off)+sizeof(size_t);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	return ((size_t*)p)+1;
#endif /* RSB_WANT_DOUBLE_ALIGNED */
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	/* RSB_DISABLE_ALLOCATOR_WRAPPER defined */
	return calloc(size,1);
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}
/* END OF DEAD CODE */
#endif /* 1 */

rsb_time_t rsb_do_time(void)
{
	/*!
	\ingroup gr_internals

	Returns the current relative time in seconds.
	The user should rely on this function only for time difference computations.
	*/
#if RSB_WANT_OMP_RECURSIVE_KERNELS
	/* return omp_get_wtime(); */ /* for future use */
#endif
	/* SVr4, 4.3BSD. POSIX.1-2001 */
	/* ( could also use psb_wtime or mpi_wtime ) */
	/* FIXME : gettimeofday() gives pessimistic estimates ! */
	/* FIXME : gettimeofday() could be in time.h or sys/times.h */
	/* FIXME : timer sanity is of paramount importance ! Should check for its sanity at startup! */
#if defined(RSB_HAVE_GETTIMEOFDAY)
	register double t = RSB_REAL_ZERO;
	struct timeval tv1;
	gettimeofday(&tv1, NULL);
	t = (double)(tv1.tv_sec) + ((double)(tv1.tv_usec))*1.e-6;
	return t;
#elif defined(RSB_HAVE_TIMES) && defined(RSB_HAVE_SYSCONF) && defined(_SC_CLK_TCK)
	/* POSIX.1 */
	struct tms buffer;
	times(&buffer);
	return ( (rsb_time_t) ((clock_t)buffer.tms_utime) ) / ( (rsb_time_t)sysconf(_SC_CLK_TCK) );
#else /* defined(RSB_HAVE_TIMES) && defined(RSB_HAVE_SYSCONF) && defined(_SC_CLK_TCK) */
#error("You had better find a timing routine, dude.\n")
	return -1; /* this is bad */
#endif /* defined(RSB_HAVE_TIMES) && defined(RSB_HAVE_SYSCONF) && defined(_SC_CLK_TCK) */
}
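
/*
 * Typical interval-measurement sketch (differences only, per the note above;
 * the same pattern is used by rsb__timer_sanity() below):
 *
 *   rsb_time_t dt = - rsb_do_time();
 *   ... code under measurement ...
 *   dt += rsb_do_time();   // dt now holds the elapsed seconds
 */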

rsb_time_t rsb__timer_sanity(void)
{
	/*!
	\ingroup gr_internals

	Could the timer lead to negative time intervals ? (we found it can happen, sadly)
	\return the minimum interval length after a bunch of timer calls

	TODO : could we do something about this ?
	*/
	rsb_time_t md,d;
	int i;

	md = - rsb_time();
	md += rsb_time();
	for(i=0;i<RSB_TIMER_SANITY_TEST_TIMES;++i)
	{
		d = - rsb_time();
		d += rsb_time();
		md=d<md?d:md;
	}
	return md;
}

rsb_time_t rsb__timer_granularity(void)
{
	/*!
	\ingroup gr_internals

	* Tries to estimate the granularity of the timing function (that is, its overhead) by measuring it.
	* This value will be important when estimating minimal times for various self benchmarking operations.
	*
	* A test measuring the average and deviation of this parameter may also be useful.
	* With a broken timing function, return is not guaranteed.
	* */
	register double t = RSB_TIME_ZERO, t0 = RSB_TIME_ZERO;
	int times = RSB_TIMER_GRANULARITY_TEST_TIMES;
	register int i = times;

#if 0
	i = times;
	/* the results of the following two code snippets differ; this should be due to numerical roundoff. */
	while(i--)
	{
		t -= rsb_time();
		/* no op, only call overhead */
		t += rsb_time();
	}
	return t/(times*2);
#else
	/* this is more accurate (in particular: slower) but could be optimized out without an accumulator cookie (FIXME) */
	t0 = rsb_time();
	--i;
	t = -t0;

	while(i--)
	{
		/* no op, only call overhead */
		rsb_time();
		rsb_time();
	}

	t += rsb_time();

	t = t/(times*2);

	if(t <= RSB_TIME_ZERO)
		goto so_fast;
	else
		goto ret;
so_fast: /* FIXME: No guarantee of return with a broken timing function. */
	while( ( t = rsb_time() ) <= t0)
		;
	t -= t0;
ret:
	return t;
#endif
}

rsb_err_t rsb__print_memory_allocation_info(void)
{
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	/*!
	\ingroup gr_internals

	* Prints the global memory counters, for debugging purposes.
	* */
#if RSB_ALLOW_STDOUT
	RSB_STDOUT("rsb_global_session_handle.allocated_memory \t:%zu\n",(rsb_printf_int_t)rsb_global_session_handle.allocated_memory);
	RSB_STDOUT("rsb_global_session_handle.allocations_count \t:%zu\n",(rsb_printf_int_t)rsb_global_session_handle.allocations_count);
	return RSB_ERR_NO_ERROR;
#endif /* RSB_ALLOW_STDOUT */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return RSB_ERR_UNSUPPORTED_FEATURE;
}

void * rsb__calloc(size_t n)
{
	/*!
	 * \ingroup gr_internals
	 * Allocates an area of n bytes, set to zero.
	 *
	 * \param n is the amount of bytes to allocate
	 * \return the newly allocated area.
	 *
	 * This memory area should be freed with rsb__free.
	 * */
	void * p = rsb__malloc(n);
	if(p)
		RSB_BZERO(p,n);
#if(!RSB_QUIET_MEM_ERRORS)
	/* TODO : message diagnostics should be optable out, or debug levels-based */
	else
	{
		RSB_ERROR("cannot allocate %zu bytes!\n",n);
		rsb__print_memory_allocation_info();
	}
#endif /* (!RSB_QUIET_MEM_ERRORS) */
	/* should be ((int*)p)[RSB_MW_SHMO]==n */
	return p;
}

void * rsb__calloc_parallel(size_t n)
{
	/*!
	 * \ingroup gr_internals
	 */
	void *p = rsb__calloc(n);

	if(p)
		RSB_BZERO_parallel(p,n);
	return p;
}

int rsb__error(const char * format, ...)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \param format the printf format string, followed by its arguments.
	 * \return an error code or 0
	 *
	 * For now, a wrapper around printf.
	 * It will print the given arguments to stdout.
	 * */
	va_list ap;
	int rc=0;

	va_start(ap,format);
#ifndef RSB_QUIET
	rc = vprintf(format,ap);
#endif /* RSB_QUIET */
	va_end(ap);
	return rc;
}

long rsb__get_lnc_size(int n)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \returns the nth level data cache size if known, 0 if unknown or if there is no such cache level (negative probe results are clamped to 0).
	 * Cache levels start from 1.
	 * */
	long cs=0;

	if(rsb_global_session_handle.memory_hierarchy_levels>0)
		return rsb_global_session_handle.caches[n].size;

	switch(n)
	{
		case 1:
		{
#ifdef RSB_HAVE_SYSCONF
#ifdef _SC_LEVEL1_DCACHE_SIZE
			cs=sysconf(_SC_LEVEL1_DCACHE_SIZE);
#endif /* _SC_LEVEL1_DCACHE_SIZE */
#endif /* RSB_HAVE_SYSCONF */
#ifdef _H_SYSTEMCFG
			cs=_system_configuration.dcache_size;
#endif /* _H_SYSTEMCFG */
			if(cs == 0) cs = rsb__get_lnc_size_hwloc(n);
		}
		break;
		case 2:
		{
#ifdef RSB_HAVE_SYSCONF
#ifdef _SC_LEVEL2_CACHE_SIZE
			cs=sysconf(_SC_LEVEL2_CACHE_SIZE);
#endif /* _SC_LEVEL2_CACHE_SIZE */
#endif /* RSB_HAVE_SYSCONF */
#ifdef _H_SYSTEMCFG
			cs=_system_configuration.L2_cache_size;
#endif /* _H_SYSTEMCFG */
			if(cs == 0) cs = rsb__get_lnc_size_hwloc(n);
		}
		break;
		case 3:
		{
#ifdef RSB_HAVE_SYSCONF
#ifdef _SC_LEVEL3_CACHE_SIZE
			cs=sysconf(_SC_LEVEL3_CACHE_SIZE);
#endif /* _SC_LEVEL3_CACHE_SIZE */
#endif /* RSB_HAVE_SYSCONF */
#ifdef _H_SYSTEMCFG
			/* cs=_system_configuration.L3_cache_size; */ /* Does not exist :( */
#endif /* _H_SYSTEMCFG */
			if(cs == 0) cs = rsb__get_lnc_size_hwloc(n);
		}
		break;
		default :
			/* For now, we don't handle more cache levels */
			cs=-1;
	}
	cs=cs<0?0:cs;
	return cs;
}
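
/*
 * Illustrative query sketch (values are machine dependent; the numbers below
 * are hypothetical):
 *
 *   long l1 = rsb__get_lnc_size(1); // e.g. 32768  (32 KiB L1d)
 *   long l2 = rsb__get_lnc_size(2); // e.g. 262144 (256 KiB L2)
 *   long l9 = rsb__get_lnc_size(9); // 0: no such level
 */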

long rsb__get_l1c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the first level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(1);
}

long rsb__get_l2c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the second level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(2);
}

long rsb__get_l3c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the third level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(3);
}

long rsb__get_l4c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the fourth level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(4);
}

long rsb__know_cache_sizes(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return whether the cache sizes are known
	 * */
	return rsb__get_cache_levels_num() > 0;
}

long rsb__get_first_level_c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the first level data cache size,
	 * or zero if not known or not available.
	 * */
	rsb_int_t cln = rsb__get_cache_levels_num();

	if(rsb_global_session_handle.memory_hierarchy_levels>0)
		return rsb_global_session_handle.caches[1].size;

	if(cln>0)
		return rsb__get_lnc_size(1);
	else
		return 0;
}

long rsb__get_lastlevel_c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the last level data cache size,
	 * or zero if not known or not available.
	 * */
	rsb_int_t cln = rsb__get_cache_levels_num();

	if(rsb_global_session_handle.memory_hierarchy_levels>0)
		return rsb_global_session_handle.caches[rsb_global_session_handle.memory_hierarchy_levels].size;

	if(cln>0)
		return rsb__get_lnc_size(cln);
	else
		return 0;
}

long rsb__get_cache_block_byte_size(void)
{
	/*!
	 * \ingroup gr_internals
	 * */
	long flc = RSB_MIN(RSB_MAX(1,rsb_global_session_handle.memory_hierarchy_levels),2);
	long cbs = RSB_MIN(rsb__get_lnc_size(flc),rsb__get_lastlevel_c_size_per_thread());

	switch(rsb_global_session_handle.cache_blocking_method)
	{
		case -1: return cbs/2; break;
		case  1: return cbs*2; break;
		default:
		case  0: return cbs;
	}
}

static long rsb_want_executing_threads(void)
{
	/*!
	\ingroup gr_internals
	Will always return a value 1 <= N <= RSB_CONST_MAX_SUPPORTED_CORES.
	FIXME: make it so that
	rsb_want_executing_threads() == rsb_set_executing_threads()
	or rather write
	rsb__set_num_threads(RSB_THREADS_GET)
	*/
	long wt = 1;

#if RSB_WANT_OMP_RECURSIVE_KERNELS
	wt = rsb_global_session_handle.rsb_want_threads;
	if(wt<RSB_CONST_MIN_SUPPORTED_CORES)
		return RSB_CONST_MIN_SUPPORTED_CORES;
	wt = RSB_MIN(wt,RSB_CONST_MAX_SUPPORTED_CORES);
#endif
	return wt;
}

long rsb__get_lastlevel_c_size_per_thread(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the last level data cache size divided by the number of active threads,
	 * or zero if not known or not available.
	 * */
	return (rsb__get_lastlevel_c_size()/rsb_want_executing_threads());
	/* return rsb__get_lastlevel_c_size()/sqrt(rsb_want_executing_threads()); */
}

rsb_int_t rsb__get_cache_levels_num(void)
{
	/*!
	\ingroup internals
	\return the count of cache levels if >0, -1 if unknown, 0 if no caches
	*/
	long cs,l=1;

	for(l=1;l<RSB_MAX_SUPPORTED_CACHE_LEVELS;++l)
	{
		cs = rsb__get_lnc_size(l);
		if(!cs)
		{
			--l;
			break;
		}
	}
	return l;
}

int rsb_getopt_long(
	int argc, char * const argv[], const char *optstring,
	const rsb_option *longopts, int *longindex)
{
	/*!
	\ingroup internals

	A compatibility wrapper.
	*/
#ifdef RSB_HAVE_GETOPT_LONG
	return getopt_long(argc,argv,optstring,longopts,longindex);
#else /* RSB_HAVE_GETOPT_LONG */
	return getopt(argc,argv,optstring); /* a remedy */
#endif /* RSB_HAVE_GETOPT_LONG */
}
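
/*
 * Usage sketch (hedged: this assumes rsb_option mirrors getopt's struct
 * option layout, and the "verbose" flag below is purely hypothetical):
 *
 *   const rsb_option opts[] = { {"verbose",no_argument,NULL,'v'}, {NULL,0,NULL,0} };
 *   int c;
 *   while( (c = rsb_getopt_long(argc,argv,"v",opts,NULL)) != -1 )
 *   	switch(c) { case 'v': ... }
 */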

rsb_err_t rsb__sys_init(void)
{
	/*!
	\ingroup internals

	Checks some system-related assumptions at initialization time,
	to prevent nasty errors during execution.
	*/
	rsb_err_t errval = RSB_ERR_NO_ERROR;

	if(sizeof(rsb_err_t)<4)
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	if(sizeof(rsb_flags_t)<4)
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	/*! \todo : we could check for 'base' overflow cases to define
	    some limit cases.. */
	if(sizeof(rsb_coo_idx_t)>sizeof(rsb_nnz_idx_t))
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	if(sizeof(rsb_blk_idx_t)>sizeof(rsb_coo_idx_t))
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	if(rsb__get_l1c_size()<=0)
		rsb_global_session_handle.min_leaf_matrix_bytes = RSB_EXPERIMENTAL_MIN_LEAF_ELEMENTS*sizeof(double);
	else
		rsb_global_session_handle.min_leaf_matrix_bytes = rsb__get_l1c_size();

#ifdef CHAR_BIT /* limits.h */
	if( RSB_CHAR_BIT != CHAR_BIT )
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);
#endif /* CHAR_BIT */

	if(rsb__get_lastlevel_c_size()<0)
		rsb_global_session_handle.avg_leaf_matrix_bytes=4*RSB_EXPERIMENTAL_MIN_LEAF_ELEMENTS*sizeof(double);
	else
		rsb_global_session_handle.avg_leaf_matrix_bytes = rsb__get_lastlevel_c_size()*2;
#if RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION
	{
		rsb_time_t tseed = rsb_time();
		unsigned int uiseed=(*(unsigned int*)(&tseed));
		RSB_WARN("#Starting library with enabled malloc fault injection.\n# Initializing with random seed of value: %u\n",uiseed);
		/* In this way, the user may introduce faults by recompiling the code. TODO: rsb_lib_init() based seed passing. */
		srand(uiseed);
	}
#endif /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */

	RSB_DO_ERR_RETURN(errval)
}

void *rsb__memcpy(void *RSB_RESTRICT dest, const void *RSB_RESTRICT src, size_t n)
{
	/*!
	\ingroup gr_internals

	Say you want to use a custom memcpy function
	or perform some statistics measurement:
	well, this is the place to hack.
	*/
#if 0
	{
		register unsigned char*dp=NULL;
		register const unsigned char*sp=NULL;
		for(dp=dest,sp=src;RSB_LIKELY(dp<(unsigned char*)dest+n);++dp,++sp)
			*dp=*sp;
	}
#else
	return memcpy(dest,src,n);
#endif
}

size_t rsb__sys_free_system_memory(void)
{
	/*!
	\ingroup gr_internals

	System free memory, or 0.
	*/
	size_t free_mem=0;
	long int pagesize =0;
	long int mem_pages=0;

#ifdef RSB_HAVE_SYSCONF
#if defined(PAGESIZE)
	pagesize=sysconf(PAGESIZE);
#elif defined(_SC_PAGESIZE)
	pagesize=sysconf(_SC_PAGESIZE);
#elif defined(PAGE_SIZE)
	pagesize=sysconf(PAGE_SIZE);
#else /* PAGESIZE */
#endif /* PAGESIZE */
#if defined(_SC_AVPHYS_PAGES)
	mem_pages=sysconf(_SC_AVPHYS_PAGES);
#endif /* _SC_AVPHYS_PAGES */
#endif /* RSB_HAVE_SYSCONF */
	if(pagesize<1 || mem_pages<1)
		free_mem=0;
	else
		free_mem=((size_t)pagesize)*((size_t)mem_pages);
	return free_mem;
}

size_t rsb__sys_total_system_memory(void)
{
	/*!
	\ingroup gr_internals

	Total system memory, or 0.
	*/
	size_t tot_mem=0;
	long int pagesize =0;
	long int mem_pages=0;

#ifdef RSB_HAVE_SYSCONF
#if defined(PAGESIZE)
	pagesize=sysconf(PAGESIZE);
#elif defined(_SC_PAGESIZE)
	pagesize=sysconf(_SC_PAGESIZE);
#elif defined(PAGE_SIZE)
	pagesize=sysconf(PAGE_SIZE);
#else /* PAGE_SIZE */
#endif /* PAGE_SIZE */
#if defined(_SC_PHYS_PAGES)
	mem_pages = sysconf(_SC_PHYS_PAGES);
#endif /* _SC_PHYS_PAGES */
#endif /* RSB_HAVE_SYSCONF */
	if(pagesize<1 || mem_pages<1)
		tot_mem=0;
	else
		tot_mem=((size_t)pagesize)*((size_t)mem_pages);
	return tot_mem;
}
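
/*
 * Illustrative sketch: both queries degrade to 0 when the page size or the
 * page count cannot be determined, so callers should guard the result:
 *
 *   size_t tot = rsb__sys_total_system_memory();
 *   if( tot > 0 && tot < required_bytes ) // required_bytes: hypothetical
 *   	... bail out or reduce the working set ...
 */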

static long rsb_set_executing_threads(long tn)
{
	/*!
	FIXME: new
	\ingroup gr_internals
	*/
#if RSB_WANT_OMP_RECURSIVE_KERNELS
	/* multi threaded case */
	if(tn > RSB_CONST_MAX_SUPPORTED_CORES)
	{
		RSB_ERROR("cannot set %ld threads: a maximum of %ld is supported\n",tn,RSB_CONST_MAX_SUPPORTED_CORES);
		return RSB_CONST_MIN_SUPPORTED_CORES;
	}
	tn = RSB_MIN(tn,RSB_CONST_MAX_SUPPORTED_CORES);
	if(tn < RSB_CONST_MIN_SUPPORTED_CORES)
	{
		/* a value < 0 means the user wants the threads count to be set automatically */
		/* return 1; */
		tn = rsb_global_session_handle.rsb_g_threads;
	}

#if (RSB_TIME_SET_THREADS==1)
	rsb_time_t dt = -rsb_time();
#endif
#if (RSB_USE_OMP_SET_NUM_THREADS==1)
	omp_set_num_threads(tn);
#endif
#if (RSB_TIME_SET_THREADS==1)
	dt += rsb_time();
	RSB_STDOUT("setting threads (%ld) took %lf s\n",tn,dt);
#endif
	/* FIXME : 20101111 on my GNU box, the following does not return tn, but seems to have effect. Weird */
	/* tn = omp_get_num_threads(); */
	rsb_global_session_handle.rsb_want_threads = tn; /* FIXME : a hack */
	return tn;
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
	/* single threaded case */
	return RSB_CONST_MIN_SUPPORTED_CORES;
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
}

rsb_err_t rsb__lock_as_memory_resident(rsb_bool_t dolock)
{
	/* FIXME: need mechanisms to get/set/restore this setting */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
#if RSB_HAVE_MLOCKALL
	int retval = 0;
	/* "mlockall() locks all pages mapped into the address space of the calling process." */
	if(dolock)
	{
		retval = mlockall(MCL_FUTURE|MCL_CURRENT);
	}
	else
	{
		retval = munlockall();
	}
	if(retval)
	{
		errval = RSB_ERR_INTERNAL_ERROR;
		/* FIXME: shall introduce RSB_ERR_SYSCALL_ERROR */
		RSB_ERROR(RSB_ERRM_FCOVMU);
	}
#else /* RSB_HAVE_MLOCKALL */
	RSB_ERROR(RSB_ERRM_COVMUINS);
	errval = RSB_ERR_UNSUPPORTED_FEATURE;
#endif /* RSB_HAVE_MLOCKALL */
	return errval; /* what about retval ? */
}
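
/*
 * Illustrative call sequence (a sketch, assuming the usual librsb
 * RSB_BOOL_TRUE/RSB_BOOL_FALSE values): pin the process pages around a
 * timing-sensitive phase, then release them.
 *
 *   if( rsb__lock_as_memory_resident(RSB_BOOL_TRUE) == RSB_ERR_NO_ERROR )
 *   {
 *   	... benchmark ...
 *   	rsb__lock_as_memory_resident(RSB_BOOL_FALSE); // munlockall()
 *   }
 */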

int rsb__fileno(FILE *stream)
{
#ifndef RSB_HAVE_FILENO
	return -1;
#else /* RSB_HAVE_FILENO */
	return fileno(stream);
#endif /* RSB_HAVE_FILENO */
}

rsb_int_t rsb__set_num_threads(rsb_int_t tn)
{
	/*!
	\ingroup rsb_doc_library rsb_doc_rsb

	Gets and/or sets the number of librsb running threads.
	If \a tn is RSB_THREADS_AUTO, sets the threads count to a default value.
	If \a tn is RSB_THREADS_GET, only returns the count of active threads.
	If \a tn is RSB_THREADS_GET_MAX, returns the max count of supported threads.
	\todo: shall promote this to an rsb.h function and make all similar ones static, and put them in thread.c !
	\return: the number of running threads.
	*/

	long rtn=0;

	switch(tn)
	{
		case(RSB_THREADS_GET):
			rtn = rsb_want_executing_threads();
			break;
		case(RSB_THREADS_AUTO):
			tn=0;
			break;
		case(RSB_THREADS_GET_MAX_SYS):
#if RSB_WANT_OMP_RECURSIVE_KERNELS
			/* rtn = omp_get_max_threads(); */
			/* rtn = omp_get_thread_limit(); */
			rtn = rsb_global_session_handle.rsb_g_threads;
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
			rtn = 1;
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
			break;
		case(RSB_THREADS_GET_MAX_LIB):
			rtn = RSB_CONST_MAX_SUPPORTED_CORES;
			break;
		case(RSB_THREADS_GET_MAX):
#if RSB_WANT_OMP_RECURSIVE_KERNELS
			rtn = omp_get_max_threads();
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
			rtn = RSB_MIN(rtn,RSB_CONST_MAX_SUPPORTED_CORES);
			break;
		default:
			rtn = rsb_set_executing_threads(tn);
	}
	RSB_DEBUG_ASSERT(rtn>0);
	return (rsb_int_t) rtn;
}
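
/*
 * Usage sketch for the special tn values dispatched above:
 *
 *   rsb_int_t nt = rsb__set_num_threads(RSB_THREADS_GET);     // query only
 *   rsb_int_t mx = rsb__set_num_threads(RSB_THREADS_GET_MAX); // usable maximum
 *   rsb__set_num_threads(mx);                                 // explicit set
 */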

#if 0
#include <execinfo.h>
void rsb_print_trace(void)
{
	/* according to a glibc docs example */
	void *array[10];
	size_t size;
	char **strings;
	size_t i;
	size = backtrace(array, 10);
	strings = backtrace_symbols(array, size);
	printf("Obtained %zd stack frames.\n", size);
	for (i = 0; i < size; i++)
		printf("%s\n", strings[i]);
	free(strings);
}
#endif

#if RSB_USE_RUSAGE
static rsb_time_t rsb_rstv(struct timeval*tvp)
{
	register double t = 0.0;
	t = (double)(tvp->tv_sec) + ((double)(tvp->tv_usec))*1.e-6;
	return t;
}

#define RSB_K 1024
rsb_err_t rsb__getrusage(void)
{
	/*
	 * Shall work independently of rsb_lib_init/rsb_lib_exit.
	 * */
	struct rusage usage;
	int gru = getrusage(RUSAGE_SELF,&usage);

	RSB_STDOUT("getrusage() stats:\n");
	/* RSB_STDOUT("ru_ixrss : %ld (integral shared memory size)\n",usage.ru_ixrss); */
	RSB_STDOUT("ru_maxrss: %ld (maximum resident set size -- MB)\n",usage.ru_maxrss / RSB_K);
	RSB_STDOUT("ru_stime : %0.4lgs (system CPU time used)\n",rsb_rstv(&usage.ru_stime));
	RSB_STDOUT("ru_utime : %0.4lgs (user CPU time used)\n",rsb_rstv(&usage.ru_utime));
#if 0
	RSB_STDOUT("ru_majflt: %ld (hard page faults)\n",usage.ru_majflt);
	RSB_STDOUT("ru_minflt: %ld (page reclaims (soft page faults))\n",usage.ru_minflt);
#endif

	return gru == 0 ? RSB_ERR_NO_ERROR : RSB_ERR_GENERIC_ERROR;
}
#else /* RSB_USE_RUSAGE */
rsb_err_t rsb__getrusage(void)
{
	return RSB_ERR_NO_ERROR;
}
#endif /* RSB_USE_RUSAGE */

const rsb_char_t * rsb__getenv(const rsb_char_t * name)
{
	const rsb_char_t * evv = NULL;

#ifdef RSB_HAVE_GETENV
	evv = getenv(name);
#endif /* RSB_HAVE_GETENV */

	return evv;
}

const rsb_char_t * rsb__getenv_nnr(const rsb_char_t * name)
{
	/* "nnr" presumably stands for non-NULL result: an unset variable yields an empty string (a pointer to the terminating '\0' of name). */
	RSB_DEBUG_ASSERT( name != NULL );
	return rsb__getenv(name) ? rsb__getenv(name) : name + strlen(name);
}
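
/*
 * Illustrative sketch: the non-NULL guarantee lets callers compare directly,
 * without a NULL check (the environment variable name below is hypothetical):
 *
 *   if( strcmp(rsb__getenv_nnr("RSB_VERBOSE"),"1") == 0 )
 *   	... verbose path ...
 */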

#if RSB_WITH_HWLOC
#if HWLOC_API_VERSION >= 0x00020000
static hwloc_uint64_t rsb__hwloc_max_cs_at_level(hwloc_topology_t topology, hwloc_obj_t obj, unsigned int depth, unsigned int cd)
{
	/* for rsb__get_lnc_size_hwloc */
	unsigned int i, cl;
	hwloc_uint64_t cs=0;

	switch (obj->type)
	{
		case(HWLOC_OBJ_L1CACHE): cl=1; break;
		case(HWLOC_OBJ_L2CACHE): cl=2; break;
		case(HWLOC_OBJ_L3CACHE): cl=3; break;
		case(HWLOC_OBJ_L4CACHE): cl=4; break;
		case(HWLOC_OBJ_L5CACHE): cl=5; break;
		default: cl=0;
	}
	if(cl && cl==cd)
		cs = obj->attr->cache.size;

	for (i = 0; i < obj->arity; i++)
	{
		hwloc_uint64_t ccs = rsb__hwloc_max_cs_at_level(topology, obj->children[i], depth + 1, cd);
		cs = ccs > cs ? ccs : cs;
	}
	return cs;
}
#endif /* HWLOC_API_VERSION >= 0x00020000 */
#endif /* RSB_WITH_HWLOC */

long rsb__get_lnc_size_hwloc(int n)
{
	/* Gets the cache size at level n using hwloc.h. EXPERIMENTAL */
	long size = 0;
#if RSB_WITH_HWLOC
	hwloc_uint64_t cs;
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_topology_init(&topology);
	hwloc_topology_load(topology);
#if HWLOC_API_VERSION >= 0x00020000
	cs = rsb__hwloc_max_cs_at_level(topology, hwloc_get_root_obj(topology), 0, n);
	size = (long) cs;
#else /* HWLOC_API_VERSION */
	int levels = 0;
	for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); obj; obj = obj->parent)
#if HWLOC_API_VERSION >= 0x20000
		if (obj->type >= HWLOC_OBJ_L1CACHE && obj->type <= HWLOC_OBJ_L5CACHE)
#else
		if (obj->type == HWLOC_OBJ_CACHE)
#endif
			if(++levels == n)
				size = obj->attr->cache.size;
#endif /* HWLOC_API_VERSION */
	hwloc_topology_destroy(topology);
#endif /* RSB_WITH_HWLOC */
	return size;
}

/* @endcond */