/*

Copyright (C) 2008-2021 Michele Martone

This file is part of librsb.

librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.

*/
/* @cond INNERDOC  */
/*!
 * @file
 * @author Michele Martone
 * @brief System, or standard library related functions.
 * */

#include <unistd.h>	/* sysconf */
#include "rsb_internals.h"
#include "rsb.h"
#ifdef RSB_HAVE_LIMITS_H
#include <limits.h>	/* CHAR_BIT */
#endif /* RSB_HAVE_LIMITS_H */
#include <assert.h>	/* assert */
#ifdef RSB_HAVE_STDLIB_H
#include <stdlib.h>	/* posix_memalign */
#endif /* RSB_HAVE_STDLIB_H */
#ifdef RSB_HAVE_MALLOC_H
#include <malloc.h>	/* posix_memalign */
#endif /* RSB_HAVE_MALLOC_H */
#ifdef RSB_HAVE_SYS_SYSTEMCFG_H
#include <sys/systemcfg.h>	/* for _H_SYSTEMCFG */
#endif /* RSB_HAVE_SYS_SYSTEMCFG_H  */
#ifdef RSB_HAVE_SYS_MMAN_H
#if RSB_HAVE_SYS_MMAN_H
#include <sys/mman.h>	/* for mlockall */
#endif /* RSB_HAVE_SYS_MMAN_H */
#endif /* RSB_HAVE_SYS_MMAN_H */
#if defined(RSB_HAVE_DMALLOC_H) && defined(RSB_WANT_DMALLOC) && (RSB_WANT_DMALLOC!=0)
#include <dmalloc.h>	/* a debug library */
#endif /* defined(RSB_HAVE_DMALLOC_H) && defined(RSB_WANT_DMALLOC) && (RSB_WANT_DMALLOC!=0) */
#if defined(RSB_HAVE_DUMA_H) && defined(RSB_WANT_DUMA) && (RSB_WANT_DUMA!=0)
#include <duma.h>	/* a debug library */
#endif /* defined(RSB_HAVE_DUMA_H) && defined(RSB_WANT_DUMA) && (RSB_WANT_DUMA!=0) */
/* #include <stdio.h> */	/* fileno */
#if RSB_WITH_HWLOC
#include <hwloc.h>
#endif	/* RSB_WITH_HWLOC */

/* set the following to 1 to enable random malloc fault injection (and get some real fun) */
#define RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION 0
#if RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION
/* FIXME: need a true random number generator interface */
#define RSB_SHOULD_RANDOMLY_FAIL (rsb__rand_coo_index(1349)==42)
#else /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */
#define RSB_SHOULD_RANDOMLY_FAIL 0
#endif /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */

#define RSB_SUSPECT_ALLOCATION_SIZE 1024*1024*16 /* i.e. of notable size. */
#define RSB_MEM_DEBUG 0 /* verbosity */
#define RSB_MEM_DEBUG_REALLOC 0 /* verbosity */
#define RSB_CHEAP_DEBUG 0 /* cheap extra checks on suspect memory wrapper related values */
#define RSB_DEBUG_MARKER_AFTER_FREE 0 /* double free protection */
#define RSB_DEBUG_SHRED_AFTER_FREE 0 /* protection against re-use of freed areas */
#define RSB_SHRED_BYTE /* 0xFF */ 0xF0 /* byte value to use when shredding memory */
#define RSB_SHRED_WORD ( RSB_SHRED_BYTE | ( RSB_SHRED_BYTE<<8 ) | ( RSB_SHRED_BYTE<<16 ) | ( RSB_SHRED_BYTE<<24 ) )
#define RSB_FREE_MARKER 0xDEADBEEF /* ( 3735928559) */ /* a marker for detecting double free */
#define RSB_OVW_MARKER  0xBEEFBABE /* (-1091585346) */ /* a marker for detecting accidental overwrites */
#define RSB_MW_ODMO ( 0) /* memory wrapper overwrite detection marker offset (set to 0 to deactivate, or to a value not overlapping the other *MW* offsets to activate) */
#define RSB_MW_SHMO (-1) /* memory wrapper shift marker offset */
#define RSB_MW_SZMO (-2) /* memory wrapper size  marker offset */
#define RSB_MW_ESLC (-RSB_MIN(RSB_MIN(RSB_MW_ODMO,RSB_MW_ODMO),RSB_MIN(RSB_MW_SZMO,RSB_MW_SHMO))) /* memory wrapper extra 'sizeof' locations count */
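
/*
 * Illustrative sketch of the allocator wrapper layout, as produced by
 * rsb__aligned_malloc() below with the marker offsets defined above:
 *
 *   malloc'd base -> [ RSB_MW_ESLC size_t slots | shift padding | user area ... ]
 *                                                                 ^
 *                                                  returned pointer p
 *
 * p[RSB_MW_SZMO] holds the user area size, p[RSB_MW_SHMO] the alignment shift,
 * and (only when RSB_MW_ODMO is nonzero) p[RSB_MW_ODMO] the RSB_OVW_MARKER value.
 * rsb_aligned_free() reads these back to undo the shift and update the counters.
 */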

#define RSB_MD_ASSERT RSB_ASSERT  /* memory debug assert macro */

#if RSB_DEBUG_SHRED_AFTER_FREE
#include <string.h>	/* memset */
#endif

#ifdef RSB_HAVE_SYS_RESOURCE_H
#define RSB_USE_RUSAGE 1
#else
#define RSB_USE_RUSAGE 0
#endif

#if RSB_USE_RUSAGE
#include <sys/resource.h>	/* getrusage */
#endif

RSB_INTERNALS_COMMON_HEAD_DECLS

#if RSB_WANT_ALLOCATOR_LIMITS
#define RSB_ALLOC_MEMAAA_LIMIT rsb_global_session_handle.memory_count_max
#define RSB_ALLOC_MEMAAC_LIMIT rsb_global_session_handle.allocations_count_max
#define RSB_ALLOC_LIMITS_TRESPASSED(AAA,AAC) RSB_UNLIKELY( ( ( RSB_ALLOC_MEMAAA_LIMIT > 0 && rsb_global_session_handle.allocated_memory+(AAA) >= RSB_ALLOC_MEMAAA_LIMIT ) || ( RSB_ALLOC_MEMAAC_LIMIT > 0 && rsb_global_session_handle.allocations_count+(AAC) > RSB_ALLOC_MEMAAC_LIMIT ) ) ? 1 : 0 )
#else /* RSB_WANT_ALLOCATOR_LIMITS */
#define RSB_ALLOC_LIMITS_TRESPASSED(AAA,AAC) 0
#endif /* RSB_WANT_ALLOCATOR_LIMITS */
#define RSB_ZERO_BYTE_ALLOC_CHECK	0

#define RSB_TIME_SET_THREADS 0

void rsb__g_rsb_memory_counter_init(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Memory counter reset.
	 */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	rsb_global_session_handle.allocated_memory=0;
	rsb_global_session_handle.allocations_count=0;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

size_t rsb__get_g_rsb_memory_count(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * A mere accessor function.
	 */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	return rsb_global_session_handle.allocated_memory;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return 0;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

size_t rsb__get_g_rsb_allocations_count(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * A mere accessor function.
	 */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	return rsb_global_session_handle.allocations_count;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return 0;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

#if 1

static void * rsb_aligned_free(void *p)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \param p a generic pointer allocated with rsb__aligned_malloc
	 * \return the input pointer in case of correct operation, NULL in case of error
	 *
	 * Frees a memory area previously allocated with rsb__aligned_malloc,
	 * and returns the pointer in case of successful free,
	 * or NULL in case of suspected error.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	size_t size;
	size_t shift;
	if( p == NULL )
		return p;
	#if RSB_DEBUG_SHRED_AFTER_FREE
	if( (((rsb_int_t)p) & RSB_SHRED_WORD ) == RSB_SHRED_WORD ) /* shred-area read pointer detection */
	{
		RSB_STDERR("Warning: it is likely that pointer %p is invalid and was read from a previously freed area. Expect a crash now.\n",p);
		RSB_MD_ASSERT(0);
	}
	#endif /* RSB_DEBUG_SHRED_AFTER_FREE */
	size  = ((size_t*)p)[RSB_MW_SZMO];
	#if RSB_MEM_DEBUG
	RSB_STDERR("freeing   %zu bytes at %p (in hex:0x%0zx bytes)\n",size,p,size);
	#endif
	shift = ((size_t*)p)[RSB_MW_SHMO];
	#if RSB_DEBUG_MARKER_AFTER_FREE
	if(size == RSB_FREE_MARKER || shift == RSB_FREE_MARKER)
	{
		RSB_STDERR("Warning: it is almost certain that memory at %p has already been deallocated! Expect a crash now.\n",p);
		RSB_MD_ASSERT(0);
	}
	((size_t*)p)[RSB_MW_SZMO] = RSB_FREE_MARKER;
	((size_t*)p)[RSB_MW_SHMO] = RSB_FREE_MARKER;
	#endif /* RSB_DEBUG_MARKER_AFTER_FREE */
	if( RSB_MW_ODMO && ((size_t*)p)[RSB_MW_ODMO] != RSB_OVW_MARKER )
	{
		RSB_STDERR("Warning: memory at %p has been overwritten (marker value is %x instead of 0x%0zx) ! Expect crashes.\n",p,((size_t*)p)[RSB_MW_ODMO],RSB_OVW_MARKER );
		RSB_MD_ASSERT(0);
	}
	#if RSB_DEBUG_SHRED_AFTER_FREE
	if( ( size >= sizeof(rsb_int_t) ) && ( *(rsb_int_t*)p == RSB_SHRED_WORD ) ) /* shredded area re-free detection */
	{
		RSB_STDERR("Warning: it is possible that %zd-byte area at %p was recently freed (points to {0x%x, ...). A crash may follow.\n",size,p,*(rsb_int_t*)p);
		/* RSB_MD_ASSERT(0); */
	}
	memset(p, RSB_SHRED_BYTE, size);
	#endif /* RSB_DEBUG_SHRED_AFTER_FREE */
	p = (( char *)p)-(shift);
	p = ((size_t*)p)-RSB_MW_ESLC;	/* we undo the room made for the markers */
#pragma omp atomic
	rsb_global_session_handle.allocated_memory -= size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count--;
	free(p);
	return p;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	free(p);
	return p;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__realloc(void *rsb_data, size_t size)
{
#ifdef RSB_WANT_DOUBLE_ALIGNED
	return rsb__do_realloc(rsb_data,size,sizeof(double)*2);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	return rsb__do_realloc(rsb_data,size,1);
#endif /* RSB_WANT_DOUBLE_ALIGNED */
}

void * rsb__do_realloc(void *rsb_data, size_t size, size_t alignment)
{
	void * p = rsb_data;
	size_t extra = 0;
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	size_t osize;
	size_t shift;
	extra = sizeof(size_t)*RSB_MW_ESLC+alignment;

	if(p==NULL)
		return p;
	osize  = ((size_t*)p)[RSB_MW_SZMO];
	#if ( RSB_MEM_DEBUG || RSB_MEM_DEBUG_REALLOC )
	RSB_STDERR("reallocating from %zu to %zu bytes (%+zd) at 0x%p (in hex: to 0x%0zx bytes)\n",osize,size,(size-osize),p,size);
	#endif
	shift = ((size_t*)p)[RSB_MW_SHMO];
	p = (( char *)p)-(shift);
	p = ((size_t*)p)-RSB_MW_ESLC;	/* we undo the room made for the markers */
	/* no free was performed, since extra>0 */
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	if(size==0) /* a free shall be performed */
		extra=0;

#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	if(size==0 && p) /* a free shall be performed */
		rsb_global_session_handle.allocations_count--;

	if(size>osize)
#pragma omp atomic
		rsb_global_session_handle.allocated_memory+=size-osize;
	else
	if(p) /* a free shall be performed */
#pragma omp atomic
		rsb_global_session_handle.allocated_memory-=osize-size;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	p = realloc(p,size+extra);/* if freeing, either p or NULL will be returned */

	if(!p)
		return p;/* failure */

#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	/*!
	 * \ingroup gr_internals
	 * TODO: is there a way to preserve the alignment?
	 * */
#if 1
	/* restoring back allocation info (and potentially losing alignment, because we have to keep the same shift!) */
	p = ((size_t*)p)+RSB_MW_ESLC;	/* we make room for markers */
	p = (( char *)p)+(shift);
	/* to restore alignment, one should perform a memmove */
	((size_t*)p)[RSB_MW_SHMO] = shift;
	((size_t*)p)[RSB_MW_SZMO] = size;
	if( RSB_MW_ODMO ) ((size_t*)p)[RSB_MW_ODMO] = RSB_OVW_MARKER;
#else
	/* buggy way */

	{
		size_t off;
		off=((size_t)(((size_t*)p)+2))%(alignment); /* to the return address from ((size_t*)p)+2 */
		shift = (alignment-off);
		p = ((size_t*)p)+2;	/* we make room for two markers */
		p = (( char *)p)+(shift);
		((size_t*)p)[RSB_MW_SHMO] = shift;
		((size_t*)p)[RSB_MW_SZMO] = size;
		if( RSB_MW_ODMO ) ((size_t*)p)[RSB_MW_ODMO] = RSB_OVW_MARKER;
	}
#endif
	return p;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return p; /* success */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return NULL;
}

void * rsb__aligned_malloc(size_t size, size_t alignment)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * Allocates size bytes plus some bookkeeping information, in order to keep track of the allocated chunk size.
	 * \param size is the amount of needed bytes to allocate
	 *
	 * The returned area will be alignment bytes aligned.
	 * This area should be deallocated with rsb_aligned_free.
	 *
	 * Note that although the address will be aligned as asked,
	 * there is a 100% alignment and contiguity guarantee only
	 * on machines with no virtual memory (on Linux there seems to be no user space contiguous
	 * memory allocator like the kernel's vmalloc).
	 *
	 * If none of RSB_DISABLE_ALLOCATOR_WRAPPER, RSB_HAVE_POSIX_MEMALIGN and RSB_HAVE_MEMALIGN is defined, malloc() will be used.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	void * p;
	size_t extra = sizeof(size_t)*RSB_MW_ESLC+alignment;
	size_t off;
	size_t shift;

	if(RSB_ALLOC_LIMITS_TRESPASSED(size+extra,1))
	{
		rsb__print_memory_allocation_info();
		return NULL;
	}

#if RSB_ZERO_BYTE_ALLOC_CHECK
	if(size == 0 && extra == 0)
	{
		RSB_ERROR(RSB_ERRM_ZSM);
	}
#endif
	p = malloc( size + extra );
	if(!p)
		return p;/* failure */
	RSB_DEBUG_ASSERT(rsb_global_session_handle.allocated_memory	<=(rsb_global_session_handle.allocated_memory+size  ));
	/* the following could trigger during very very long/big runs, so .. */
	RSB_DEBUG_ASSERT((rsb_global_session_handle.allocations_count	< (rsb_global_session_handle.allocations_count+1))
			/* ... this line should fix those cases */
			|| (rsb_global_session_handle.allocations_count+1)==0);

#pragma omp atomic
	rsb_global_session_handle.allocated_memory+=size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count++;
	off=((size_t)(((size_t*)p)+RSB_MW_ESLC))%(alignment); /* to the return address from ((size_t*)p)+RSB_MW_ESLC */
	shift = (alignment-off);
	p = ((size_t*)p)+RSB_MW_ESLC;	/* we make room for the markers */
	p = (( char *)p)+(shift);
	((size_t*)p)[RSB_MW_SHMO] = shift;
	((size_t*)p)[RSB_MW_SZMO] = size;
	if( RSB_MW_ODMO ) ((size_t*)p)[RSB_MW_ODMO] = RSB_OVW_MARKER;
	return p;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	#if RSB_HAVE_POSIX_MEMALIGN
	void * p = NULL;
	size_t ca = sizeof(void*); /* corrected alignment */
	while(ca<alignment)
		ca*=2;
	alignment = ca; /* "The address  of  the  allocated  memory  will be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *)." */
	if(posix_memalign(&p, alignment, size))
		return p; /* failure */
	else
		return p; /* success */
	#elif RSB_HAVE_MEMALIGN
	return memalign( alignment, size);
	#else
	return malloc(size); /* no platform support for aligned alloc */
	#endif
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__free(void *p)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * see rsb_aligned_free
	 * */
	#if RSB_MEM_DEBUG
	RSB_STDERR("freeing %p\n",p);
	#endif
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	return rsb_aligned_free(p);
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	free(p);
	return p;
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__malloc(size_t size)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * see rsb__aligned_malloc
	 * */
	void * data=NULL;

#if RSB_ZERO_BYTE_ALLOC_CHECK
	if(size == 0)
	{
		RSB_ERROR(RSB_ERRM_ZSM);
	}
#endif
	if(size >= RSB_MAX_ALLOCATABLE_MEMORY_CHUNK)
	{
		#if RSB_MEM_DEBUG
		RSB_STDERR("cannot allocate %zu bytes since it is more than the maximum allowed %zu\n",size,RSB_MAX_ALLOCATABLE_MEMORY_CHUNK);
		#endif
		return data;
	}
#ifdef RSB_WANT_DOUBLE_ALIGNED
	data = rsb__aligned_malloc(size,sizeof(double)*2);
#else /* RSB_WANT_DOUBLE_ALIGNED */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	data = rsb__aligned_malloc(size,1);
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	data = malloc(size);
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
#endif /* RSB_WANT_DOUBLE_ALIGNED */
#if RSB_DEBUG_SHRED_AFTER_FREE
	 /* prevent a previously shredded, freed, and now re-allocated *data from causing a false alarm if left unused before the next free */
	if(data && size>0)
		memset(data, ~RSB_SHRED_BYTE, size);
#endif /* RSB_DEBUG_SHRED_AFTER_FREE */
	#if RSB_MEM_DEBUG
	RSB_STDERR("allocated %zu bytes to %p (in hex:0x%0zx bytes)\n",size,data,size);
	#endif /* RSB_MEM_DEBUG */

#if RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION
	if(data)if(RSB_SHOULD_RANDOMLY_FAIL){rsb__free(data);data=NULL;}
#endif /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */
	return data;
}
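
#if 0
/* Illustrative, compiled-out sketch (not part of the build) of how the wrapper
 * pairs allocation and deallocation and keeps the global counters consistent;
 * the names used are the ones defined in this file. */
static void rsb_mem_wrapper_usage_sketch(void)
{
	size_t before = rsb__get_g_rsb_memory_count();
	void * p = rsb__malloc(1024);	/* tracked (and possibly aligned) allocation */

	if(p)
	{
		RSB_ASSERT( rsb__get_g_rsb_memory_count() >= before );
		rsb__free(p);		/* updates the counters back and frees the area */
	}
	RSB_ASSERT( rsb__get_g_rsb_memory_count() == before );
}
#endif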

#else
/* BEGIN OF DEAD CODE */
void * rsb__free(void *rsb_data)
{
	/*!
	 * TODO: DELETE THIS DEAD CODE
	 *
	 * \param rsb_data a generic pointer allocated with rsb__malloc
	 * \return the input pointer in case of correct operation, NULL in case of error
	 *
	 * Deletes a memory area previously allocated with rsb__malloc,
	 * and returns the pointer in case of successful free,
	 * or NULL in case of suspected error.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	size_t size;
	if(!rsb_data)return rsb_data;

#ifdef RSB_WANT_DOUBLE_ALIGNED
	/*!
	 * This is a trick : we take the offset written in the byte just
	 * behind the allocated area and use it to restore the original
	 * [m|c]alloc-ated area.
	 * */
	size_t off = ((unsigned rsb_byte_t*)rsb_data)[RSB_MW_SHMO];
	/* we decode back the whole allocated area size */
	size=((size_t*)((unsigned rsb_byte_t*)(rsb_data)-(0x10-off)))[RSB_MW_SHMO];
	/*
	RSB_STDERR("freeing address after offset %d ... \n",off);
	RSB_STDERR("freeing  %d bytes ... \n",size);
	*/
#else /* RSB_WANT_DOUBLE_ALIGNED */
	/* we decode back the whole allocated area size */
	size=*(((size_t*)(rsb_data))+RSB_MW_SHMO);
#endif /* RSB_WANT_DOUBLE_ALIGNED */

	#if RSB_CHEAP_DEBUG
	if( size < 0 || size > RSB_SUSPECT_ALLOCATION_SIZE )/* this is a BAD sign and a warning should be issued */
	{
		RSB_ERROR("WARNING : pointer x%08x[%d] contains (has been overwritten with ?) a suspect value : %08x==%d", rsb_data,RSB_MW_SHMO,size,size );
		return NULL;
	}
	#endif /* RSB_CHEAP_DEBUG */
	/* We update the global memory bookkeeping counter */
#pragma omp atomic
	rsb_global_session_handle.allocated_memory-=size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count--;
#ifdef RSB_WANT_DOUBLE_ALIGNED
	/* We use the offset to restore the original pointer to free. */
	free(((  size_t*)((unsigned rsb_byte_t*)(rsb_data)-(0x10-off)))+RSB_MW_SHMO);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	free((((size_t*)(rsb_data))+RSB_MW_SHMO));
#endif /* RSB_WANT_DOUBLE_ALIGNED */
	return rsb_data;
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	/* RSB_DISABLE_ALLOCATOR_WRAPPER defined */
#ifdef RSB_WANT_DOUBLE_ALIGNED
	free(rsb_data);
	return rsb_data;
#else /* RSB_WANT_DOUBLE_ALIGNED */
	free(rsb_data);
	return rsb_data;
#endif /* RSB_WANT_DOUBLE_ALIGNED */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}

void * rsb__malloc(size_t size)
{
	/*!
	 * TODO: DELETE THIS DEAD CODE
	 *
	 * (c)allocates size bytes plus bookkeeping information, in order to keep track of the allocated chunk size
	 * \param size is the amount of needed bytes to allocate
	 *
	 * if RSB_WANT_DOUBLE_ALIGNED is defined, the returned area will be double (64 bits) aligned.
	 * this area should be deallocated with rsb__free.
	 * */
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
	void * p;
#ifdef RSB_HAVE_POSIX_MEMALIGN
	 /* we could integrate/replace with posix_memalign, memalign or continue using our custom code */
#endif /* RSB_HAVE_POSIX_MEMALIGN */
#ifdef RSB_WANT_DOUBLE_ALIGNED
	/*
	 * This is an explicit trick to give the user a double aligned memory area.
	 * To achieve this, we allocate one extra double element, and a byte (four, really, for congruency reasons)
	 * and write in one of them information on the shift amount.
	 *
	 * Of course, we count on sizeof(size_t)>sizeof(char).
	 * */
	size_t extra = sizeof(double)+sizeof(size_t)*2;
#else /* RSB_WANT_DOUBLE_ALIGNED */
	/*
	 * We allocate one size_t element for storing allocation information (for explicit memory leak checking).
	 * */
	size_t extra = sizeof(size_t);
#endif /* RSB_WANT_DOUBLE_ALIGNED */
	if(RSB_ALLOC_LIMITS_TRESPASSED(size,1))
	{rsb__print_memory_allocation_info(); return NULL;}
	p = calloc( size + extra, 1 );
	if(!p)return p;
	*(size_t*)p=size;/* note : not size + extra */
#pragma omp atomic
	rsb_global_session_handle.allocated_memory+=size;
#pragma omp atomic
	rsb_global_session_handle.allocations_count++;
#ifdef RSB_WANT_DOUBLE_ALIGNED
	/*
	 * WARNING : We determine the current 16-byte alignment of p, so we are really interested in its last 4 bits.
	 * DANGER  : is this portable ?
	 * */
	size_t off=(((size_t)p)+sizeof(size_t))&0xF;	/* can be 0 ... F */
	/*
	RSB_STDERR("allocated totally %d bytes \n",size+extra);
	RSB_STDERR("allocated %d ... \n",size);
	RSB_STDERR("allocation offset %d ... \n",off);
	*/
	((unsigned rsb_byte_t*)(p))[sizeof(size_t)+((0x10-off)+RSB_MW_SHMO)]=(unsigned char)off;/* will be used to compute back the base pointer allocated */
	return ((unsigned rsb_byte_t*)p)+(0x10-off)+sizeof(size_t);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	return ((size_t*)p)+1;
#endif /* RSB_WANT_DOUBLE_ALIGNED */
#else /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	/* RSB_DISABLE_ALLOCATOR_WRAPPER defined */
#ifdef RSB_WANT_DOUBLE_ALIGNED
	return calloc(size,1);
#else /* RSB_WANT_DOUBLE_ALIGNED */
	return calloc(size,1);
#endif /* RSB_WANT_DOUBLE_ALIGNED */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
}
/* END OF DEAD CODE */
#endif /* 1 */

rsb_time_t rsb_do_time(void)
{
	/*!
	   \ingroup gr_internals

	   Returns a current relative time in seconds.
	   The user should rely on this function only for time difference computations.
	 */
#if RSB_WANT_OMP_RECURSIVE_KERNELS
	/* return omp_get_wtime(); */ /* for future use */
#endif
	/* SVr4, 4.3BSD.  POSIX.1-2001 */
	/* ( could also use psb_wtime or mpi_wtime )*/
	/* FIXME : gettimeofday() gives pessimistic estimates ! */
	/* FIXME : gettimeofday() could be in time.h or sys/times.h */
	/* FIXME : timer sanity is of paramount importance ! Should check for its sanity at startup! */
#if defined(RSB_HAVE_GETTIMEOFDAY)
	register double t = RSB_REAL_ZERO;
	struct timeval tv1;
	gettimeofday(&tv1, NULL);
	t  =  (double)(tv1.tv_sec) + ((double)(tv1.tv_usec))*1.e-6;
	return t;
#elif defined(RSB_HAVE_TIMES) && defined(RSB_HAVE_SYSCONF) && defined(_SC_CLK_TCK)
	/* POSIX.1 */
	struct tms buffer;
	times(&buffer);
	return ( (rsb_time_t) ((clock_t)buffer.tms_utime) ) / ( (rsb_time_t)sysconf(_SC_CLK_TCK) );
#else /* defined(RSB_HAVE_TIMES) && defined(RSB_HAVE_SYSCONF) && defined(_SC_CLK_TCK) */
#error("You had better find a timing routine, dude.\n")
	return -1;/* this is bad */
#endif /* defined(RSB_HAVE_TIMES) && defined(RSB_HAVE_SYSCONF) && defined(_SC_CLK_TCK) */
}
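
/*
 * Typical usage pattern for the timing routines in this file (a sketch of the
 * idiom used below, e.g. in rsb__timer_sanity and rsb__timer_granularity):
 *
 *   rsb_time_t dt = - rsb_do_time();
 *   ... timed work ...
 *   dt += rsb_do_time();   // dt now holds the elapsed time, in seconds
 */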

rsb_time_t rsb__timer_sanity(void)
{
	/*!
		\ingroup gr_internals

		Could the timer lead to negative time intervals ? (we found it can happen, sadly)
		\return the minimum interval length after a bunch of timer calls

		TODO : could we do something about this ?
	*/
	rsb_time_t md,d;
	int i;

	md = - rsb_time();
	md += rsb_time();
	for(i=0;i<RSB_TIMER_SANITY_TEST_TIMES;++i)
	{
		d = - rsb_time();
		d += rsb_time();
		md=d<md?d:md;
	}
	return md;
}

rsb_time_t rsb__timer_granularity(void)
{
	/*!
	   \ingroup gr_internals

	 * Tries to estimate the granularity of the timing function (that is, its overhead) by measuring it.
	 * This value will be important when estimating minimal times for various self benchmarking operations.
	 *
	 * A test measuring the average and deviation of this parameter may also be useful.
	 * It is not guaranteed to return if the timing function is broken.
	 * */
	register double t = RSB_TIME_ZERO, t0 = RSB_TIME_ZERO;
	int times = RSB_TIMER_GRANULARITY_TEST_TIMES;
	register int i = times;

#if 0
	i = times;
	/* the results of the following two code snippets differ: probably due to numerical roundoff. */
	while(i--)
	{
		t -= rsb_time();
		/* no op, only call overhead */
		t += rsb_time();
	}
	return t/(times*2);
#else
	/* this is more accurate (in particular: slower) but could be optimized out without an accumulator cookie (FIXME) */
	t0 = rsb_time();
	--i;
	t = -t0;

	while(i--)
	{
		/* no op, only call overhead */
		rsb_time();
		rsb_time();
	}

	t += rsb_time();

	t = t/(times*2);

	if(t <= RSB_TIME_ZERO)
		goto so_fast;
	else
		goto ret;
so_fast: /* FIXME: No guarantee of return with a broken timing function. */
	while( ( t = rsb_time() ) <= t0)
		;
	t -= t0;
ret:
	return t;
#endif
}

rsb_err_t rsb__print_memory_allocation_info(void)
{
#ifndef RSB_DISABLE_ALLOCATOR_WRAPPER
/*!
 \ingroup gr_internals

 * Prints the global memory counters, for debugging purposes.
 * */
#if RSB_ALLOW_STDOUT
	RSB_STDOUT("rsb_global_session_handle.allocated_memory       \t:%zu\n",(rsb_printf_int_t)rsb_global_session_handle.allocated_memory);
	RSB_STDOUT("rsb_global_session_handle.allocations_count  \t:%zu\n",(rsb_printf_int_t)rsb_global_session_handle.allocations_count);
	return RSB_ERR_NO_ERROR;
#endif /* RSB_ALLOW_STDOUT */
#endif /* RSB_DISABLE_ALLOCATOR_WRAPPER */
	return RSB_ERR_UNSUPPORTED_FEATURE;
}

void * rsb__calloc(size_t n)
{
	/*!
	 * \ingroup gr_internals
	 * Allocates n bytes, set to zero.
	 *
	 * \param n is the amount of bytes to allocate
	 * \return the newly allocated area.
	 *
	 * This memory area should be freed with rsb__free.
	 * */
	void * p = rsb__malloc(n);
	if(p)
		RSB_BZERO(p,n);
#if(!RSB_QUIET_MEM_ERRORS)
	/* TODO : message diagnostics should be optable out, or debug levels-based */
	else
	{
		RSB_ERROR("cannot allocate %zu bytes!\n",n);
		rsb__print_memory_allocation_info();
	}
#endif /* (!RSB_QUIET_MEM_ERRORS) */
	/* should be ((int*)p)[RSB_MW_SHMO]==n */
	return p;
}

void * rsb__calloc_parallel(size_t n)
{
	/*!
	 * \ingroup gr_internals
	 */
	void *p = rsb__calloc(n);

	if(p)
		RSB_BZERO_parallel(p,n);
	return p;
}

int rsb__error(const char * format, ...)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \param format as the first printf argument.
	 * \return an error code or 0
	 *
	 * For now, a wrapper around printf.
	 * It will print the given arguments on stdout.
	 * */
	va_list ap;
	int rc=0;

	va_start(ap,format);
#ifndef RSB_QUIET
	rc = vprintf(format,ap);
#endif /* RSB_QUIET */
	va_end(ap);
	return rc;
}

long rsb__get_lnc_size(int n)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \returns the nth level data cache size if positive, or 0 if unknown, unavailable, or there is no cache at that level.
	 * Cache levels start from 1.
	 * */
	long cs=0;

	if(rsb_global_session_handle.memory_hierarchy_levels>0)
		return rsb_global_session_handle.caches[n].size;

	switch(n)
	{
		case 1:
		{
#ifdef RSB_HAVE_SYSCONF
#ifdef _SC_LEVEL1_DCACHE_SIZE
			cs=sysconf(_SC_LEVEL1_DCACHE_SIZE);
#endif /* _SC_LEVEL1_DCACHE_SIZE */
#endif /* RSB_HAVE_SYSCONF  */
#ifdef _H_SYSTEMCFG
			cs=_system_configuration.dcache_size;
#endif /* _H_SYSTEMCFG */
			if(cs == 0) cs = rsb__get_lnc_size_hwloc(n);
		}
		break;
		case 2:
		{
#ifdef RSB_HAVE_SYSCONF
#ifdef _SC_LEVEL2_CACHE_SIZE
			cs=sysconf(_SC_LEVEL2_CACHE_SIZE);
#endif /* _SC_LEVEL2_CACHE_SIZE */
#endif /* RSB_HAVE_SYSCONF */
#ifdef _H_SYSTEMCFG
			cs=_system_configuration.L2_cache_size;
#endif /* _H_SYSTEMCFG */
			if(cs == 0) cs = rsb__get_lnc_size_hwloc(n);
		}
		break;
		case 3:
		{
#ifdef RSB_HAVE_SYSCONF
#ifdef _SC_LEVEL3_CACHE_SIZE
			cs=sysconf(_SC_LEVEL3_CACHE_SIZE);
#endif /* _SC_LEVEL3_CACHE_SIZE */
#endif /* RSB_HAVE_SYSCONF */
#ifdef _H_SYSTEMCFG
	//		cs=_system_configuration.L3_cache_size; // Does not exist :(
#endif /* _H_SYSTEMCFG */
			if(cs == 0) cs = rsb__get_lnc_size_hwloc(n);
		}
		break;
		default :
		/* For now, we don't handle more cache levels */
		cs=-1;
	}
	cs=cs<0?0:cs;
	return cs;
}

long rsb__get_l1c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the first level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(1);
}

long rsb__get_l2c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the second level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(2);
}

long rsb__get_l3c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the third level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(3);
}

long rsb__get_l4c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the fourth level data cache size.
	 * \note see rsb__get_lnc_size
	 * */
	return rsb__get_lnc_size(4);
}

long rsb__know_cache_sizes(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return do we know the cache sizes ?
	 * */
	return rsb__get_cache_levels_num() > 0;
}

long rsb__get_first_level_c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the first level data cache size,
	 * or zero if not known or not available.
	 * */
	rsb_int_t cln = rsb__get_cache_levels_num();

	if(rsb_global_session_handle.memory_hierarchy_levels>0)
		return rsb_global_session_handle.caches[1].size;

	if(cln>0)
		return rsb__get_lnc_size(1);
	else
		return 0;
}

long rsb__get_lastlevel_c_size(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the last level data cache size,
	 * or zero if not known or not available.
	 * */
	rsb_int_t cln = rsb__get_cache_levels_num();

	if(rsb_global_session_handle.memory_hierarchy_levels>0)
		return rsb_global_session_handle.caches[rsb_global_session_handle.memory_hierarchy_levels].size;

	if(cln>0)
		return rsb__get_lnc_size(cln);
	else
		return 0;
}

long rsb__get_cache_block_byte_size(void)
{
	/*!
	 * \ingroup gr_internals
	 * */
	long flc = RSB_MIN(RSB_MAX(1,rsb_global_session_handle.memory_hierarchy_levels),2);
	long cbs = RSB_MIN(rsb__get_lnc_size(flc),rsb__get_lastlevel_c_size_per_thread());

	switch(rsb_global_session_handle.cache_blocking_method)
	{
		case -1: return cbs/2; break;
		case  1: return cbs*2; break;
		default:
		case  0: return cbs;
	}
}

static long rsb_want_executing_threads(void)
{
	/*!
	  	\ingroup gr_internals
		Will always return a value 1 <= N <= RSB_CONST_MAX_SUPPORTED_CORES
		FIXME: make so that
		rsb_want_executing_threads() == rsb_set_executing_threads()
		or rather write
	       	rsb__set_num_threads(RSB_THREADS_GET)
	*/
	long wt = 1;

#if RSB_WANT_OMP_RECURSIVE_KERNELS
	wt = rsb_global_session_handle.rsb_want_threads;
	if(wt<RSB_CONST_MIN_SUPPORTED_CORES)
		return RSB_CONST_MIN_SUPPORTED_CORES;
	wt = RSB_MIN(wt,RSB_CONST_MAX_SUPPORTED_CORES);
#endif
	return wt;
}

long rsb__get_lastlevel_c_size_per_thread(void)
{
	/*!
	 * \ingroup gr_internals
	 *
	 * \return the last level data cache size divided by the number of active threads,
	 * or zero if not known or not available.
	 * */
	return (rsb__get_lastlevel_c_size()/rsb_want_executing_threads());
//	return rsb__get_lastlevel_c_size()/sqrt(rsb_want_executing_threads());
}

rsb_int_t rsb__get_cache_levels_num(void)
{
	/*!
	 \ingroup internals
	 \return the count of detected cache levels, 0 if none were detected
	*/
	long cs,l=1;

	for(l=1;l<RSB_MAX_SUPPORTED_CACHE_LEVELS;++l)
	{
		cs = rsb__get_lnc_size(l);
		if(!cs){--l;break;}
	}
	return l;
}

int rsb_getopt_long(
	int argc, char * const argv[], const char *optstring,
	const rsb_option *longopts, int *longindex)
{
	/*!
	   \ingroup internals

	   A compatibility wrapper.
	 */
#ifdef RSB_HAVE_GETOPT_LONG
	return getopt_long(argc,argv,optstring,longopts,longindex);
#else /* RSB_HAVE_GETOPT_LONG */
	return getopt(argc,argv,optstring);	/* a remedy */
#endif /* RSB_HAVE_GETOPT_LONG */
}

rsb_err_t rsb__sys_init(void)
{
	/*!
	 \ingroup internals

	 Checks some system related setup,
	 to prevent nasty errors during execution.
	*/
	rsb_err_t errval = RSB_ERR_NO_ERROR;

	if(sizeof(rsb_err_t)<4)
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	if(sizeof(rsb_flags_t)<4)
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	/*! \todo : we could check for 'base' overflow cases to define
	 	    some limit cases...
	 */
	if(sizeof(rsb_coo_idx_t)>sizeof(rsb_nnz_idx_t))
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	if(sizeof(rsb_blk_idx_t)>sizeof(rsb_coo_idx_t))
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);

	if(rsb__get_l1c_size()<=0)
		rsb_global_session_handle.min_leaf_matrix_bytes = RSB_EXPERIMENTAL_MIN_LEAF_ELEMENTS*sizeof(double);
	else
		rsb_global_session_handle.min_leaf_matrix_bytes = rsb__get_l1c_size();

#ifdef CHAR_BIT	/* limits.h */
	if( RSB_CHAR_BIT != CHAR_BIT )
		RSB_DO_ERROR_CUMULATE(errval,RSB_ERR_INTERNAL_ERROR);
#endif /* CHAR_BIT */

	if(rsb__get_lastlevel_c_size()<0)
		rsb_global_session_handle.avg_leaf_matrix_bytes=4*RSB_EXPERIMENTAL_MIN_LEAF_ELEMENTS*sizeof(double);
	else
		rsb_global_session_handle.avg_leaf_matrix_bytes = rsb__get_lastlevel_c_size()*2;
#if RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION
	{
		rsb_time_t tseed = rsb_time();
		unsigned int uiseed=(*(unsigned int*)(&tseed));
		RSB_WARN("#Starting library with enabled malloc fault injection.\n# Initializing with random seed of value: %u\n",uiseed);
		/* In this way, the user may introduce faults by recompiling the code. TODO: rsb_lib_init() based seed passing. */
		srand(uiseed);
	}
#endif /* RSB_WANT_RANDOM_MALLOC_FAULT_INJECTION */

	RSB_DO_ERR_RETURN(errval)
}

void *rsb__memcpy(void *RSB_RESTRICT dest, const void *RSB_RESTRICT src, size_t n)
{
	/*!
	  	\ingroup gr_internals

		Say you want to use a custom memcpy function
		or perform some statistics measurement.

		Well, this is the place to hack.
		TODO: use the 'restrict' keyword.
	*/
#if 0
	{
		register unsigned char*dp=NULL;
		register const unsigned char*sp=NULL;
		for(dp=dest,sp=src;RSB_LIKELY(dp<dest+n);++dp,++sp)
			*dp=*sp;
	}
#else
	return memcpy(dest,src,n);
#endif
}

size_t rsb__sys_free_system_memory(void)
{
	/*!
	  	\ingroup gr_internals

		System free memory, or 0.
	*/
	size_t free_mem=0;
	long int pagesize =0;
	long int mem_pages=0;

#ifdef RSB_HAVE_SYSCONF
#if   defined(PAGESIZE)
	pagesize=sysconf(PAGESIZE);
#elif defined(_SC_PAGESIZE)
	pagesize=sysconf(_SC_PAGESIZE);
#elif defined(PAGE_SIZE)
	pagesize=sysconf(PAGE_SIZE);
#else /* PAGESIZE */
#endif /* PAGESIZE */
#if   defined(_SC_AVPHYS_PAGES)
	mem_pages=sysconf(_SC_AVPHYS_PAGES);
#endif /* _SC_AVPHYS_PAGES */
#endif /* RSB_HAVE_SYSCONF */
	if(pagesize<1 || mem_pages<1)
		free_mem=0;
	else
		free_mem=((size_t)pagesize)*((size_t)mem_pages);
	return free_mem;
}

size_t rsb__sys_total_system_memory(void)
{
	/*!
	  	\ingroup gr_internals
	*/
	size_t tot_mem=0;
	long int pagesize =0;
	long int mem_pages=0;

#ifdef RSB_HAVE_SYSCONF
#if   defined(PAGESIZE)
	pagesize=sysconf(PAGESIZE);
#elif defined(_SC_PAGESIZE)
	pagesize=sysconf(_SC_PAGESIZE);
#elif defined(PAGE_SIZE)
	pagesize=sysconf(PAGE_SIZE);
#else /* PAGE_SIZE */
#endif /* PAGE_SIZE */
#if   defined(_SC_PHYS_PAGES)
	mem_pages = sysconf(_SC_PHYS_PAGES);
#endif /* _SC_PHYS_PAGES */
#endif /* RSB_HAVE_SYSCONF */
	if(pagesize<1 || mem_pages<1)
		tot_mem=0;
	else
		tot_mem=((size_t)pagesize)*((size_t)mem_pages);
	return tot_mem;
}

static long rsb_set_executing_threads(long tn)
{
	/*!
	 	FIXME: new
	  	\ingroup gr_internals
	*/
#if RSB_WANT_OMP_RECURSIVE_KERNELS
	/* multi threaded case */
	if(tn > RSB_CONST_MAX_SUPPORTED_CORES)
	{
		RSB_ERROR("cannot set %ld threads: a maximum of %ld is supported\n",tn,RSB_CONST_MAX_SUPPORTED_CORES);
		return RSB_CONST_MIN_SUPPORTED_CORES;
	}
	tn = RSB_MIN(tn,RSB_CONST_MAX_SUPPORTED_CORES);
	if(tn < RSB_CONST_MIN_SUPPORTED_CORES)
	{
		/* a value < 0 means the user wants the threads count to be set automatically */
	//	return 1;
		tn = rsb_global_session_handle.rsb_g_threads;
	}

#if (RSB_TIME_SET_THREADS==1)
	rsb_time_t dt = -rsb_time();
#endif
#if (RSB_USE_OMP_SET_NUM_THREADS==1)
	omp_set_num_threads(tn);
#endif
#if (RSB_TIME_SET_THREADS==1)
	dt += rsb_time();
	RSB_STDOUT("setting threads (%ld) took %lf s\n",tn,dt);
#endif
	/* FIXME : 20101111 on my GNU box, the following does not return tn, but seems to have effect. Weird */
	//tn = omp_get_num_threads();
	rsb_global_session_handle.rsb_want_threads = tn;/* FIXME : a hack */
	return tn;
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
	/* single threaded case */
	return RSB_CONST_MIN_SUPPORTED_CORES;
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
}

rsb_err_t rsb__lock_as_memory_resident(rsb_bool_t dolock)
{
	/* FIXME: need mechanisms to get/set/restore this setting */
	rsb_err_t errval = RSB_ERR_NO_ERROR;
#if RSB_HAVE_MLOCKALL
	int retval = 0;
	/* "mlockall() locks all pages mapped into the address space of the calling process." */
	if(dolock)
	{
		retval = mlockall(MCL_FUTURE|MCL_CURRENT);
	}
	else
	{
		retval = munlockall();
	}
	if(retval)
	{
		errval = RSB_ERR_INTERNAL_ERROR;
		/* FIXME: shall introduce RSB_ERR_SYSCALL_ERROR */
		RSB_ERROR(RSB_ERRM_FCOVMU);
	}
#else /* RSB_HAVE_MLOCKALL */
	RSB_ERROR(RSB_ERRM_COVMUINS);
	errval = RSB_ERR_UNSUPPORTED_FEATURE;
#endif /* RSB_HAVE_MLOCKALL */
	return errval; /* what about retval ? */
}

int rsb__fileno(FILE *stream)
{
#ifndef RSB_HAVE_FILENO
	return -1;
#else /* RSB_HAVE_FILENO */
	return fileno(stream);
#endif /* RSB_HAVE_FILENO */
}

rsb_int_t rsb__set_num_threads(rsb_int_t tn)
{
	/*!
	   	\ingroup rsb_doc_library rsb_doc_rsb

	 	Gets and/or sets the number of running librsb threads.
		If \a tn is RSB_THREADS_AUTO, sets the threads count to a default value.
		If \a tn is RSB_THREADS_GET, only returns the count of active threads.
		If \a tn is RSB_THREADS_GET_MAX, returns the max count of supported threads.
	 	\todo: shall promote this as a rsb.h function and make all similar ones static, and put them in thread.c !
		\return: number of running threads.
	 */

	long rtn=0;

	switch(tn)
	{
		case(RSB_THREADS_GET):
		rtn = rsb_want_executing_threads();
		break;
		case(RSB_THREADS_AUTO):
		tn=0;
		break;
		case(RSB_THREADS_GET_MAX_SYS):
#if RSB_WANT_OMP_RECURSIVE_KERNELS
		/* rtn = omp_get_max_threads(); */
		/* rtn = omp_get_thread_limit(); */
		rtn = rsb_global_session_handle.rsb_g_threads;
#else /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		rtn = 1;
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		break;
		case(RSB_THREADS_GET_MAX_LIB):
		rtn = RSB_CONST_MAX_SUPPORTED_CORES;
		break;
		case(RSB_THREADS_GET_MAX):
#if RSB_WANT_OMP_RECURSIVE_KERNELS
		rtn = omp_get_max_threads();
#endif /* RSB_WANT_OMP_RECURSIVE_KERNELS */
		rtn = RSB_MIN(rtn,RSB_CONST_MAX_SUPPORTED_CORES);
		break;
		default:
		rtn = rsb_set_executing_threads(tn);
	}
	RSB_DEBUG_ASSERT(rtn>0);
	return (rsb_int_t) rtn;
}
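
/*
 * Illustrative query usage (a sketch; actual values depend on the build and session):
 *   rsb_int_t nt  = rsb__set_num_threads(RSB_THREADS_GET);      // currently wanted threads
 *   rsb_int_t mnt = rsb__set_num_threads(RSB_THREADS_GET_MAX);  // upper bound supported here
 */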

#if 0
#include <execinfo.h>
void rsb_print_trace (void)
{
	/* according to a glibc docs example */
	void *array[10];
	size_t size;
	char **strings;
	size_t i;
	size = backtrace (array, 10);
	strings = backtrace_symbols (array, size);
	printf ("Obtained %zd stack frames.\n", size);
	for (i = 0; i < size; i++)
		printf ("%s\n", strings[i]);
	free (strings);
}
#endif

#if RSB_USE_RUSAGE
static rsb_time_t rsb_rstv(struct timeval*tvp)
{
	register double t = 0.0;
	t  =  (double)(tvp->tv_sec) + ((double)(tvp->tv_usec))*1.e-6;
	return t;
}

#define RSB_K 1024
rsb_err_t rsb__getrusage(void)
{
	/*
	 * Shall work independently of rsb_lib_init/rsb_lib_exit.
	 * */
	struct rusage usage;
	int gru = getrusage(RUSAGE_SELF,&usage);

	RSB_STDOUT("getrusage() stats:\n");
	/*("ru_ixrss : %ld (integral shared memory size)\n",usage.ru_ixrss);*/
	RSB_STDOUT("ru_maxrss: %ld (maximum resident set size -- MB)\n",usage.ru_maxrss / RSB_K);
	RSB_STDOUT("ru_stime : %0.4lgs (system CPU time used)\n",rsb_rstv(&usage.ru_stime));
	RSB_STDOUT("ru_utime : %0.4lgs (user CPU time used)\n",rsb_rstv(&usage.ru_utime));
#if 0
	RSB_STDOUT("ru_majflt: %ld (hard page faults)\n",usage.ru_majflt);
	RSB_STDOUT("ru_minflt: %ld (page reclaims -- soft page faults)\n",usage.ru_minflt);
#endif

	return gru == 0 ? RSB_ERR_NO_ERROR : RSB_ERR_GENERIC_ERROR;
}
#else /* RSB_USE_RUSAGE */
rsb_err_t rsb__getrusage(void)
{
	return RSB_ERR_NO_ERROR;
}
#endif /* RSB_USE_RUSAGE */

const rsb_char_t * rsb__getenv(const rsb_char_t * name)
{
	const rsb_char_t * evv = NULL;

#ifdef RSB_HAVE_GETENV
	evv = getenv(name);
#endif /* RSB_HAVE_GETENV */

	return evv;
}

const rsb_char_t * rsb__getenv_nnr(const rsb_char_t * name)
{
	RSB_DEBUG_ASSERT( name != NULL );
	return rsb__getenv(name) ? rsb__getenv(name) : name + strlen(name);
}

#if RSB_WITH_HWLOC
#if HWLOC_API_VERSION >= 0x00020000
static hwloc_uint64_t rsb__hwloc_max_cs_at_level(hwloc_topology_t topology, hwloc_obj_t obj, unsigned int depth, unsigned int cd)
{
	/* for rsb__get_lnc_size_hwloc */
	unsigned int i, cl;
	hwloc_uint64_t cs=0;

	switch (obj->type)
	{
		case(HWLOC_OBJ_L1CACHE): cl=1; break;
		case(HWLOC_OBJ_L2CACHE): cl=2; break;
		case(HWLOC_OBJ_L3CACHE): cl=3; break;
		case(HWLOC_OBJ_L4CACHE): cl=4; break;
		case(HWLOC_OBJ_L5CACHE): cl=5; break;
		default: cl=0;
	}
	if(cl && cl==cd)
		cs = obj->attr->cache.size;

	for (i = 0; i < obj->arity; i++)
	{
		hwloc_uint64_t ccs = rsb__hwloc_max_cs_at_level(topology, obj->children[i], depth + 1, cd);
		cs = ccs > cs ? ccs : cs;
	}
	return cs;
}
#endif /* HWLOC_API_VERSION >= 0x00020000 */
#endif /* RSB_WITH_HWLOC */

long rsb__get_lnc_size_hwloc(int n)
{
	/* Gets cache size using hwloc.h. EXPERIMENTAL */
	long size = 0;
#if RSB_WITH_HWLOC
	hwloc_uint64_t cs;
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_topology_init(&topology);
	hwloc_topology_load(topology);
#if HWLOC_API_VERSION >= 0x00020000
	cs = rsb__hwloc_max_cs_at_level(topology, hwloc_get_root_obj(topology), 0, n);
	size = (long) cs;
#else /* HWLOC_API_VERSION */
	int levels = 0;
	for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); obj; obj = obj->parent)
#if HWLOC_API_VERSION >= 0x20000
		if (obj->type >= HWLOC_OBJ_L1CACHE && obj->type <= HWLOC_OBJ_L5CACHE)
#else
		if (obj->type == HWLOC_OBJ_CACHE)
#endif
			if(++levels == n)
				size = obj->attr->cache.size;
#endif	/* HWLOC_API_VERSION */
	hwloc_topology_destroy(topology);
#endif	/* RSB_WITH_HWLOC */
	return size;
}

/* @endcond */