1
2 /**
3 * \file misc.h
4 * \brief Helper functions.
5 *
6 * \author Gregory Bard <bard@fordham.edu>
7 * \author Martin Albrecht <M.R.Albrecht@rhul.ac.uk>
8 * \author Carlo Wood <carlo@alinoe.com>
9 */
10
11 #ifndef M4RI_MISC_H
12 #define M4RI_MISC_H
13
14 /*******************************************************************
15 *
16 * M4RI: Linear Algebra over GF(2)
17 *
18 * Copyright (C) 2007, 2008 Gregory Bard <bard@fordham.edu>
19 * Copyright (C) 2008 Martin Albrecht <M.R.Albrecht@rhul.ac.uk>
20 * Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
21 *
22 * Distributed under the terms of the GNU General Public License (GPL)
23 * version 2 or higher.
24 *
25 * This code is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * General Public License for more details.
29 *
30 * The full text of the GPL is available at:
31 *
32 * http://www.gnu.org/licenses/
33 *
34 ********************************************************************/
35
36 #include <m4ri/m4ri_config.h>
37
38 #ifdef HAVE_CONFIG_H
39 #include "config.h"
40 #endif
41
42 #if __M4RI_USE_MM_MALLOC
43 #include <mm_malloc.h>
44 #endif
45
46 #include <stdlib.h>
47 #include <assert.h>
48 #include <string.h>
49 /// @cond INTERNAL
50 #define __STDC_LIMIT_MACROS
51 /// @endcond
52 #include <stdint.h>
53
/*
 * The typedefs and constants below entirely define the word width used in
 * the library.
 */
57
/**
 * \brief Pretty name for a boolean int.
 *
 * The value of a BIT is either 0 or 1.
 */

typedef int BIT;

/**
 * \brief Type of row and column indexes.
 *
 * This type is used for integer values that hold row/column sized values.
 */

typedef int rci_t;

/**
 * \brief Type of word indexes.
 *
 * This type is used to index the array of words that make up a row.
 */

typedef int wi_t;


/**
 * \brief A word is the typical packed data structure to represent packed bits.
 *
 * It is an unsigned 64-bit integer.
 */

typedef uint64_t word;
88
/**
 * \brief Explicit conversion macro.
 *
 * Explicit conversion of a word, representing 64 columns, to an integer
 * to be used as index into an array. This is used for Gray codes.
 * No error checking is done that the most significant bits in w are zero.
 *
 * \note This is only a cast. Its purpose is to track intention.
 */

#define __M4RI_CONVERT_TO_INT(w) ((int)(w))

/**
 * \brief Explicit conversion macro.
 *
 * Explicit conversion of a word, representing 64 columns, to a BIT
 * to be used as boolean: this is an int with value 0 (false) or 1 (true).
 * No error checking is done that only the least significant bit is set (if any).
 *
 * \note This is only a cast. Its purpose is to track intention.
 */

#define __M4RI_CONVERT_TO_BIT(w) ((BIT)(w))

/**
 * \brief Explicit conversion macro.
 *
 * Explicit conversion of a word, representing 64 columns, to a uint64_t.
 *
 * The returned value is the underlying integer representation of these 64 columns,
 * meaning in particular that if val is a uint64_t then
 * __M4RI_CONVERT_TO_UINT64_T(__M4RI_CONVERT_TO_WORD(val)) == val.
 *
 * \note This is a no-op. Its purpose is to track intention.
 */

#define __M4RI_CONVERT_TO_UINT64_T(w) (w)

/**
 * \brief Explicit conversion macro.
 *
 * Explicit conversion of an integer to a word.
 *
 * \note This is only a cast. Its purpose is to track intention.
 */

#define __M4RI_CONVERT_TO_WORD(i) ((word)(i))
136
137 /**
138 * \brief The number of bits in a word.
139 */
140
141 static int const m4ri_radix = 64;
142
143 /**
144 * \brief The number one as a word.
145 */
146
147 static word const m4ri_one = __M4RI_CONVERT_TO_WORD(1);
148
149 /**
150 * \brief A word with all bits set.
151 */
152
153 static word const m4ri_ffff = __M4RI_CONVERT_TO_WORD(-1);
154
/**
 * \brief Return the maximal element of x and y
 *
 * \param x Word
 * \param y Word
 *
 * \warning The selected argument is evaluated twice; do not pass
 * expressions with side effects.
 */

#ifndef MAX
#define MAX(x,y) (((x) > (y))?(x):(y))
#endif

/**
 * \brief Return the minimal element of x and y
 *
 * \param x Word
 * \param y Word
 *
 * \warning The selected argument is evaluated twice; do not pass
 * expressions with side effects.
 */

#ifndef MIN
#define MIN(x,y) (((x) < (y))?(x):(y))
#endif
176
/**
 * \brief Pretty name for 1.
 */

#ifndef TRUE
#define TRUE 1
#endif

/**
 * \brief Pretty name for 0.
 */

#ifndef FALSE
#define FALSE 0
#endif
192
/**
 * \brief 2^i as an unsigned 64-bit value.
 *
 * \param i Integer with 0 <= i < 64 (larger shifts are undefined in C).
 */

#define __M4RI_TWOPOW(i) ((uint64_t)1 << (i))
200
/**
 * \brief Clear the bit spot (counting from the left) in the word w
 *
 * "Left" follows the library's column order: spot 0 is the least
 * significant bit of the word. w is modified in place.
 *
 * \param w Word
 * \param spot Integer with 0 <= spot < m4ri_radix
 */

#define __M4RI_CLR_BIT(w, spot) ((w) &= ~(m4ri_one << (spot)))
209
/**
 * \brief Set the bit spot (counting from the left) in the word w
 *
 * "Left" follows the library's column order: spot 0 is the least
 * significant bit of the word. w is modified in place.
 *
 * \param w Word
 * \param spot Integer with 0 <= spot < m4ri_radix
 */

#define __M4RI_SET_BIT(w, spot) ((w) |= (m4ri_one << (spot)))

/**
 * \brief Get the bit spot (counting from the left) in the word w
 *
 * \param w Word
 * \param spot Integer with 0 <= spot < m4ri_radix
 *
 * \return The bit as a BIT (0 or 1); w is not modified.
 */

#define __M4RI_GET_BIT(w, spot) __M4RI_CONVERT_TO_BIT(((w) >> (spot)) & m4ri_one)

/**
 * \brief Write the value to the bit spot in the word w
 *
 * The old bit is cleared first and the new value is then OR-ed in, so a
 * value other than 0 or 1 would also disturb higher bits.
 *
 * \param w Word.
 * \param spot Integer with 0 <= spot < m4ri_radix.
 * \param value Either 0 or 1.
 */

#define __M4RI_WRITE_BIT(w, spot, value) ((w) = (((w) & ~(m4ri_one << (spot))) | (__M4RI_CONVERT_TO_WORD(value) << (spot))))

/**
 * \brief Flip the spot in the word w
 *
 * \param w Word.
 * \param spot Integer with 0 <= spot < m4ri_radix.
 */

#define __M4RI_FLIP_BIT(w, spot) ((w) ^= (m4ri_one << (spot)))
246
/**
 * \brief create a bit mask to zero out all but the (n - 1) % m4ri_radix + 1 leftmost bits.
 *
 * This function returns 1..64 bits, never zero bits.
 * This mask is mainly used to mask the valid bits in the most significant word,
 * by using __M4RI_LEFT_BITMASK((M->ncols + M->offset) % m4ri_radix).
 * In other words, the set bits represent the columns with the lowest index in the word.
 *
 * Thus,
 *
 *  n   Output
 *  0=64 1111111111111111111111111111111111111111111111111111111111111111
 *  1    0000000000000000000000000000000000000000000000000000000000000001
 *  2    0000000000000000000000000000000000000000000000000000000000000011
 *  .    ...
 *  62   0011111111111111111111111111111111111111111111111111111111111111
 *  63   0111111111111111111111111111111111111111111111111111111111111111
 *
 * Note that n == 64 is only passed from __M4RI_MIDDLE_BITMASK, and still works
 * (behaves the same as n == 0): the input is modulo 64.
 *
 * \param n Integer with 0 <= n <= m4ri_radix
 */

/* The shift count is reduced modulo m4ri_radix before shifting. */
#define __M4RI_LEFT_BITMASK(n) (m4ri_ffff >> ((m4ri_radix - (n)) % m4ri_radix))

/**
 * \brief create a bit mask to zero out all but the n rightmost bits.
 *
 * This function returns 1..64 bits, never zero bits.
 * This mask is mainly used to mask the n valid bits in the least significant word
 * with valid bits by using __M4RI_RIGHT_BITMASK(m4ri_radix - M->offset).
 * In other words, the set bits represent the columns with the highest index in the word.
 *
 * Thus,
 *
 *  n   Output
 *  1    1000000000000000000000000000000000000000000000000000000000000000
 *  2    1100000000000000000000000000000000000000000000000000000000000000
 *  3    1110000000000000000000000000000000000000000000000000000000000000
 *  .    ...
 *  63   1111111111111111111111111111111111111111111111111111111111111110
 *  64   1111111111111111111111111111111111111111111111111111111111111111
 *
 * Note that n == 0 is never passed and would fail (it would shift by the
 * full word width).
 *
 * \param n Integer with 0 < n <= m4ri_radix
 */

#define __M4RI_RIGHT_BITMASK(n) (m4ri_ffff << (m4ri_radix - (n)))

/**
 * \brief create a bit mask that is the combination of __M4RI_LEFT_BITMASK and __M4RI_RIGHT_BITMASK.
 *
 * This function returns 1..64 bits, never zero bits.
 * This mask is mainly used to mask the n valid bits in the only word with valid bits
 * (when M->ncols + M->offset <= m4ri_radix), by using __M4RI_MIDDLE_BITMASK(M->ncols, M->offset).
 * It is equivalent to __M4RI_LEFT_BITMASK(n + offset) & __M4RI_RIGHT_BITMASK(m4ri_radix - offset).
 * In other words, the set bits represent the valid columns in the word.
 *
 * Note that when n == m4ri_radix (and thus offset == 0) then __M4RI_LEFT_BITMASK is called with n == 64.
 *
 * \param n Integer with 0 < n <= m4ri_radix - offset
 * \param offset Column offset, with 0 <= offset < m4ri_radix
 */

#define __M4RI_MIDDLE_BITMASK(n, offset) (__M4RI_LEFT_BITMASK(n) << (offset))
314
315 /**
316 * \brief swap bits in the word v
317 *
318 * \param v The word whose bits need to be reversed.
319 */
320
m4ri_swap_bits(word v)321 static inline word m4ri_swap_bits(word v) {
322 v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1);
323 v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2);
324 v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4);
325 v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);
326 v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16);
327 v = (v >> 32) | (v << 32);
328 return v;
329 }
330
331 /**
332 * \brief pack bits (inverse of m4ri_spread_bits)
333 *
334 * \param from bitstring
335 * \param Q array with bit positions
336 * \param length bitsize of the output
337 * \param base subtracted from every value in Q
338 *
339 * \returns inverse of m4ri_spread_bits)
340 *
341 * \see m4ri_spread_bits
342 */
343
m4ri_shrink_bits(word const from,rci_t * const Q,int const length,int const base)344 static inline word m4ri_shrink_bits(word const from, rci_t* const Q, int const length, int const base) {
345 word to = 0;
346 switch(length-1) {
347 case 15: to |= (from & (m4ri_one << (Q[15] - base))) >> (Q[15] - 15 - base);
348 case 14: to |= (from & (m4ri_one << (Q[14] - base))) >> (Q[14] - 14 - base);
349 case 13: to |= (from & (m4ri_one << (Q[13] - base))) >> (Q[13] - 13 - base);
350 case 12: to |= (from & (m4ri_one << (Q[12] - base))) >> (Q[12] - 12 - base);
351 case 11: to |= (from & (m4ri_one << (Q[11] - base))) >> (Q[11] - 11 - base);
352 case 10: to |= (from & (m4ri_one << (Q[10] - base))) >> (Q[10] - 10 - base);
353 case 9: to |= (from & (m4ri_one << (Q[ 9] - base))) >> (Q[ 9] - 9 - base);
354 case 8: to |= (from & (m4ri_one << (Q[ 8] - base))) >> (Q[ 8] - 8 - base);
355 case 7: to |= (from & (m4ri_one << (Q[ 7] - base))) >> (Q[ 7] - 7 - base);
356 case 6: to |= (from & (m4ri_one << (Q[ 6] - base))) >> (Q[ 6] - 6 - base);
357 case 5: to |= (from & (m4ri_one << (Q[ 5] - base))) >> (Q[ 5] - 5 - base);
358 case 4: to |= (from & (m4ri_one << (Q[ 4] - base))) >> (Q[ 4] - 4 - base);
359 case 3: to |= (from & (m4ri_one << (Q[ 3] - base))) >> (Q[ 3] - 3 - base);
360 case 2: to |= (from & (m4ri_one << (Q[ 2] - base))) >> (Q[ 2] - 2 - base);
361 case 1: to |= (from & (m4ri_one << (Q[ 1] - base))) >> (Q[ 1] - 1 - base);
362 case 0: to |= (from & (m4ri_one << (Q[ 0] - base))) >> (Q[ 0] - 0 - base);
363 break;
364 default:
365 abort();
366 }
367 return to;
368 }
369
370 /**
371 * \brief spread bits
372 *
373 * Given a bitstring 'from' and a spreading table Q, return a
374 * bitstring where the bits of 'from' are in the positions indicated
375 * by Q.
376 *
377 * \param from bitstring of length 'length' stored in a word
378 * \param Q table with new bit positions
379 * \param length bitsize of input
380 * \param base subtracted from every value in Q
381 *
382 * \returns bitstring having the same bits as from but spread using Q
383 *
384 * \see m4ri_shrink_bits
385 */
386
m4ri_spread_bits(word const from,rci_t * const Q,int const length,int const base)387 static inline word m4ri_spread_bits(word const from, rci_t* const Q, int const length, int const base) {
388 word to = 0;
389 switch(length-1) {
390 case 15: to |= (from & (m4ri_one << (15))) << (Q[15]-15-base);
391 case 14: to |= (from & (m4ri_one << (14))) << (Q[14]-14-base);
392 case 13: to |= (from & (m4ri_one << (13))) << (Q[13]-13-base);
393 case 12: to |= (from & (m4ri_one << (12))) << (Q[12]-12-base);
394 case 11: to |= (from & (m4ri_one << (11))) << (Q[11]-11-base);
395 case 10: to |= (from & (m4ri_one << (10))) << (Q[10]-10-base);
396 case 9: to |= (from & (m4ri_one << ( 9))) << (Q[ 9]- 9-base);
397 case 8: to |= (from & (m4ri_one << ( 8))) << (Q[ 8]- 8-base);
398 case 7: to |= (from & (m4ri_one << ( 7))) << (Q[ 7]- 7-base);
399 case 6: to |= (from & (m4ri_one << ( 6))) << (Q[ 6]- 6-base);
400 case 5: to |= (from & (m4ri_one << ( 5))) << (Q[ 5]- 5-base);
401 case 4: to |= (from & (m4ri_one << ( 4))) << (Q[ 4]- 4-base);
402 case 3: to |= (from & (m4ri_one << ( 3))) << (Q[ 3]- 3-base);
403 case 2: to |= (from & (m4ri_one << ( 2))) << (Q[ 2]- 2-base);
404 case 1: to |= (from & (m4ri_one << ( 1))) << (Q[ 1]- 1-base);
405 case 0: to |= (from & (m4ri_one << ( 0))) << (Q[ 0]- 0-base);
406 break;
407 default:
408 abort();
409 }
410 return to;
411 }
412
/**
 * \brief Return alignment of addr w.r.t. n. For example the address
 * 17 would be 1 aligned w.r.t. 16.
 *
 * The address is converted through uintptr_t so that no address bits are
 * lost on platforms where unsigned long is narrower than a pointer.
 *
 * \param addr Address (pointer or integer).
 * \param n Alignment to test against; must not be zero.
 *
 * \return addr modulo n.
 */

#define __M4RI_ALIGNMENT(addr, n) (((uintptr_t)(addr))%(n))
422
/**
 * \brief Test for gcc >= maj.min, as per __GNUC_PREREQ in glibc
 *
 * The result is usable in \#if directives.
 *
 * \param maj The major version.
 * \param min The minor version.
 * \return Non-zero iff we are using a GNU compiler of at least version maj.min.
 */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#define __M4RI_GNUC_PREREQ(maj, min) ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
/* A literal 0 keeps this usable in #if however FALSE happens to be defined. */
#define __M4RI_GNUC_PREREQ(maj, min) 0
#endif

/* __builtin_expect is in gcc 3.0, and not in 2.95. */
#if __M4RI_GNUC_PREREQ(3,0) || defined(M4RI_DOXYGEN)

/**
 * \brief Macro to help with branch prediction.
 *
 * Marks cond as expected to be true; evaluates to 1 iff cond is non-zero.
 */

#define __M4RI_LIKELY(cond)    __builtin_expect ((cond) != 0, 1)

/**
 * \brief Macro to help with branch prediction.
 *
 * Marks cond as expected to be false; evaluates to 1 iff cond is non-zero.
 */

#define __M4RI_UNLIKELY(cond)  __builtin_expect ((cond) != 0, 0)

#else
/* No hint available: just evaluate the condition. */
#define __M4RI_LIKELY(cond)    (cond)
#define __M4RI_UNLIKELY(cond)  (cond)
#endif
455
456 /**
457 * Return true if a's least significant bit is smaller than b's least significant bit.
458 *
459 * return true if LSBI(a) < LSBI(b),
460 * where LSBI(w) is the index of the least significant bit that is set in w, or 64 if w is zero.
461 *
462 * \param a Word
463 * \param b Word
464 */
465
m4ri_lesser_LSB(word a,word b)466 static inline int m4ri_lesser_LSB(word a, word b)
467 {
468 uint64_t const ia = __M4RI_CONVERT_TO_UINT64_T(a);
469 uint64_t const ib = __M4RI_CONVERT_TO_UINT64_T(b);
470 /*
471 * If a is zero then we should always return false, otherwise
472 * if b is zero we should return true iff a has at least one bit set.
473 */
474 return !(ib ? ((ia - 1) ^ ia) & ib : !ia);
475 }
476
477
478 /**** Error Handling *****/
479
/**
 * \brief Print error message and abort().
 *
 * The function accepts additional
 * parameters like printf, so e.g. m4ri_die("foo %d bar %f\n", 1, 2.0)
 * is valid and will print the string "foo 1 bar 2.000000" before dying.
 *
 * \param errormessage a printf-style format string to be printed.
 *
 * \todo Allow user to register callback which is called on
 * m4ri_die().
 *
 * \warning The provided string is not free'd.
 */

void m4ri_die(const char *errormessage, ...);
496
497 /**** IO *****/
498
499 /**
500 * \brief Write a sting representing the word data to destination.
501 *
502 * \param destination Address of buffer of length at least m4ri_radix*1.3
503 * \param data Source word
504 * \param colon Insert a Colon after every 4-th bit.
505 * \warning Assumes destination has m4ri_radix*1.3 bytes available
506 */
507 void m4ri_word_to_str( char *destination, word data, int colon);
508
509 /**
510 * \brief Return 1 or 0 uniformly randomly distributed.
511 *
512 * \todo Allow user to provide her own random() function.
513 */
514
m4ri_coin_flip()515 static inline BIT m4ri_coin_flip() {
516 if (rand() < RAND_MAX/2) {
517 return 0;
518 } else {
519 return 1;
520 }
521 }
522
523 /**
524 * \brief Return uniformly randomly distributed random word.
525 *
526 * \todo Allow user to provide her own random() function.
527 */
528
529 word m4ri_random_word();
530
531 /***** Initialization *****/
532
/**
 * \brief Initialize global data structures for the M4RI library.
 *
 * On Linux/Solaris this is called automatically when the shared
 * library is loaded, but it doesn't harm if it is called twice.
 */

#if defined(__GNUC__)
/* GNU compilers: run as a constructor. */
void __attribute__ ((constructor)) m4ri_init(void);
#else
void m4ri_init(void);
#endif

#ifdef __SUNPRO_C
/* Sun compiler: register as an init function. */
#pragma init(m4ri_init)
#endif

/**
 * \brief De-initialize global data structures from the M4RI library.
 *
 * On Linux/Solaris this is called automatically when the shared
 * library is unloaded, but it doesn't harm if it is called twice.
 */

#if defined(__GNUC__)
/* GNU compilers: run as a destructor. */
void __attribute__ ((destructor)) m4ri_fini(void);
#else
void m4ri_fini(void);
#endif

#ifdef __SUNPRO_C
/* Sun compiler: register as a fini function. */
#pragma fini(m4ri_fini)
#endif
566
567 /***** Memory Management *****/
568
/// @cond INTERNAL

/*
 * Fall-back cache sizes, used for every level whose size is zero (or not
 * defined at all). L3 is handled first so that it can inherit a known L2
 * size before L2 itself receives its default.
 */

#if __M4RI_CPU_L3_CACHE == 0
/*
 * Fix some standard value for L3 cache size if it couldn't be
 * determined by configure.
 */

#undef __M4RI_CPU_L3_CACHE
#if __M4RI_CPU_L2_CACHE
#define __M4RI_CPU_L3_CACHE __M4RI_CPU_L2_CACHE
#else
#define __M4RI_CPU_L3_CACHE 4194304
#endif // __M4RI_CPU_L2_CACHE
#endif // __M4RI_CPU_L3_CACHE

#if __M4RI_CPU_L2_CACHE == 0
/*
 * Fix some standard value for L2 cache size if it couldn't be
 * determined by configure.
 */
#undef __M4RI_CPU_L2_CACHE
#define __M4RI_CPU_L2_CACHE 262144
#endif // __M4RI_CPU_L2_CACHE


#if __M4RI_CPU_L1_CACHE == 0
/*
 * Fix some standard value for L1 cache size if it couldn't be
 * determined by configure.
 */
#undef __M4RI_CPU_L1_CACHE
#define __M4RI_CPU_L1_CACHE 16384
#endif // __M4RI_CPU_L1_CACHE

/// @endcond
605
/**
 * \brief Calloc wrapper.
 *
 * Allocates zero-initialised memory for count elements of size bytes
 * each. With _mm_malloc or posix_memalign the block is 64-byte aligned
 * and cleared explicitly; otherwise plain calloc() is used.
 *
 * Calls m4ri_die() if count * size does not fit into a size_t or if the
 * underlying allocator returns NULL.
 *
 * \param count Number of elements.
 * \param size Size of each element.
 *
 * \return pointer to allocated memory block.
 *
 * \todo Allow user to register calloc function.
 */

static inline void *m4ri_mm_calloc(size_t count, size_t size) {
  /*
   * The aligned allocators and memset() below take one byte count, so the
   * product has to be representable. calloc() checks this on its own.
   */
  if (size != 0 && count > (size_t)-1 / size) {
    m4ri_die("m4ri_mm_calloc: count * size overflows size_t\n");
    return NULL; /* unreachable. */
  }
  size_t const total = count * size;

  void *newthing;
#if __M4RI_USE_MM_MALLOC
  newthing = _mm_malloc(total, 64);
#elif __M4RI_USE_POSIX_MEMALIGN
  int error = posix_memalign(&newthing, 64, total);
  if (error) newthing = NULL;
#else
  (void)total; /* only needed by the aligned allocators */
  newthing = calloc(count, size);
#endif

  if (newthing == NULL) {
    m4ri_die("m4ri_mm_calloc: calloc returned NULL\n");
    return NULL; /* unreachable. */
  }
#if __M4RI_USE_MM_MALLOC || __M4RI_USE_POSIX_MEMALIGN
  /* Unlike calloc(), these allocators do not clear the block. */
  memset(newthing, 0, total);
#endif
  return newthing;
}
638
/**
 * \brief Aligned malloc wrapper.
 *
 * This function will attempt to align memory, but does not guarantee
 * success in case neither _mm_malloc nor posix_memalign are available:
 * the fallback is plain malloc(), which ignores the requested alignment.
 *
 * Calls m4ri_die() if a non-empty block could not be allocated.
 *
 * \param size Size in bytes.
 * \param alignment Alignment (16,64,...).
 *
 * \return pointer to allocated memory block.
 *
 * \todo Allow user to register malloc function.
 */

static inline void *m4ri_mm_malloc_aligned(size_t size, size_t alignment) {
  void *newthing;

#if __M4RI_USE_MM_MALLOC
  newthing = _mm_malloc(size, alignment);
#elif __M4RI_USE_POSIX_MEMALIGN
  int error = posix_memalign(&newthing, alignment, size);
  if (error)
    newthing = NULL;
#else
  (void)alignment; /* plain malloc() cannot honour the request */
  newthing = malloc(size);
#endif

  /* A NULL result is only an error when memory was actually requested. */
  if (newthing == NULL && size > 0) {
    m4ri_die("m4ri_mm_malloc_aligned: malloc returned NULL\n");
    return NULL; /* unreachable */
  }
  return newthing;
}
672
/**
 * \brief Malloc wrapper.
 *
 * With _mm_malloc or posix_memalign the block is 64-byte aligned;
 * otherwise plain malloc() is used. Calls m4ri_die() if a non-empty
 * block could not be allocated.
 *
 * \param size Size in bytes.
 *
 * \return pointer to allocated memory block.
 *
 * \todo Allow user to register malloc function.
 */

static inline void *m4ri_mm_malloc(size_t size) {
  void *newthing;
#if __M4RI_USE_MM_MALLOC
  newthing = _mm_malloc(size, 64);
#elif __M4RI_USE_POSIX_MEMALIGN
  int error = posix_memalign(&newthing, 64, size);
  if (error) newthing = NULL;
#else
  newthing = malloc(size);
#endif //__M4RI_USE_MM_MALLOC

  /* A NULL result is only an error when memory was actually requested. */
  if (newthing == NULL && size > 0) {
    m4ri_die("m4ri_mm_malloc: malloc returned NULL\n");
    return NULL; /* unreachable */
  }
  return newthing;
}
699
700
/**
 * \brief Free wrapper.
 *
 * Releases a block with the deallocator that matches the allocation
 * wrappers: _mm_free() when _mm_malloc is in use, free() otherwise.
 * Any further arguments are accepted but ignored.
 *
 * \param condemned Pointer.
 *
 * \todo Allow user to register free function.
 */

static inline void m4ri_mm_free(void *condemned, ...) {
#if __M4RI_USE_MM_MALLOC
  _mm_free(condemned);
#else
  free(condemned);
#endif
}
717
/// @cond INTERNAL

/*
 * Portable spelling of the restrict qualifier: use the vendor keyword
 * where one is known and expand to nothing elsewhere.
 */

#if defined (__GNUC__)
#define RESTRICT __restrict__
#elif defined (_MSC_VER)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif



/*
 * Macros for template expansion.
 *
 * __M4RI_TEMPLATE_EXPAND1 macro-expands its arguments before
 * __M4RI_TEMPLATE_EXPAND0 pastes them together as x_y.
 * __M4RI_TEMPLATE_NAME(fun) therefore yields fun_<N>, where N is
 * presumably defined by the including file (confirm at the use site).
 */

#define __M4RI_TEMPLATE_EXPAND0(x,y) x ## _ ## y
#define __M4RI_TEMPLATE_EXPAND1(x,y) __M4RI_TEMPLATE_EXPAND0(x,y)
#define __M4RI_TEMPLATE_NAME(fun) __M4RI_TEMPLATE_EXPAND1(fun, N)

/// @endcond
741
742 #endif // M4RI_MISC_H
743