1 /******************************************************************************* 2 * Copyright (c) 2018, College of William & Mary 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * * Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of the College of William & Mary nor the 13 * names of its contributors may be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COLLEGE OF WILLIAM & MARY BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 * PRIMME: https://github.com/primme/primme 28 * Contact: Andreas Stathopoulos, a n d r e a s _at_ c s . w m . e d u 29 ******************************************************************************* 30 * File: template.h 31 * 32 * Purpose - Contains definitions of macros used along PRIMME. 33 * In short source files, *.c, are compiled several times, every time for a 34 * different type, referred as SCALAR. Examples of types are float, double, 35 * complex float, complex double, and the corresponding GPU versions. All 36 * types are described in section "Arithmetic". Other macros are defined to 37 * refer derived types and functions. An example is REAL, which is 38 * defined as the real (non-complex) version of SCALAR. For instance, 39 * when SCALAR is complex double, REAL is double. Similarly, it is possible 40 * to call the real version of a function. For instance, 41 * permute_vecs_Sprimme permutes vectors with type SCALAR and 42 * permute_vecs_Rprimme permutes vectors with type REAL. 43 * 44 * When SCALAR is a GPU type, the pointers SCALAR* are supposed to point out 45 * memory allocated on GPUs, also called devices. For instance, 46 * Num_malloc_Sprimme allocates GPU memory when SCALAR is a GPU type. Use 47 * HSCALAR and HREAL as the non-GPU, also called host, versions of SCALAR and 48 * REAL. Also to use the non-GPU version use the suffices _SHprimme and 49 * _RHprimme. For instance, 50 * Num_malloc_SHprimme allocates memory on the host, and 51 * permute_vecs_RHprimme permute REAL vectors on the host. 52 * 53 ******************************************************************************/ 54 55 #ifndef TEMPLATE_H 56 #define TEMPLATE_H 57 58 #include "template_types.h" 59 60 /********************************************************************** 61 * Macros USE_FLOAT, USE_FLOATCOMPLEX, USE_DOUBLE and USE_DOUBLECOMPLEX - 62 * only one of them is defined at the same time, and identifies the 63 * type of SCALAR, one of float, complex float, double or complex double. 64 * 65 * Macro USE_COMPLEX - only defined when SCALAR is a complex type. 66 * 67 * Macro USE_HOST - CPU version 68 * 69 * Macro USE_MAGMA - MAGMA version 70 * 71 * Macro SUPPORTED_TYPE - defined if functions with the current type 72 * are going to be built. 73 * 74 * Macro SUPPORTED_HALF_TYPE - defined if functions with the current type 75 * has a version in half. 76 **********************************************************************/ 77 78 /* Helper macros and types used to define SCALAR and REAL and their variants */ 79 80 #define HOST_STEM 81 82 #if defined(USE_HALF) || defined(USE_HALFCOMPLEX) || defined(USE_FLOAT) || \ 83 defined(USE_FLOATCOMPLEX) || defined(USE_DOUBLE) || \ 84 defined(USE_DOUBLECOMPLEX) 85 # define USE_HOST 86 # define STEM 87 # define IMPL(BL, MA) BL 88 #elif defined(USE_FLOAT_MAGMA) || defined(USE_FLOATCOMPLEX_MAGMA) || \ 89 defined(USE_DOUBLE_MAGMA) || defined(USE_DOUBLECOMPLEX_MAGMA) || \ 90 defined(USE_HALF_MAGMA) || defined(USE_HALFCOMPLEX_MAGMA) 91 # define USE_MAGMA 92 # define STEM magma_ 93 # define IMPL(BL, MA) MA 94 #else 95 # error 96 #endif 97 98 #if !defined(CHECK_TEMPLATE) && !defined(STEM_C) 99 # define STEM_C STEM 100 #endif 101 102 #if defined(USE_HALF) || defined(USE_HALF_MAGMA) || defined(USE_FLOAT) || \ 103 defined(USE_FLOAT_MAGMA) || defined(USE_DOUBLE) || \ 104 defined(USE_DOUBLE_MAGMA) || defined(USE_QUAD) || \ 105 defined(USE_QUAD_MAGMA) 106 # define USE_REAL 107 #elif defined(USE_HALFCOMPLEX) || defined(USE_HALFCOMPLEX_MAGMA) || \ 108 defined(USE_FLOATCOMPLEX) || defined(USE_FLOATCOMPLEX_MAGMA) || \ 109 defined(USE_DOUBLECOMPLEX) || defined(USE_DOUBLECOMPLEX_MAGMA) || \ 110 defined(USE_QUADCOMPLEX) || defined(USE_QUADCOMPLEX_MAGMA) 111 # define USE_COMPLEX 112 #else 113 # error 114 #endif 115 116 #if defined(USE_DOUBLE) || defined(USE_DOUBLE_MAGMA) 117 # define ARITH(H,K,S,C,D,Z,Q,W) D 118 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) D 119 #elif defined(USE_DOUBLECOMPLEX) || defined(USE_DOUBLECOMPLEX_MAGMA) 120 # define ARITH(H,K,S,C,D,Z,Q,W) Z 121 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) D 122 #elif defined(USE_FLOAT) || defined(USE_FLOAT_MAGMA) 123 # define ARITH(H,K,S,C,D,Z,Q,W) S 124 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) S 125 #elif defined(USE_FLOATCOMPLEX) || defined(USE_FLOATCOMPLEX_MAGMA) 126 # define ARITH(H,K,S,C,D,Z,Q,W) C 127 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) S 128 #elif defined(USE_HALF) || defined(USE_HALF_MAGMA) 129 # define ARITH(H,K,S,C,D,Z,Q,W) H 130 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) H 131 #elif defined(USE_HALFCOMPLEX) || defined(USE_HALFCOMPLEX_MAGMA) 132 # define ARITH(H,K,S,C,D,Z,Q,W) K 133 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) H 134 #elif defined(USE_QUAD) || defined(USE_QUAD_MAGMA) 135 # define ARITH(H,K,S,C,D,Z,Q,W) Q 136 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) Q 137 #elif defined(USE_QUADCOMPLEX) || defined(USE_QUADCOMPLEX_MAGMA) 138 # define ARITH(H,K,S,C,D,Z,Q,W) W 139 # define REAL_ARITH(H,K,S,C,D,Z,Q,W) Q 140 #else 141 # error 142 #endif 143 144 /* For host types, define SUPPORTED_HALF_TYPE when the compiler supports half 145 * precision. For MAGMA, define the macro if MAGMA also supports half precision. 146 * 147 * Define SUPPORTED_TYPE when inspecting the signature functions to generate 148 * the signature for all possible functions. Also define the macro for any 149 * setting without half precision, and for half precision if the compiler 150 * supports half precision. 151 */ 152 153 #if defined(PRIMME_WITH_HALF) && defined(PRIMME_WITH_NATIVE_HALF) && \ 154 (defined(USE_HOST) || \ 155 (defined(PRIMME_WITH_MAGMA) && defined(USE_MAGMA) && \ 156 defined(MAGMA_WITH_HALF))) 157 # define SUPPORTED_HALF_TYPE 158 #endif 159 160 // Undefine SUPPORTED_TYPE when the current type is not supported. That is if 161 // one the next applies: 162 // - USE_HALF/COMPLEX/_MAGMA is defined but SUPPORTED_HALF_TYPE is not. 163 // - USE_FLOAT/COMPLEX/_MAGMA is defined but PRIMME_WITHOUT_FLOAT is defined. 164 // - USE_MAGMA is defined but PRIMME_WITH_MAGMA is not. 165 166 #define SUPPORTED_TYPE 167 #if !defined(CHECK_TEMPLATE) && \ 168 (((defined(USE_HALF) || defined(USE_HALFCOMPLEX) || \ 169 defined(USE_HALF_MAGMA) || defined(USE_HALFCOMPLEX_MAGMA)) && \ 170 !defined(SUPPORTED_HALF_TYPE)) || \ 171 ((defined(USE_HALF_MAGMA) || defined(USE_HALFCOMPLEX_MAGMA)) && \ 172 !defined(MAGMA_WITH_HALF)) || \ 173 (defined(USE_MAGMA) && !defined(PRIMME_WITH_MAGMA)) || \ 174 ((defined(USE_FLOAT) || defined(USE_FLOATCOMPLEX) || \ 175 defined(USE_FLOAT_MAGMA) || \ 176 defined(USE_FLOATCOMPLEX_MAGMA)) && \ 177 defined(PRIMME_WITHOUT_FLOAT))) 178 # undef SUPPORTED_TYPE 179 #endif 180 181 /* A C99 code with complex type is not a valid C++ code. However C++ */ 182 /* compilers usually can take it. Nevertheless in order to avoid the warnings */ 183 /* while compiling in pedantic mode, we use the proper complex type for C99 */ 184 /* (complex double and complex float) and C++ (std::complex<double> and */ 185 /* std::complex<float>). Of course both complex types are binary compatible. */ 186 187 #ifdef USE_COMPLEX 188 # ifndef __cplusplus 189 # define REAL_PART(x) (creal(x)) 190 # define IMAGINARY_PART(x) (cimag(x)) 191 # define ABS(x) (cabs(x)) 192 # define CONJ(x) (conj(x)) 193 # else 194 # define REAL_PART(x) (std::real(x)) 195 # define IMAGINARY_PART(x) (std::imag(x)) 196 # define ABS(x) (std::abs(x)) 197 # define CONJ(x) (std::conj(x)) 198 # endif 199 #else 200 # define REAL_PART(x) (x) 201 # define IMAGINARY_PART(x) 0 202 # define ABS(x) (fabs(x)) 203 # define CONJ(x) (x) 204 #endif 205 206 /* Helper macros to support complex arithmetic for types without complex */ 207 /* support in C99. For now, only half precision has this problem. The */ 208 /* approach is to cast the unsupported complex type to a supported type */ 209 /* with more precision. For instance, complex half precision is cast to */ 210 /* complex single precision. */ 211 /* */ 212 /* NOTE: 'A' is an unsupported complex type and 'B' is a supported type */ 213 /* SET_ZERO(A) : set A = 0 */ 214 /* SET_COMPLEX(A, B) : set A = B */ 215 /* TO_COMPLEX(A) : cast A to a supported complex type */ 216 /* PLUS_EQUAL(A, B) : set A += B */ 217 /* MULT_EQUAL(A, B) : set A *= B */ 218 219 #if (defined(USE_HALFCOMPLEX) || defined(USE_HALFCOMPLEX_MAGMA)) && !defined(PRIMME_WITH_NATIVE_COMPLEX_HALF) 220 # define SET_ZERO(A) {(A).r = 0; (A).i = 0;} 221 # define SET_COMPLEX(A,B) {(A).r = REAL_PART(B); (A).i = IMAGINARY_PART(B);} 222 # ifndef __cplusplus 223 # define TO_COMPLEX(A) ((A).r + (A).i * _Complex_I) 224 # else 225 # define TO_COMPLEX(A) (std::complex<HREAL>((HREAL)((A).r), (HREAL)((A).i))) 226 # endif 227 # define PLUS_EQUAL(A,B) {(A).r += REAL_PART(B); (A).i += IMAGINARY_PART(B);} 228 # define MULT_EQUAL(A, B) \ 229 { \ 230 HSCALAR C = TO_COMPLEX(A) * (B); \ 231 (A).r += REAL_PART(C); \ 232 (A).i += IMAGINARY_PART(C); \ 233 } 234 #else 235 # define SET_ZERO(A) {(A) = 0.0;} 236 # define SET_COMPLEX(A,B) (A) = (B) 237 # if defined(USE_HALFCOMPLEX) && defined(__cplusplus) 238 # define TO_COMPLEX(A) (HSCALAR(REAL_PART(A), IMAGINARY_PART(A))) 239 # define PLUS_EQUAL(A, B) (A) = TO_COMPLEX(A) + (B) 240 # define MULT_EQUAL(A, B) (A) = TO_COMPLEX(A) * (B) 241 # else 242 # define TO_COMPLEX(A) (A) 243 # define PLUS_EQUAL(A, B) (A) += (B) 244 # define MULT_EQUAL(A, B) (A) *= (B) 245 # endif 246 #endif 247 248 249 /* TEMPLATE_PLEASE tags the functions whose prototypes depends on macros and */ 250 /* are used in other files. The macro has value only when the tool ctemplate */ 251 /* is inspecting the source files, which is indicated by the macro */ 252 /* CHECK_TEMPLATE being defined. See Makefile and tools/ctemplate. */ 253 /* */ 254 /* When SCALAR is not a complex type (e.g., float and double) the function */ 255 /* will be referred as _Sprimme and _Rprimme. Otherwise it will be referred */ 256 /* only as _Sprimme. The term TEMPLATE_PLEASE should prefix every function */ 257 /* that will be instantiated with different values for SCALAR and REAL. */ 258 259 #ifndef TEMPLATE_H_PRIVATE 260 #define TEMPLATE_H_PRIVATE 261 262 # define USE_ARITH(Re,Co) ARITH(Re,Co,Re,Co,Re,Co,Re,Co) 263 264 # define USE_SR(Re,Co,T,XH,STEM,POST) \ 265 USE(CONCAT(CONCAT(CONCAT(S,XH),T),primme), STR0(CONCAT(CONCAT(CONCAT(STEM,USE_ARITH(Re,Co)),primme),POST))) \ 266 USE(CONCAT(CONCAT(CONCAT(R,XH),T),primme), STR0(CONCAT(CONCAT(CONCAT(STEM,Re),primme),POST))) 267 268 # define USE_TYPE(H,K,S,C,D,Z,Q,W,XH,STEM,POST) \ 269 USE_SR(H,K,h,XH,STEM,POST) \ 270 USE_SR(S,C,s,XH,STEM,POST) \ 271 USE_SR(D,Z,d,XH,STEM,POST) \ 272 USE_SR(Q,W,q,XH,STEM,POST) 273 274 #endif /* TEMPLATE_H_PRIVATE */ 275 276 #ifdef CHECK_TEMPLATE 277 # define TEMPLATE_PLEASE \ 278 APPEND_FUNC(Sprimme,SCALAR_SUF) USE(Sprimme,"SCALAR_SUF") \ 279 USE(Rprimme,"REAL_SUF") USE(SHprimme,"HOST_SCALAR_SUF") \ 280 USE(RHprimme,"HOST_REAL_SUF") USE(SXprimme,"XSCALAR_SUF") \ 281 USE(RXprimme,"XREAL_SUF") USE_TYPE(h,k,s,c,d,z,q,w, , STEM_C, ) \ 282 USE_TYPE(h,k,s,c,d,z,q,w, X, HOST_STEM, ) \ 283 USE_TYPE(s,c,s,c,d,z,q,w, H, HOST_STEM, ) 284 285 # define STATIC APPEND_FUNC(,SCALAR_SUF) USE(,"SCALAR_SUF") 286 287 #else 288 # define TEMPLATE_PLEASE 289 # define STATIC 290 #endif /* CHECK_TEMPLATE */ 291 292 /* Avoid to use the final type for integers and complex in generated */ 293 /* headers file. Instead use PRIMME_COMPLEX_FLOAT, _HALF and _DOUBLE. */ 294 295 #ifdef CHECK_TEMPLATE 296 # undef PRIMME_HALF 297 # undef PRIMME_COMPLEX_HALF 298 # undef PRIMME_COMPLEX_FLOAT 299 # undef PRIMME_COMPLEX_DOUBLE 300 # undef PRIMME_QUAD 301 # undef PRIMME_COMPLEX_QUAD 302 # undef PRIMME_INT 303 #endif /* CHECK_TEMPLATE */ 304 305 #endif /* TEMPLATE_H */ 306