1 /*******************************************************************************
2  * Copyright (c) 2018, College of William & Mary
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the College of William & Mary nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COLLEGE OF WILLIAM & MARY BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * PRIMME: https://github.com/primme/primme
28  * Contact: Andreas Stathopoulos, a n d r e a s _at_ c s . w m . e d u
29  *******************************************************************************
30  * File: template.h
31  *
 * Purpose - Contains definitions of macros used throughout PRIMME.
 *    In short, source files (*.c) are compiled several times, each time for a
 *    different type, referred to as SCALAR. Examples of types are float,
 *    double, complex float, complex double, and the corresponding GPU
 *    versions. All types are described in section "Arithmetic". Other macros
 *    are defined to refer to derived types and functions. An example is REAL,
 *    which is
38  *    defined as the real (non-complex) version of SCALAR. For instance,
39  *    when SCALAR is complex double, REAL is double. Similarly, it is possible
40  *    to call the real version of a function. For instance,
41  *    permute_vecs_Sprimme permutes vectors with type SCALAR and
42  *    permute_vecs_Rprimme permutes vectors with type REAL.
43  *
 *    When SCALAR is a GPU type, the pointers SCALAR* are supposed to point to
 *    memory allocated on GPUs, also called devices. For instance,
 *    Num_malloc_Sprimme allocates GPU memory when SCALAR is a GPU type. Use
 *    HSCALAR and HREAL as the non-GPU, also called host, versions of SCALAR and
 *    REAL. Also, to use the non-GPU version use the suffixes _SHprimme and
 *    _RHprimme. For instance,
 *    Num_malloc_SHprimme allocates memory on the host, and
 *    permute_vecs_RHprimme permutes REAL vectors on the host.
52  *
53  ******************************************************************************/
54 
55 #ifndef TEMPLATE_H
56 #define TEMPLATE_H
57 
58 #include "template_types.h"
59 
60 /**********************************************************************
61  * Macros USE_FLOAT, USE_FLOATCOMPLEX, USE_DOUBLE and USE_DOUBLECOMPLEX -
62  *    only one of them is defined at the same time, and identifies the
63  *    type of SCALAR, one of float, complex float, double or complex double.
64  *
65  * Macro USE_COMPLEX - only defined when SCALAR is a complex type.
66  *
67  * Macro USE_HOST - CPU version
68  *
69  * Macro USE_MAGMA - MAGMA version
70  *
71  * Macro SUPPORTED_TYPE - defined if functions with the current type
72  *    are going to be built.
73  *
 * Macro SUPPORTED_HALF_TYPE - defined if functions with the current type
 *    have a version in half precision.
76  **********************************************************************/
77 
78 /* Helper macros and types used to define SCALAR and REAL and their variants */
79 
/* HOST_STEM: name stem of the host (CPU) variants; it expands to nothing. */
#define HOST_STEM

/* Pick the backend from the USE_<type>[_MAGMA] selector set by the includer:
 *   USE_HOST / USE_MAGMA - defined for the host or the MAGMA backend
 *   STEM                 - name stem of the backend (empty or magma_)
 *   IMPL(BL, MA)         - expands to BL on the host and to MA with MAGMA
 * Compilation stops if no known selector is defined.
 */
#if defined(USE_HALF) || defined(USE_HALFCOMPLEX) || defined(USE_FLOAT) ||     \
      defined(USE_FLOATCOMPLEX) || defined(USE_DOUBLE) ||                      \
      defined(USE_DOUBLECOMPLEX)
#  define USE_HOST
#  define STEM
#  define IMPL(BL, MA) BL
#elif defined(USE_FLOAT_MAGMA) || defined(USE_FLOATCOMPLEX_MAGMA) ||           \
      defined(USE_DOUBLE_MAGMA) || defined(USE_DOUBLECOMPLEX_MAGMA) ||         \
      defined(USE_HALF_MAGMA) || defined(USE_HALFCOMPLEX_MAGMA)
#  define USE_MAGMA
#  define STEM magma_
#  define IMPL(BL, MA) MA
#else
#  error "template.h: define one of USE_{HALF,FLOAT,DOUBLE}[COMPLEX][_MAGMA] before including this header"
#endif
97 
/* STEM_C defaults to the stem of the current backend, unless ctemplate is   */
/* inspecting the sources (CHECK_TEMPLATE) or STEM_C was already defined.    */
#if !(defined(CHECK_TEMPLATE) || defined(STEM_C))
#  define STEM_C STEM
#endif
101 
/* Classify the current type: USE_REAL is defined for the real selectors     */
/* (half, float, double, quad) and USE_COMPLEX for the complex ones, for     */
/* both the host and the MAGMA flavors. Compilation stops otherwise.         */
#if defined(USE_HALF) || defined(USE_HALF_MAGMA) || defined(USE_FLOAT) ||      \
      defined(USE_FLOAT_MAGMA) || defined(USE_DOUBLE) ||                       \
      defined(USE_DOUBLE_MAGMA) || defined(USE_QUAD) ||                        \
      defined(USE_QUAD_MAGMA)
#  define USE_REAL
#elif defined(USE_HALFCOMPLEX) || defined(USE_HALFCOMPLEX_MAGMA) ||            \
      defined(USE_FLOATCOMPLEX) || defined(USE_FLOATCOMPLEX_MAGMA) ||          \
      defined(USE_DOUBLECOMPLEX) || defined(USE_DOUBLECOMPLEX_MAGMA) ||        \
      defined(USE_QUADCOMPLEX) || defined(USE_QUADCOMPLEX_MAGMA)
#  define USE_COMPLEX
#else
#  error "template.h: cannot tell whether the current type is real or complex; no known USE_<type> macro is defined"
#endif
115 
/* ARITH(H,K,S,C,D,Z,Q,W) expands to the single argument that matches the    */
/* current type: H/K for half real/complex, S/C for float, D/Z for double    */
/* and Q/W for quad. REAL_ARITH takes the same arguments and expands to the  */
/* one for the real type of the same precision (H, S, D or Q). Host and      */
/* MAGMA selectors of the same type choose the same argument.                */
#if   defined(USE_DOUBLE)        || defined(USE_DOUBLE_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) D
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) D
#elif defined(USE_DOUBLECOMPLEX) || defined(USE_DOUBLECOMPLEX_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) Z
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) D
#elif defined(USE_FLOAT)         || defined(USE_FLOAT_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) S
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) S
#elif defined(USE_FLOATCOMPLEX)  || defined(USE_FLOATCOMPLEX_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) C
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) S
#elif defined(USE_HALF)          || defined(USE_HALF_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) H
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) H
#elif defined(USE_HALFCOMPLEX)   || defined(USE_HALFCOMPLEX_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) K
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) H
#elif defined(USE_QUAD)          || defined(USE_QUAD_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) Q
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) Q
#elif defined(USE_QUADCOMPLEX)   || defined(USE_QUADCOMPLEX_MAGMA)
#  define      ARITH(H,K,S,C,D,Z,Q,W) W
#  define REAL_ARITH(H,K,S,C,D,Z,Q,W) Q
#else
#  error "template.h: ARITH cannot be defined; no known USE_<type> macro is defined"
#endif
143 
144 /* For host types, define SUPPORTED_HALF_TYPE when the compiler supports half
145  * precision. For MAGMA, define the macro if MAGMA also supports half precision.
146  *
147  * Define SUPPORTED_TYPE when inspecting the signature functions to generate
148  * the signature for all possible functions. Also define the macro for any
149  * setting without half precision, and for half precision if the compiler
150  * supports half precision.
151  */
152 
/* Half precision needs PRIMME_WITH_HALF and PRIMME_WITH_NATIVE_HALF in any  */
/* case. On the host that is enough; with MAGMA, PRIMME_WITH_MAGMA and       */
/* MAGMA_WITH_HALF are required as well.                                     */
#if defined(PRIMME_WITH_HALF) && defined(PRIMME_WITH_NATIVE_HALF)
#  if defined(USE_HOST)
#     define SUPPORTED_HALF_TYPE
#  elif defined(PRIMME_WITH_MAGMA) && defined(USE_MAGMA) &&                    \
        defined(MAGMA_WITH_HALF)
#     define SUPPORTED_HALF_TYPE
#  endif
#endif
159 
/* SUPPORTED_TYPE starts defined and is removed when the current type is not
 * supported. Nothing is removed while CHECK_TEMPLATE is defined. Otherwise
 * the macro is undefined if any of the following applies:
 * - USE_HALF/COMPLEX/_MAGMA is defined but SUPPORTED_HALF_TYPE is not.
 * - USE_HALF/COMPLEX_MAGMA is defined but MAGMA_WITH_HALF is not.
 * - USE_MAGMA is defined but PRIMME_WITH_MAGMA is not.
 * - USE_FLOAT/COMPLEX/_MAGMA is defined and PRIMME_WITHOUT_FLOAT is defined.
 */

#define SUPPORTED_TYPE
#ifndef CHECK_TEMPLATE
#  if defined(USE_HALF) || defined(USE_HALFCOMPLEX) ||                         \
        defined(USE_HALF_MAGMA) || defined(USE_HALFCOMPLEX_MAGMA)
#     ifndef SUPPORTED_HALF_TYPE
#        undef SUPPORTED_TYPE
#     endif
#  endif
#  if defined(USE_HALF_MAGMA) || defined(USE_HALFCOMPLEX_MAGMA)
#     ifndef MAGMA_WITH_HALF
#        undef SUPPORTED_TYPE
#     endif
#  endif
#  if defined(USE_MAGMA) && !defined(PRIMME_WITH_MAGMA)
#     undef SUPPORTED_TYPE
#  endif
#  if defined(USE_FLOAT) || defined(USE_FLOATCOMPLEX) ||                       \
        defined(USE_FLOAT_MAGMA) || defined(USE_FLOATCOMPLEX_MAGMA)
#     ifdef PRIMME_WITHOUT_FLOAT
#        undef SUPPORTED_TYPE
#     endif
#  endif
#endif
180 
181 /* A C99 code with complex type is not a valid C++ code. However C++          */
182 /* compilers usually can take it. Nevertheless in order to avoid the warnings */
183 /* while compiling in pedantic mode, we use the proper complex type for C99   */
184 /* (complex double and complex float) and C++ (std::complex<double> and       */
185 /* std::complex<float>). Of course both complex types are binary compatible.  */
186 
/* Accessors that work for both real and complex SCALAR:                     */
/*   REAL_PART(x), IMAGINARY_PART(x) : components of x                       */
/*   ABS(x)                          : absolute value / modulus of x         */
/*   CONJ(x)                         : complex conjugate of x                */
/* For real types they reduce to x, 0, fabs(x) and x respectively.           */
#if !defined(USE_COMPLEX)
#  define REAL_PART(x) (x)
#  define IMAGINARY_PART(x) 0
#  define ABS(x) (fabs(x))
#  define CONJ(x) (x)
#elif defined(__cplusplus)
#  define REAL_PART(x) (std::real(x))
#  define IMAGINARY_PART(x) (std::imag(x))
#  define ABS(x) (std::abs(x))
#  define CONJ(x) (std::conj(x))
#else
#  define REAL_PART(x) (creal(x))
#  define IMAGINARY_PART(x) (cimag(x))
#  define ABS(x) (cabs(x))
#  define CONJ(x) (conj(x))
#endif
205 
206 /* Helper macros to support complex arithmetic for types without complex   */
207 /* support in C99. For now, only half precision has this problem. The      */
208 /* approach is to cast the unsupported complex type to a supported type    */
209 /* with more precision. For instance, complex half precision is cast to    */
210 /* complex single precision.                                               */
211 /*                                                                         */
212 /* NOTE: 'A' is an unsupported complex type and 'B' is a supported type    */
213 /* SET_ZERO(A)       : set A = 0                                           */
214 /* SET_COMPLEX(A, B) : set A = B                                           */
215 /* TO_COMPLEX(A)     : cast A to a supported complex type                  */
216 /* PLUS_EQUAL(A, B)  : set A += B                                          */
217 /* MULT_EQUAL(A, B)  : set A *= B                                          */
218 
#if (defined(USE_HALFCOMPLEX) || defined(USE_HALFCOMPLEX_MAGMA)) &&            \
      !defined(PRIMME_WITH_NATIVE_COMPLEX_HALF)
/* Complex half without native support: A is accessed through its members r  */
/* and i, and arithmetic is carried out after converting with TO_COMPLEX.    */
#  define SET_ZERO(A) {(A).r = 0; (A).i = 0;}
#  define SET_COMPLEX(A,B) {(A).r = REAL_PART(B); (A).i = IMAGINARY_PART(B);}
#  ifndef __cplusplus
#     define TO_COMPLEX(A) ((A).r + (A).i * _Complex_I)
#  else
#     define TO_COMPLEX(A) (std::complex<HREAL>((HREAL)((A).r), (HREAL)((A).i)))
#  endif
#  define PLUS_EQUAL(A,B) {(A).r += REAL_PART(B); (A).i += IMAGINARY_PART(B);}
/* A *= B: the product is computed in HSCALAR and then REPLACES A. Storing   */
/* it with += would yield A + A*B instead. The temporary has a long name so  */
/* it cannot capture an identifier used in the expressions A or B.           */
#  define MULT_EQUAL(A, B)                                                     \
   {                                                                           \
      HSCALAR mult_equal_tmp_ = TO_COMPLEX(A) * (B);                           \
      (A).r = REAL_PART(mult_equal_tmp_);                                      \
      (A).i = IMAGINARY_PART(mult_equal_tmp_);                                 \
   }
#else
/* The type supports the usual operators; the helpers map directly to them,  */
/* except for complex half in C++, which goes through HSCALAR.               */
#  define SET_ZERO(A) {(A) = 0.0;}
#  define SET_COMPLEX(A,B) (A) = (B)
#  if defined(USE_HALFCOMPLEX) && defined(__cplusplus)
#     define TO_COMPLEX(A) (HSCALAR(REAL_PART(A), IMAGINARY_PART(A)))
#     define PLUS_EQUAL(A, B) (A) = TO_COMPLEX(A) + (B)
#     define MULT_EQUAL(A, B) (A) = TO_COMPLEX(A) * (B)
#  else
#     define TO_COMPLEX(A) (A)
#     define PLUS_EQUAL(A, B) (A) += (B)
#     define MULT_EQUAL(A, B) (A) *= (B)
#  endif
#endif
247 
248 
/* TEMPLATE_PLEASE tags the functions whose prototypes depend on macros and   */
/* are used in other files. The macro has a value only when the tool          */
/* ctemplate is inspecting the source files, which is indicated by the macro  */
/* CHECK_TEMPLATE being defined. See Makefile and tools/ctemplate.            */
/*                                                                            */
/* When SCALAR is not a complex type (e.g., float and double) the function    */
/* will be referred to as _Sprimme and _Rprimme. Otherwise it will be         */
/* referred to only as _Sprimme. The term TEMPLATE_PLEASE should prefix every */
/* function that will be instantiated with different values for SCALAR and    */
/* REAL.                                                                      */
258 
#ifndef TEMPLATE_H_PRIVATE
#define TEMPLATE_H_PRIVATE

/* USE_ARITH(RE,CO): hands RE to every real slot and CO to every complex     */
/* slot of ARITH, so it expands to RE or CO according to the current type.   */
#  define USE_ARITH(RE,CO) ARITH(RE,CO,RE,CO,RE,CO,RE,CO)

/* USE_SR emits two USE(...) entries, one whose first argument is built from */
/* S, HTAG, PREC and primme, and one built from R, HTAG, PREC and primme.    */
/* The second argument is built from PREFIX, a type letter (USE_ARITH(RE,CO) */
/* for the S entry, RE for the R entry), primme and SUFFIX.                  */
/* NOTE(review): CONCAT, STR0 and USE are defined outside this file; token   */
/* spacing inside the arguments is kept untouched in case it is stringified. */
#  define USE_SR(RE,CO,PREC,HTAG,PREFIX,SUFFIX) \
      USE(CONCAT(CONCAT(CONCAT(S,HTAG),PREC),primme), STR0(CONCAT(CONCAT(CONCAT(PREFIX,USE_ARITH(RE,CO)),primme),SUFFIX))) \
      USE(CONCAT(CONCAT(CONCAT(R,HTAG),PREC),primme), STR0(CONCAT(CONCAT(CONCAT(PREFIX,RE),primme),SUFFIX)))

/* USE_TYPE repeats USE_SR for the four precisions, tagged h, s, d and q,    */
/* each with its own real/complex pair of letters.                           */
#  define USE_TYPE(HR,HC,SR,SC,DR,DC,QR,QC,HTAG,PREFIX,SUFFIX)  \
      USE_SR(HR,HC,h,HTAG,PREFIX,SUFFIX) \
      USE_SR(SR,SC,s,HTAG,PREFIX,SUFFIX) \
      USE_SR(DR,DC,d,HTAG,PREFIX,SUFFIX) \
      USE_SR(QR,QC,q,HTAG,PREFIX,SUFFIX)

#endif /* TEMPLATE_H_PRIVATE */
275 
/* In a regular build TEMPLATE_PLEASE and STATIC expand to nothing. When     */
/* CHECK_TEMPLATE is defined they expand to the APPEND_FUNC/USE/USE_TYPE     */
/* annotations listed below.                                                 */
#ifndef CHECK_TEMPLATE
#  define TEMPLATE_PLEASE
#  define STATIC
#else
#  define TEMPLATE_PLEASE \
      APPEND_FUNC(Sprimme,SCALAR_SUF) \
      USE(Sprimme,"SCALAR_SUF") \
      USE(Rprimme,"REAL_SUF") \
      USE(SHprimme,"HOST_SCALAR_SUF") \
      USE(RHprimme,"HOST_REAL_SUF") \
      USE(SXprimme,"XSCALAR_SUF") \
      USE(RXprimme,"XREAL_SUF") \
      USE_TYPE(h,k,s,c,d,z,q,w, , STEM_C, ) \
      USE_TYPE(h,k,s,c,d,z,q,w, X, HOST_STEM, ) \
      USE_TYPE(s,c,s,c,d,z,q,w, H, HOST_STEM, )

#  define STATIC APPEND_FUNC(,SCALAR_SUF) USE(,"SCALAR_SUF")
#endif /* CHECK_TEMPLATE */
291 
/* Avoid using the final types for integers, half, quad and complex in the   */
/* generated header files: with CHECK_TEMPLATE defined, the PRIMME_* type    */
/* macros are undefined, so the names PRIMME_INT, PRIMME_HALF,               */
/* PRIMME_COMPLEX_FLOAT, etc. are no longer replaced.                        */

#if defined(CHECK_TEMPLATE)
#  undef PRIMME_INT
#  undef PRIMME_HALF
#  undef PRIMME_QUAD
#  undef PRIMME_COMPLEX_HALF
#  undef PRIMME_COMPLEX_FLOAT
#  undef PRIMME_COMPLEX_DOUBLE
#  undef PRIMME_COMPLEX_QUAD
#endif /* CHECK_TEMPLATE */
304 
305 #endif /* TEMPLATE_H */
306