1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8 Copyright (C) 2016, Hewlett Packard Enterprise Development LP
9 Copyright (C) 2020, Advanced Micro Devices, Inc.
10
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are
13 met:
14 - Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19 - Neither the name(s) of the copyright holder(s) nor the names of its
20 contributors may be used to endorse or promote products derived
21 from this software without specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35 */
36
37 #ifndef BLIS_TYPE_DEFS_H
38 #define BLIS_TYPE_DEFS_H
39
40
41 //
42 // -- BLIS basic types ---------------------------------------------------------
43 //
44
// Bring the fixed-width integer types into scope. Three situations are
// handled, in order: C++, C99-or-later, and pre-C99 Windows compilers.
// Anything else is rejected at compile time.
#if defined(__cplusplus)
  // C++ translation unit: only stdint.h is included.
  #include <stdint.h>
#elif defined(__STDC_VERSION__) && ( __STDC_VERSION__ >= 199901L )
  // C99 (or later): include stdint.h along with stdbool.h.
  #include <stdint.h>
  #include <stdbool.h>
#elif defined(_WIN32)
  // No stdint.h available: declare by hand the four integer types this
  // header relies upon, using the compiler's sized __intN types.
  typedef          __int32  int32_t;
  typedef unsigned __int32 uint32_t;
  typedef          __int64  int64_t;
  typedef unsigned __int64 uint64_t;
#else
  #error "Attempting to compile on pre-C99 system without stdint.h."
#endif
63
64 // -- General-purpose integers --
65
// If BLAS integers are 64 bits, mandate that BLIS integers also be 64 bits.
// NOTE: This cpp guard will only meaningfully change BLIS's behavior on
// systems where the BLIS integer size would have been automatically selected
// to be 32 bits, since explicit selection of 32 bits is prohibited at
// configure-time (and explicit or automatic selection of 64 bits is fine
// and would have had the same result).
//
// The BLAS integer size is tested via BLIS_BLAS_INT_TYPE_SIZE, which is the
// macro that selects f77_int further down in this header. The guard used to
// test only BLIS_BLAS_INT_SIZE; an identifier that is not a defined macro
// evaluates to 0 inside #if, so the guard could silently never fire. The
// older spelling is still honored in case some build defines it.
#if ( defined(BLIS_BLAS_INT_TYPE_SIZE) && ( BLIS_BLAS_INT_TYPE_SIZE == 64 ) ) || \
    ( defined(BLIS_BLAS_INT_SIZE)      && ( BLIS_BLAS_INT_SIZE      == 64 ) )
  #undef  BLIS_INT_TYPE_SIZE
  #define BLIS_INT_TYPE_SIZE 64
#endif

// Define the general-purpose signed/unsigned integer types according to
// the requested size. Any value other than 32 or 64 (including an undefined
// BLIS_INT_TYPE_SIZE) falls back to the platform's long int.
#if BLIS_INT_TYPE_SIZE == 32
  typedef           int32_t  gint_t;
  typedef          uint32_t guint_t;
#elif BLIS_INT_TYPE_SIZE == 64
  typedef           int64_t  gint_t;
  typedef          uint64_t guint_t;
#else
  typedef   signed long int  gint_t;
  typedef unsigned long int guint_t;
#endif
88
89 // -- Boolean type --
90
91 // NOTE: bool_t is no longer used and has been replaced with C99's bool type.
92 //typedef bool bool_t;
93
// TRUE and FALSE are the boolean constants used throughout BLIS. They are
// simple aliases for C99's true and false (from stdbool.h), and are only
// defined here if some earlier header has not already defined them.
#if !defined(TRUE)
  #define TRUE  true
#endif

#if !defined(FALSE)
  #define FALSE false
#endif
104
105 // -- Special-purpose integers --
106
107 // This cpp guard provides a temporary hack to allow libflame
108 // interoperability with BLIS.
109 #ifndef _DEFINED_DIM_T
110 #define _DEFINED_DIM_T
111 typedef gint_t dim_t; // dimension type
112 #endif
113 typedef gint_t inc_t; // increment/stride type
114 typedef gint_t doff_t; // diagonal offset type
115 typedef guint_t siz_t; // byte size type
116 typedef uint32_t objbits_t; // object information bit field
117
118 // -- Real types --
119
// sizeof() cannot be evaluated inside a C preprocessor #if conditional, so
// the sizes of the four floating-point types are spelled out as literals
// for use in such conditionals.
#define BLIS_SIZEOF_S       4   // sizeof(float)
#define BLIS_SIZEOF_D       8   // sizeof(double)
#define BLIS_SIZEOF_C       8   // sizeof(scomplex)
#define BLIS_SIZEOF_Z      16   // sizeof(dcomplex)

// The number of floating-point types supported, and the size (in bytes)
// of the largest one.
#define BLIS_NUM_FP_TYPES   4
#define BLIS_MAX_TYPE_SIZE  sizeof(dcomplex)
132
133 // -- Complex types --
134
#if defined(BLIS_ENABLE_C99_COMPLEX)

  // The configuration asked for C99 complex types. Refuse to compile if the
  // compiler does not report C99 (or later); otherwise map the official
  // complex types onto the BLIS complex type names.
  #if !( defined(__STDC_VERSION__) && ( __STDC_VERSION__ >= 199901L ) )
    #error "Configuration requested C99 complex types, but C99 does not appear to be supported."
  #else
    #include <complex.h>

    typedef float  complex scomplex;
    typedef double complex dcomplex;
  #endif

#else // !defined(BLIS_ENABLE_C99_COMPLEX)

  // Struct-based complex types: a real part followed by an imaginary part.
  // Each typedef sits behind its own _DEFINED_* guard, a temporary hack that
  // allows libflame interoperability with BLIS.

  #if !defined(_DEFINED_SCOMPLEX)
    #define _DEFINED_SCOMPLEX
    typedef struct
    {
    	float  real;
    	float  imag;
    } scomplex;
  #endif

  #if !defined(_DEFINED_DCOMPLEX)
    #define _DEFINED_DCOMPLEX
    typedef struct
    {
    	double real;
    	double imag;
    } dcomplex;
  #endif

#endif // BLIS_ENABLE_C99_COMPLEX
172
173 // -- Atom type --
174
175 // Note: atom types are used to hold "bufferless" scalar object values. Note
176 // that it needs to be as large as the largest possible scalar value we might
177 // want to hold. Thus, for now, it is a dcomplex.
178 typedef dcomplex atom_t;
179
180 // -- Fortran-77 types --
181
182 // Note: These types are typically only used by BLAS compatibility layer, but
183 // we must define them even when the compatibility layer isn't being built
184 // because they also occur in bli_slamch() and bli_dlamch().
185
186 // Define f77_int depending on what size of integer was requested.
187 #if BLIS_BLAS_INT_TYPE_SIZE == 32
188 typedef int32_t f77_int;
189 #elif BLIS_BLAS_INT_TYPE_SIZE == 64
190 typedef int64_t f77_int;
191 #else
192 typedef long int f77_int;
193 #endif
194
195 typedef char f77_char;
196 typedef float f77_float;
197 typedef double f77_double;
198 typedef scomplex f77_scomplex;
199 typedef dcomplex f77_dcomplex;
200
201 // -- Void function pointer types --
202
// Note: Use void_fp wherever the address of a *function* is conveyed or
// stored before being typecast back to its correct function type. It is
// not needed when conveying or storing the address of *data* (such as an
// array of float or double).
//
// The type is currently a plain void pointer; the function-pointer form
//   typedef void (*void_fp)( void );
// is kept here only as a disabled alternative.
typedef void* void_fp;
210
211
212 //
213 // -- BLIS info bit field offsets ----------------------------------------------
214 //
215
216 /*
217 info field description
218
219 bit(s) purpose
220 ------- -------
221 2 ~ 0 Stored numerical datatype
222 - 0: domain (0 == real, 1 == complex)
223 - 1: precision (0 == single, 1 == double)
224 - 2: special (100 = int; 101 = const)
225 3 Transposition required [during pack]?
226 4 Conjugation required [during pack]?
227 7 ~ 5 Part of matrix stored:
228 - 5: strictly upper triangular
229 - 6: diagonal
230 - 7: strictly lower triangular
231 8 Implicit unit diagonal?
232 9 Invert diagonal required [during pack]?
233 12 ~ 10 Target numerical datatype
234 - 10: domain (0 == real, 1 == complex)
235 - 11: precision (0 == single, 1 == double)
236 - 12: used to encode integer, constant types
237 15 ~ 13 Execution numerical datatype
238 - 13: domain (0 == real, 1 == complex)
239 - 14: precision (0 == single, 1 == double)
240 - 15: used to encode integer, constant types
241 22 ~ 16 Packed type/status
242 - 0 0000 00: not packed
243 - 1 0000 00: packed (unspecified; by rows, columns, or vector)
244 - 1 0000 00: packed by rows
245 - 1 0000 01: packed by columns
246 - 1 0000 10: packed by row panels
247 - 1 0000 11: packed by column panels
248 - 1 0001 10: packed by 4m interleaved row panels
249 - 1 0001 11: packed by 4m interleaved column panels
250 - 1 0010 10: packed by 3m interleaved row panels
251 - 1 0010 11: packed by 3m interleaved column panels
252 - 1 0011 10: packed by 4m separated row panels (not used)
253 - 1 0011 11: packed by 4m separated column panels (not used)
254 - 1 0100 10: packed by 3m separated row panels
255 - 1 0100 11: packed by 3m separated column panels
256 - 1 0101 10: packed real-only row panels
257 - 1 0101 11: packed real-only column panels
258 - 1 0110 10: packed imag-only row panels
259 - 1 0110 11: packed imag-only column panels
260 - 1 0111 10: packed real+imag row panels
261 - 1 0111 11: packed real+imag column panels
262 - 1 1000 10: packed by 1m expanded row panels
263 - 1 1000 11: packed by 1m expanded column panels
264 - 1 1001 10: packed by 1m reordered row panels
265 - 1 1001 11: packed by 1m reordered column panels
266 23 Packed panel order if upper-stored
267 - 0 == forward order if upper
268 - 1 == reverse order if upper
269 24 Packed panel order if lower-stored
270 - 0 == forward order if lower
271 - 1 == reverse order if lower
272 26 ~ 25 Packed buffer type
273 - 0 == block of A
274 - 1 == panel of B
275 - 2 == panel of C
276 - 3 == general use
277 28 ~ 27 Structure type
278 - 0 == general
279 - 1 == Hermitian
280 - 2 == symmetric
281 - 3 == triangular
282 31 ~ 29 Computation numerical datatype
283 - 29: domain (0 == real, 1 == complex)
284 - 30: precision (0 == single, 1 == double)
285 - 31: used to encode integer, constant types
286
287 info2 field description
288
289 bit(s) purpose
290 ------- -------
291 2 ~ 0 Scalar storage numerical datatype
292 - 0: domain (0 == real, 1 == complex)
293 - 1: precision (0 == single, 1 == double)
294 - 2: used to encode integer, constant types
295 */
296
// Bit offsets of each field within the 32-bit "info" word. Several names
// intentionally share an offset: the first name of each group denotes the
// whole multi-bit field, the following ones its individual bits.

// info
#define BLIS_DATATYPE_SHIFT                 0
#define   BLIS_DOMAIN_SHIFT                 0
#define   BLIS_PRECISION_SHIFT              1
#define BLIS_CONJTRANS_SHIFT                3
#define   BLIS_TRANS_SHIFT                  3
#define   BLIS_CONJ_SHIFT                   4
#define BLIS_UPLO_SHIFT                     5
#define   BLIS_UPPER_SHIFT                  5
#define   BLIS_DIAG_SHIFT                   6
#define   BLIS_LOWER_SHIFT                  7
#define BLIS_UNIT_DIAG_SHIFT                8
#define BLIS_INVERT_DIAG_SHIFT              9
#define BLIS_TARGET_DT_SHIFT               10
#define   BLIS_TARGET_DOMAIN_SHIFT         10
#define   BLIS_TARGET_PREC_SHIFT           11
#define BLIS_EXEC_DT_SHIFT                 13
#define   BLIS_EXEC_DOMAIN_SHIFT           13
#define   BLIS_EXEC_PREC_SHIFT             14
#define BLIS_PACK_SCHEMA_SHIFT             16
#define   BLIS_PACK_RC_SHIFT               16
#define   BLIS_PACK_PANEL_SHIFT            17
#define   BLIS_PACK_FORMAT_SHIFT           18
#define   BLIS_PACK_SHIFT                  22
#define BLIS_PACK_REV_IF_UPPER_SHIFT       23
#define BLIS_PACK_REV_IF_LOWER_SHIFT       24
#define BLIS_PACK_BUFFER_SHIFT             25
#define BLIS_STRUC_SHIFT                   27
#define BLIS_COMP_DT_SHIFT                 29
#define   BLIS_COMP_DOMAIN_SHIFT           29
#define   BLIS_COMP_PREC_SHIFT             30

// info2
#define BLIS_SCALAR_DT_SHIFT                0
#define   BLIS_SCALAR_DOMAIN_SHIFT          0
#define   BLIS_SCALAR_PREC_SHIFT            1

//
// -- BLIS info bit field masks ------------------------------------------------
//

// Each mask is the field's all-ones pattern shifted to the field's offset.

// info
#define BLIS_DATATYPE_BITS                 ( 0x7  << BLIS_DATATYPE_SHIFT          )
#define   BLIS_DOMAIN_BIT                  ( 0x1  << BLIS_DOMAIN_SHIFT            )
#define   BLIS_PRECISION_BIT               ( 0x1  << BLIS_PRECISION_SHIFT         )
#define BLIS_CONJTRANS_BITS                ( 0x3  << BLIS_CONJTRANS_SHIFT         )
#define   BLIS_TRANS_BIT                   ( 0x1  << BLIS_TRANS_SHIFT             )
#define   BLIS_CONJ_BIT                    ( 0x1  << BLIS_CONJ_SHIFT              )
#define BLIS_UPLO_BITS                     ( 0x7  << BLIS_UPLO_SHIFT              )
#define   BLIS_UPPER_BIT                   ( 0x1  << BLIS_UPPER_SHIFT             )
#define   BLIS_DIAG_BIT                    ( 0x1  << BLIS_DIAG_SHIFT              )
#define   BLIS_LOWER_BIT                   ( 0x1  << BLIS_LOWER_SHIFT             )
#define BLIS_UNIT_DIAG_BIT                 ( 0x1  << BLIS_UNIT_DIAG_SHIFT         )
#define BLIS_INVERT_DIAG_BIT               ( 0x1  << BLIS_INVERT_DIAG_SHIFT       )
#define BLIS_TARGET_DT_BITS                ( 0x7  << BLIS_TARGET_DT_SHIFT         )
#define   BLIS_TARGET_DOMAIN_BIT           ( 0x1  << BLIS_TARGET_DOMAIN_SHIFT     )
#define   BLIS_TARGET_PREC_BIT             ( 0x1  << BLIS_TARGET_PREC_SHIFT       )
#define BLIS_EXEC_DT_BITS                  ( 0x7  << BLIS_EXEC_DT_SHIFT           )
#define   BLIS_EXEC_DOMAIN_BIT             ( 0x1  << BLIS_EXEC_DOMAIN_SHIFT       )
#define   BLIS_EXEC_PREC_BIT               ( 0x1  << BLIS_EXEC_PREC_SHIFT         )
#define BLIS_PACK_SCHEMA_BITS              ( 0x7F << BLIS_PACK_SCHEMA_SHIFT       )
#define   BLIS_PACK_RC_BIT                 ( 0x1  << BLIS_PACK_RC_SHIFT           )
#define   BLIS_PACK_PANEL_BIT              ( 0x1  << BLIS_PACK_PANEL_SHIFT        )
#define   BLIS_PACK_FORMAT_BITS            ( 0xF  << BLIS_PACK_FORMAT_SHIFT       )
#define   BLIS_PACK_BIT                    ( 0x1  << BLIS_PACK_SHIFT              )
#define BLIS_PACK_REV_IF_UPPER_BIT         ( 0x1  << BLIS_PACK_REV_IF_UPPER_SHIFT )
#define BLIS_PACK_REV_IF_LOWER_BIT         ( 0x1  << BLIS_PACK_REV_IF_LOWER_SHIFT )
#define BLIS_PACK_BUFFER_BITS              ( 0x3  << BLIS_PACK_BUFFER_SHIFT       )
#define BLIS_STRUC_BITS                    ( 0x3  << BLIS_STRUC_SHIFT             )
// NOTE: This field occupies bits 29-31, so the mask reaches the top bit of a
// 32-bit word. Shifting the signed literal 0x7 left by 29 overflows int,
// which is undefined behavior in C; the unsigned literal keeps the shift
// well-defined and yields 0xE0000000.
#define BLIS_COMP_DT_BITS                  ( 0x7u << BLIS_COMP_DT_SHIFT           )
#define   BLIS_COMP_DOMAIN_BIT             ( 0x1  << BLIS_COMP_DOMAIN_SHIFT       )
#define   BLIS_COMP_PREC_BIT               ( 0x1  << BLIS_COMP_PREC_SHIFT         )

// info2
#define BLIS_SCALAR_DT_BITS                ( 0x7  << BLIS_SCALAR_DT_SHIFT         )
#define   BLIS_SCALAR_DOMAIN_BIT           ( 0x1  << BLIS_SCALAR_DOMAIN_SHIFT     )
#define   BLIS_SCALAR_PREC_BIT             ( 0x1  << BLIS_SCALAR_PREC_SHIFT       )
374
375
376 //
377 // -- BLIS enumerated type value definitions -----------------------------------
378 //
379
// Values that the info bit fields may take, expressed in terms of the bit
// masks and shift offsets so that each value already sits at its field's
// position. They are grouped below by the field they belong to.

// Domain, precision, and numerical datatype.
#define BLIS_BITVAL_REAL                      0x0
#define BLIS_BITVAL_COMPLEX                   BLIS_DOMAIN_BIT
#define BLIS_BITVAL_SINGLE_PREC               0x0
#define BLIS_BITVAL_DOUBLE_PREC               BLIS_PRECISION_BIT
#define   BLIS_BITVAL_FLOAT_TYPE              0x0
#define   BLIS_BITVAL_SCOMPLEX_TYPE           BLIS_DOMAIN_BIT
#define   BLIS_BITVAL_DOUBLE_TYPE             BLIS_PRECISION_BIT
#define   BLIS_BITVAL_DCOMPLEX_TYPE         ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT )
#define   BLIS_BITVAL_INT_TYPE                0x04
#define   BLIS_BITVAL_CONST_TYPE              0x05

// Transposition and conjugation.
#define BLIS_BITVAL_NO_TRANS                  0x0
#define BLIS_BITVAL_TRANS                     BLIS_TRANS_BIT
#define BLIS_BITVAL_NO_CONJ                   0x0
#define BLIS_BITVAL_CONJ                      BLIS_CONJ_BIT
#define BLIS_BITVAL_CONJ_TRANS              ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )

// Part of matrix stored.
#define BLIS_BITVAL_ZEROS                     0x0
#define BLIS_BITVAL_UPPER                   ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_LOWER                   ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_DENSE                     BLIS_UPLO_BITS

// Diagonal properties.
#define BLIS_BITVAL_NONUNIT_DIAG              0x0
#define BLIS_BITVAL_UNIT_DIAG                 BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG               BLIS_INVERT_DIAG_BIT

// Pack format values (placed in the pack format sub-field).
#define BLIS_BITVAL_NOT_PACKED                0x0
#define   BLIS_BITVAL_4MI                   ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_3MI                   ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_4MS                   ( 0x3 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_3MS                   ( 0x4 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_RO                    ( 0x5 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_IO                    ( 0x6 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_RPI                   ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_1E                    ( 0x8 << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_1R                    ( 0x9 << BLIS_PACK_FORMAT_SHIFT )

// Complete pack schema values. Note that "unspecified" and "rows" are the
// same bit pattern.
#define   BLIS_BITVAL_PACKED_UNSPEC         ( BLIS_PACK_BIT                                                            )
#define   BLIS_BITVAL_PACKED_ROWS           ( BLIS_PACK_BIT                                                            )
#define   BLIS_BITVAL_PACKED_COLUMNS        ( BLIS_PACK_BIT                                         | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS     ( BLIS_PACK_BIT                   | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS     ( BLIS_PACK_BIT                   | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_4MS ( BLIS_PACK_BIT | BLIS_BITVAL_4MS | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_4MS ( BLIS_PACK_BIT | BLIS_BITVAL_4MS | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_3MS ( BLIS_PACK_BIT | BLIS_BITVAL_3MS | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_3MS ( BLIS_PACK_BIT | BLIS_BITVAL_3MS | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_RO  ( BLIS_PACK_BIT | BLIS_BITVAL_RO  | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_RO  ( BLIS_PACK_BIT | BLIS_BITVAL_RO  | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_IO  ( BLIS_PACK_BIT | BLIS_BITVAL_IO  | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_IO  ( BLIS_PACK_BIT | BLIS_BITVAL_IO  | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_1E  ( BLIS_PACK_BIT | BLIS_BITVAL_1E  | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_1E  ( BLIS_PACK_BIT | BLIS_BITVAL_1E  | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_1R  ( BLIS_PACK_BIT | BLIS_BITVAL_1R  | BLIS_PACK_PANEL_BIT                    )
#define   BLIS_BITVAL_PACKED_COL_PANELS_1R  ( BLIS_PACK_BIT | BLIS_BITVAL_1R  | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )

// Packed panel order.
#define BLIS_BITVAL_PACK_FWD_IF_UPPER         0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER         BLIS_PACK_REV_IF_UPPER_BIT
#define BLIS_BITVAL_PACK_FWD_IF_LOWER         0x0
#define BLIS_BITVAL_PACK_REV_IF_LOWER         BLIS_PACK_REV_IF_LOWER_BIT

// Packed buffer type.
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK        0x0
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL      ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL      ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE      ( 0x3 << BLIS_PACK_BUFFER_SHIFT )

// Structure type.
#define BLIS_BITVAL_GENERAL                   0x0
#define BLIS_BITVAL_HERMITIAN               ( 0x1 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_SYMMETRIC               ( 0x2 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_TRIANGULAR              ( 0x3 << BLIS_STRUC_SHIFT )
447
448
449 //
450 // -- BLIS enumerated type definitions -----------------------------------------
451 //
452
453 // -- Operational parameter types --
454
455 typedef enum
456 {
457 BLIS_NO_TRANSPOSE = 0x0,
458 BLIS_TRANSPOSE = BLIS_BITVAL_TRANS,
459 BLIS_CONJ_NO_TRANSPOSE = BLIS_BITVAL_CONJ,
460 BLIS_CONJ_TRANSPOSE = BLIS_BITVAL_CONJ_TRANS
461 } trans_t;
462
463 typedef enum
464 {
465 BLIS_NO_CONJUGATE = 0x0,
466 BLIS_CONJUGATE = BLIS_BITVAL_CONJ
467 } conj_t;
468
469 typedef enum
470 {
471 BLIS_ZEROS = BLIS_BITVAL_ZEROS,
472 BLIS_LOWER = BLIS_BITVAL_LOWER,
473 BLIS_UPPER = BLIS_BITVAL_UPPER,
474 BLIS_DENSE = BLIS_BITVAL_DENSE
475 } uplo_t;
476
// Side parameter: left is 0, right is 1.
typedef enum
{
	BLIS_LEFT  = 0,
	BLIS_RIGHT = 1
} side_t;
482
483 typedef enum
484 {
485 BLIS_NONUNIT_DIAG = 0x0,
486 BLIS_UNIT_DIAG = BLIS_BITVAL_UNIT_DIAG
487 } diag_t;
488
489 typedef enum
490 {
491 BLIS_NO_INVERT_DIAG = 0x0,
492 BLIS_INVERT_DIAG = BLIS_BITVAL_INVERT_DIAG
493 } invdiag_t;
494
495 typedef enum
496 {
497 BLIS_GENERAL = BLIS_BITVAL_GENERAL,
498 BLIS_HERMITIAN = BLIS_BITVAL_HERMITIAN,
499 BLIS_SYMMETRIC = BLIS_BITVAL_SYMMETRIC,
500 BLIS_TRIANGULAR = BLIS_BITVAL_TRIANGULAR
501 } struc_t;
502
503
504 // -- Data type --
505
506 typedef enum
507 {
508 BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE,
509 BLIS_DOUBLE = BLIS_BITVAL_DOUBLE_TYPE,
510 BLIS_SCOMPLEX = BLIS_BITVAL_SCOMPLEX_TYPE,
511 BLIS_DCOMPLEX = BLIS_BITVAL_DCOMPLEX_TYPE,
512 BLIS_INT = BLIS_BITVAL_INT_TYPE,
513 BLIS_CONSTANT = BLIS_BITVAL_CONST_TYPE,
514 BLIS_DT_LO = BLIS_FLOAT,
515 BLIS_DT_HI = BLIS_DCOMPLEX
516 } num_t;
517
518 typedef enum
519 {
520 BLIS_REAL = BLIS_BITVAL_REAL,
521 BLIS_COMPLEX = BLIS_BITVAL_COMPLEX
522 } dom_t;
523
524 typedef enum
525 {
526 BLIS_SINGLE_PREC = BLIS_BITVAL_SINGLE_PREC,
527 BLIS_DOUBLE_PREC = BLIS_BITVAL_DOUBLE_PREC
528 } prec_t;
529
530
531 // -- Pack schema type --
532
533 typedef enum
534 {
535 BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
536 BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
537 BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
538 BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
539 BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
540 BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
541 BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
542 BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
543 BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
544 BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
545 BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
546 BLIS_PACKED_ROW_PANELS_4MS = BLIS_BITVAL_PACKED_ROW_PANELS_4MS,
547 BLIS_PACKED_COL_PANELS_4MS = BLIS_BITVAL_PACKED_COL_PANELS_4MS,
548 BLIS_PACKED_ROW_PANELS_3MS = BLIS_BITVAL_PACKED_ROW_PANELS_3MS,
549 BLIS_PACKED_COL_PANELS_3MS = BLIS_BITVAL_PACKED_COL_PANELS_3MS,
550 BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
551 BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
552 BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
553 BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
554 BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
555 BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
556 BLIS_PACKED_ROW_PANELS_1E = BLIS_BITVAL_PACKED_ROW_PANELS_1E,
557 BLIS_PACKED_COL_PANELS_1E = BLIS_BITVAL_PACKED_COL_PANELS_1E,
558 BLIS_PACKED_ROW_PANELS_1R = BLIS_BITVAL_PACKED_ROW_PANELS_1R,
559 BLIS_PACKED_COL_PANELS_1R = BLIS_BITVAL_PACKED_COL_PANELS_1R
560 } pack_t;
561
562 // We combine row and column packing into one "type", and we start
563 // with BLIS_PACKED_ROW_PANELS, _COLUMN_PANELS. We also count the
564 // schema pair for "4ms" (4m separated), because its bit value has
565 // been reserved, even though we don't use it.
566 #define BLIS_NUM_PACK_SCHEMA_TYPES 10
567
568
569 // -- Pack order type --
570
571 typedef enum
572 {
573 BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER,
574 BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER,
575
576 BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER,
577 BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER
578 } packord_t;
579
580
581 // -- Pack buffer type --
582
583 typedef enum
584 {
585 BLIS_BUFFER_FOR_A_BLOCK = BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
586 BLIS_BUFFER_FOR_B_PANEL = BLIS_BITVAL_BUFFER_FOR_B_PANEL,
587 BLIS_BUFFER_FOR_C_PANEL = BLIS_BITVAL_BUFFER_FOR_C_PANEL,
588 BLIS_BUFFER_FOR_GEN_USE = BLIS_BITVAL_BUFFER_FOR_GEN_USE
589 } packbuf_t;
590
591
592 // -- Partitioning direction --
593
// Partitioning direction: forward is 0, backward is 1.
typedef enum
{
	BLIS_FWD = 0,
	BLIS_BWD = 1
} dir_t;
599
600
601 // -- Subpartition type --
602
// Subpartition identifiers, numbered consecutively from zero.
typedef enum
{
	BLIS_SUBPART0     = 0,
	BLIS_SUBPART1     = 1,
	BLIS_SUBPART2     = 2,
	BLIS_SUBPART1AND0 = 3,
	BLIS_SUBPART1AND2 = 4,
	BLIS_SUBPART1A    = 5,
	BLIS_SUBPART1B    = 6,
	BLIS_SUBPART00    = 7,
	BLIS_SUBPART10    = 8,
	BLIS_SUBPART20    = 9,
	BLIS_SUBPART01    = 10,
	BLIS_SUBPART11    = 11,
	BLIS_SUBPART21    = 12,
	BLIS_SUBPART02    = 13,
	BLIS_SUBPART12    = 14,
	BLIS_SUBPART22    = 15
} subpart_t;
622
623
624 // -- Matrix dimension type --
625
// Matrix dimension selector: m is 0, n is 1.
typedef enum
{
	BLIS_M = 0,
	BLIS_N = 1
} mdim_t;
631
632
633 // -- Machine parameter types --
634
// Machine parameter identifiers, numbered consecutively from zero.
typedef enum
{
	BLIS_MACH_EPS      = 0,
	BLIS_MACH_SFMIN    = 1,
	BLIS_MACH_BASE     = 2,
	BLIS_MACH_PREC     = 3,
	BLIS_MACH_NDIGMANT = 4,
	BLIS_MACH_RND      = 5,
	BLIS_MACH_EMIN     = 6,
	BLIS_MACH_RMIN     = 7,
	BLIS_MACH_EMAX     = 8,
	BLIS_MACH_RMAX     = 9,
	BLIS_MACH_EPS2     = 10
} machval_t;

// Number of machine parameters, plus aliases for the first and last ones.
#define BLIS_NUM_MACH_PARAMS   11
#define BLIS_MACH_PARAM_FIRST  BLIS_MACH_EPS
#define BLIS_MACH_PARAM_LAST   BLIS_MACH_EPS2
653
654
655 // -- Induced method types --
656
// Induced method identifiers, numbered consecutively from zero, with
// BLIS_NAT last. BLIS_IND_FIRST and BLIS_IND_LAST alias the two ends of
// the range.
typedef enum
{
	BLIS_3MH       = 0,
	BLIS_3M1       = 1,
	BLIS_4MH       = 2,
	BLIS_4M1B      = 3,
	BLIS_4M1A      = 4,
	BLIS_1M        = 5,
	BLIS_NAT       = 6,

	BLIS_IND_FIRST = BLIS_3MH,
	BLIS_IND_LAST  = BLIS_NAT
} ind_t;

#define BLIS_NUM_IND_METHODS (BLIS_NAT+1)

// These are used in bli_*_oapi.c to construct the ind_t values from
// the induced method substrings that go into function names.
#define bli_3mh  BLIS_3MH
#define bli_3m1  BLIS_3M1
#define bli_4mh  BLIS_4MH
#define bli_4mb  BLIS_4M1B
#define bli_4m1  BLIS_4M1A
#define bli_1m   BLIS_1M
#define bli_nat  BLIS_NAT
681
682
683 // -- Kernel ID types --
684
// Level-1v kernel identifiers, numbered consecutively from zero.
typedef enum
{
	BLIS_ADDV_KER    = 0,
	BLIS_AMAXV_KER   = 1,
	BLIS_AXPBYV_KER  = 2,
	BLIS_AXPYV_KER   = 3,
	BLIS_COPYV_KER   = 4,
	BLIS_DOTV_KER    = 5,
	BLIS_DOTXV_KER   = 6,
	BLIS_INVERTV_KER = 7,
	BLIS_SCALV_KER   = 8,
	BLIS_SCAL2V_KER  = 9,
	BLIS_SETV_KER    = 10,
	BLIS_SUBV_KER    = 11,
	BLIS_SWAPV_KER   = 12,
	BLIS_XPBYV_KER   = 13
} l1vkr_t;

#define BLIS_NUM_LEVEL1V_KERS 14
704
705
// Level-1f kernel identifiers, numbered consecutively from zero.
typedef enum
{
	BLIS_AXPY2V_KER    = 0,
	BLIS_DOTAXPYV_KER  = 1,
	BLIS_AXPYF_KER     = 2,
	BLIS_DOTXF_KER     = 3,
	BLIS_DOTXAXPYF_KER = 4
} l1fkr_t;

#define BLIS_NUM_LEVEL1F_KERS 5
716
717
// Level-1m (packm/unpackm) kernel identifiers. Each kernel's value equals
// the number in its name (0 through 31). The packm and unpackm families
// deliberately occupy the same range of values within this one enum; the
// second family restarts the numbering at zero.
typedef enum
{
	BLIS_PACKM_0XK_KER = 0,
	BLIS_PACKM_1XK_KER,       //  1
	BLIS_PACKM_2XK_KER,       //  2
	BLIS_PACKM_3XK_KER,       //  3
	BLIS_PACKM_4XK_KER,       //  4
	BLIS_PACKM_5XK_KER,       //  5
	BLIS_PACKM_6XK_KER,       //  6
	BLIS_PACKM_7XK_KER,       //  7
	BLIS_PACKM_8XK_KER,       //  8
	BLIS_PACKM_9XK_KER,       //  9
	BLIS_PACKM_10XK_KER,      // 10
	BLIS_PACKM_11XK_KER,      // 11
	BLIS_PACKM_12XK_KER,      // 12
	BLIS_PACKM_13XK_KER,      // 13
	BLIS_PACKM_14XK_KER,      // 14
	BLIS_PACKM_15XK_KER,      // 15
	BLIS_PACKM_16XK_KER,      // 16
	BLIS_PACKM_17XK_KER,      // 17
	BLIS_PACKM_18XK_KER,      // 18
	BLIS_PACKM_19XK_KER,      // 19
	BLIS_PACKM_20XK_KER,      // 20
	BLIS_PACKM_21XK_KER,      // 21
	BLIS_PACKM_22XK_KER,      // 22
	BLIS_PACKM_23XK_KER,      // 23
	BLIS_PACKM_24XK_KER,      // 24
	BLIS_PACKM_25XK_KER,      // 25
	BLIS_PACKM_26XK_KER,      // 26
	BLIS_PACKM_27XK_KER,      // 27
	BLIS_PACKM_28XK_KER,      // 28
	BLIS_PACKM_29XK_KER,      // 29
	BLIS_PACKM_30XK_KER,      // 30
	BLIS_PACKM_31XK_KER,      // 31

	BLIS_UNPACKM_0XK_KER = 0,
	BLIS_UNPACKM_1XK_KER,     //  1
	BLIS_UNPACKM_2XK_KER,     //  2
	BLIS_UNPACKM_3XK_KER,     //  3
	BLIS_UNPACKM_4XK_KER,     //  4
	BLIS_UNPACKM_5XK_KER,     //  5
	BLIS_UNPACKM_6XK_KER,     //  6
	BLIS_UNPACKM_7XK_KER,     //  7
	BLIS_UNPACKM_8XK_KER,     //  8
	BLIS_UNPACKM_9XK_KER,     //  9
	BLIS_UNPACKM_10XK_KER,    // 10
	BLIS_UNPACKM_11XK_KER,    // 11
	BLIS_UNPACKM_12XK_KER,    // 12
	BLIS_UNPACKM_13XK_KER,    // 13
	BLIS_UNPACKM_14XK_KER,    // 14
	BLIS_UNPACKM_15XK_KER,    // 15
	BLIS_UNPACKM_16XK_KER,    // 16
	BLIS_UNPACKM_17XK_KER,    // 17
	BLIS_UNPACKM_18XK_KER,    // 18
	BLIS_UNPACKM_19XK_KER,    // 19
	BLIS_UNPACKM_20XK_KER,    // 20
	BLIS_UNPACKM_21XK_KER,    // 21
	BLIS_UNPACKM_22XK_KER,    // 22
	BLIS_UNPACKM_23XK_KER,    // 23
	BLIS_UNPACKM_24XK_KER,    // 24
	BLIS_UNPACKM_25XK_KER,    // 25
	BLIS_UNPACKM_26XK_KER,    // 26
	BLIS_UNPACKM_27XK_KER,    // 27
	BLIS_UNPACKM_28XK_KER,    // 28
	BLIS_UNPACKM_29XK_KER,    // 29
	BLIS_UNPACKM_30XK_KER,    // 30
	BLIS_UNPACKM_31XK_KER     // 31

} l1mkr_t;

#define BLIS_NUM_PACKM_KERS   32
#define BLIS_NUM_UNPACKM_KERS 32
790
791
// Level-3 microkernel identifiers, numbered consecutively from zero.
typedef enum
{
	BLIS_GEMM_UKR       = 0,
	BLIS_GEMMTRSM_L_UKR = 1,
	BLIS_GEMMTRSM_U_UKR = 2,
	BLIS_TRSM_L_UKR     = 3,
	BLIS_TRSM_U_UKR     = 4
} l3ukr_t;

#define BLIS_NUM_LEVEL3_UKRS 5
802
803
// Microkernel implementation type identifiers, numbered consecutively
// from zero.
typedef enum
{
	BLIS_REFERENCE_UKERNEL = 0,
	BLIS_VIRTUAL_UKERNEL   = 1,
	BLIS_OPTIMIZED_UKERNEL = 2,
	BLIS_NOTAPPLIC_UKERNEL = 3
} kimpl_t;

#define BLIS_NUM_UKR_IMPL_TYPES 4
813
814
// NOTE: The l3sup_t type and its count are currently compiled out.
#if 0
typedef enum
{
	// Key to the two-letter codes used in the names below:
	//   RV = row-stored, contiguous vector-loading
	//   RG = row-stored, non-contiguous gather-loading
	//   CV = column-stored, contiguous vector-loading
	//   CG = column-stored, non-contiguous gather-loading
	//   RD = row-stored, dot-based
	//   CD = col-stored, dot-based
	//   RC = row-stored, column-times-column
	//   CR = column-stored, row-times-row
	//   GX = general-stored generic implementation

	BLIS_GEMMSUP_RV_UKR = 0,
	BLIS_GEMMSUP_RG_UKR,
	BLIS_GEMMSUP_CV_UKR,
	BLIS_GEMMSUP_CG_UKR,

	BLIS_GEMMSUP_RD_UKR,
	BLIS_GEMMSUP_CD_UKR,

	BLIS_GEMMSUP_RC_UKR,
	BLIS_GEMMSUP_CR_UKR,

	BLIS_GEMMSUP_GX_UKR,
} l3sup_t;

#define BLIS_NUM_LEVEL3_SUP_UKRS 9
#endif
847
848
// Storage combinations for three operands. Eight R/C triples are followed
// by BLIS_XXX, for nine values in total. The additional combinations
// involving "G" are currently compiled out.
typedef enum
{
	// 3-operand storage combinations
	BLIS_RRR = 0,
	BLIS_RRC = 1,
	BLIS_RCR = 2,
	BLIS_RCC = 3,
	BLIS_CRR = 4,
	BLIS_CRC = 5,
	BLIS_CCR = 6,
	BLIS_CCC = 7,
	BLIS_XXX = 8,

#if 0
	BLIS_RRG,
	BLIS_RCG,
	BLIS_RGR,
	BLIS_RGC,
	BLIS_RGG,
	BLIS_CRG,
	BLIS_CCG,
	BLIS_CGR,
	BLIS_CGC,
	BLIS_CGG,
	BLIS_GRR,
	BLIS_GRC,
	BLIS_GRG,
	BLIS_GCR,
	BLIS_GCC,
	BLIS_GCG,
	BLIS_GGR,
	BLIS_GGC,
	BLIS_GGG,
#endif
} stor3_t;

#define BLIS_NUM_3OP_RC_COMBOS 9
//#define BLIS_NUM_3OP_RCG_COMBOS 27
887
888
// NOTE: The thridx_t type is currently compiled out; only the loop count
// that follows it is active.
#if 0
typedef enum
{
	BLIS_JC_IDX = 0,
	BLIS_PC_IDX,
	BLIS_IC_IDX,
	BLIS_JR_IDX,
	BLIS_IR_IDX,
	BLIS_PR_IDX
} thridx_t;
#endif

#define BLIS_NUM_LOOPS 6
902
903
904 // -- Operation ID type --
905
typedef enum
{
	//
	// NOTE: These level-3 opid_t values are used in bli_l3_ind.c to index
	// into arrays. Consequently, if/when additional values are added to
	// this enum, you must either:
	// - keep the level-3 values (starting with _GEMM) beginning at
	//   index 0; or
	// - if the value range is moved such that it does not begin at
	//   index 0, implement something like a BLIS_OPID_LEVEL3_RANGE_START
	//   value that can be subtracted from the opid_t value to map it
	//   to a zero-based range.
	//
	BLIS_GEMM  = 0,
	BLIS_GEMMT = 1,
	BLIS_HEMM  = 2,
	BLIS_HERK  = 3,
	BLIS_HER2K = 4,
	BLIS_SYMM  = 5,
	BLIS_SYRK  = 6,
	BLIS_SYR2K = 7,
	BLIS_TRMM3 = 8,
	BLIS_TRMM  = 9,
	BLIS_TRSM  = 10,

	// Follows the last level-3 operation.
	BLIS_NOID  = 11
} opid_t;

#define BLIS_NUM_LEVEL3_OPS 11
936
937
938 // -- Blocksize ID type --
939
typedef enum
{
	// NOTE: the level-3 blocksizes MUST be indexed starting at zero.
	// At one point, we made this assumption in bli_cntx_set_blkszs()
	// and friends.

	// Level-3 blocksizes.
	BLIS_KR      = 0,
	BLIS_MR      = 1,
	BLIS_NR      = 2,
	BLIS_MC      = 3,
	BLIS_KC      = 4,
	BLIS_NC      = 5,

	// Level-2 blocksizes.
	BLIS_M2      = 6,   // level-2 blocksize in m dimension
	BLIS_N2      = 7,   // level-2 blocksize in n dimension

	// Level-1f fusing factors.
	BLIS_AF      = 8,   // level-1f axpyf fusing factor
	BLIS_DF      = 9,   // level-1f dotxf fusing factor
	BLIS_XF      = 10,  // level-1f dotxaxpyf fusing factor

	BLIS_NO_PART = 11   // used as a placeholder when blocksizes are not applicable.
} bszid_t;

#define BLIS_NUM_BLKSZS 11
964
965
966 // -- Threshold ID type --
967
// Identifiers for the level-3 small/unpacked matrix thresholds, one per
// dimension.
typedef enum
{
	BLIS_MT = 0, // threshold in m dimension
	BLIS_NT = 1, // threshold in n dimension
	BLIS_KT = 2  // threshold in k dimension

} threshid_t;

#define BLIS_NUM_THRESH 3
977
978
979 // -- Architecture ID type --
980
// NOTE: This typedef enum must be kept up-to-date with the arch_t
// string array in bli_arch.c. Whenever values are added/inserted
// OR if values are rearranged, be sure to update the string array
// in bli_arch.c.

typedef enum
{
	// Numbering starts at zero and increases by one per entry; the
	// trailing comments give each entry's resulting index.

	// Intel
	BLIS_ARCH_SKX = 0,
	BLIS_ARCH_KNL,          //  1
	BLIS_ARCH_KNC,          //  2
	BLIS_ARCH_HASWELL,      //  3
	BLIS_ARCH_SANDYBRIDGE,  //  4
	BLIS_ARCH_PENRYN,       //  5

	// AMD
	BLIS_ARCH_ZEN2,         //  6
	BLIS_ARCH_ZEN,          //  7
	BLIS_ARCH_EXCAVATOR,    //  8
	BLIS_ARCH_STEAMROLLER,  //  9
	BLIS_ARCH_PILEDRIVER,   // 10
	BLIS_ARCH_BULLDOZER,    // 11

	// ARM
	BLIS_ARCH_THUNDERX2,    // 12
	BLIS_ARCH_CORTEXA57,    // 13
	BLIS_ARCH_CORTEXA53,    // 14
	BLIS_ARCH_CORTEXA15,    // 15
	BLIS_ARCH_CORTEXA9,     // 16

	// IBM/Power
	BLIS_ARCH_POWER10,      // 17
	BLIS_ARCH_POWER9,       // 18
	BLIS_ARCH_POWER7,       // 19
	BLIS_ARCH_BGQ,          // 20

	// Generic architecture/configuration
	BLIS_ARCH_GENERIC,      // 21

	// The total number of defined architectures. This must be last in the
	// list of enums since its definition assumes that the previous enum
	// value (BLIS_ARCH_GENERIC) is given index num_archs-1.
	BLIS_NUM_ARCHS

} arch_t;

// NOTE: This macro has the same name as the final enumerator above, so from
// this point on the macro is what BLIS_NUM_ARCHS expands to. Its value must
// be updated by hand to reflect the number of architectures listed in
// arch_t!
#define BLIS_NUM_ARCHS 22
1033
1034
1035 //
1036 // -- BLIS misc. structure types -----------------------------------------------
1037 //
1038
1039 // These headers must be included here (or earlier) because definitions they
1040 // provide are needed in the pool_t and related structs.
1041 #include "bli_pthread.h"
1042 #include "bli_malloc.h"
1043
1044 // -- Pool block type --
1045
1046 typedef struct
1047 {
1048 void* buf;
1049 siz_t block_size;
1050
1051 } pblk_t;
1052
1053
1054 // -- Pool type --
1055
1056 typedef struct
1057 {
1058 void* block_ptrs;
1059 dim_t block_ptrs_len;
1060
1061 dim_t top_index;
1062 dim_t num_blocks;
1063
1064 siz_t block_size;
1065 siz_t align_size;
1066 siz_t offset_size;
1067
1068 malloc_ft malloc_fp;
1069 free_ft free_fp;
1070
1071 } pool_t;
1072
1073
1074 // -- Array type --
1075
1076 typedef struct
1077 {
1078 void* buf;
1079
1080 siz_t num_elem;
1081 siz_t elem_size;
1082
1083 } array_t;
1084
1085
1086 // -- Locked pool-of-arrays-of-pools type --
1087
1088 typedef struct
1089 {
1090 bli_pthread_mutex_t mutex;
1091 pool_t pool;
1092
1093 siz_t def_array_len;
1094
1095 } apool_t;
1096
1097
1098 // -- packing block allocator: Locked set of pools type --
1099
1100 typedef struct membrk_s
1101 {
1102 pool_t pools[3];
1103 bli_pthread_mutex_t mutex;
1104
1105 // These fields are used for general-purpose allocation.
1106 siz_t align_size;
1107 malloc_ft malloc_fp;
1108 free_ft free_fp;
1109
1110 } membrk_t;
1111
1112
1113 // -- Memory object type --
1114
1115 typedef struct mem_s
1116 {
1117 pblk_t pblk;
1118 packbuf_t buf_type;
1119 pool_t* pool;
1120 siz_t size;
1121 } mem_t;
1122
1123
1124 // -- Control tree node type --
1125
1126 struct cntl_s
1127 {
1128 // Basic fields (usually required).
1129 opid_t family;
1130 bszid_t bszid;
1131 void_fp var_func;
1132 struct cntl_s* sub_prenode;
1133 struct cntl_s* sub_node;
1134
1135 // Optional fields (needed only by some operations such as packm).
1136 // NOTE: first field of params must be a uint64_t containing the size
1137 // of the struct.
1138 void* params;
1139
1140 // Internal fields that track "cached" data.
1141 mem_t pack_mem;
1142 };
1143 typedef struct cntl_s cntl_t;
1144
1145
1146 // -- Blocksize object type --
1147
1148 typedef struct blksz_s
1149 {
1150 // Primary blocksize values.
1151 dim_t v[BLIS_NUM_FP_TYPES];
1152
1153 // Blocksize extensions.
1154 dim_t e[BLIS_NUM_FP_TYPES];
1155
1156 } blksz_t;
1157
1158
1159 // -- Function pointer object type --
1160
1161 typedef struct func_s
1162 {
1163 // Kernel function address.
1164 void_fp ptr[BLIS_NUM_FP_TYPES];
1165
1166 } func_t;
1167
1168
1169 // -- Multi-boolean object type --
1170
1171 typedef struct mbool_s
1172 {
1173 bool v[BLIS_NUM_FP_TYPES];
1174
1175 } mbool_t;
1176
1177
1178 // -- Auxiliary kernel info type --
1179
1180 // Note: This struct is used by macro-kernels to package together extra
1181 // parameter values that may be of use to the micro-kernel without
1182 // cluttering up the micro-kernel interface itself.
1183
1184 typedef struct
1185 {
1186 // The pack schemas of A and B.
1187 pack_t schema_a;
1188 pack_t schema_b;
1189
1190 // Pointers to the micro-panels of A and B which will be used by the
1191 // next call to the micro-kernel.
1192 void* a_next;
1193 void* b_next;
1194
1195 // The imaginary strides of A and B.
1196 inc_t is_a;
1197 inc_t is_b;
1198
1199 // The panel strides of A and B.
1200 // NOTE: These are only used in situations where iteration over the
1201 // micropanels takes place in part within the kernel code (e.g. sup
1202 // millikernels).
1203 inc_t ps_a;
1204 inc_t ps_b;
1205
1206 // The type to convert to on output.
1207 //num_t dt_on_output;
1208
1209 } auxinfo_t;
1210
1211
1212 // -- Global scalar constant data struct --
1213
1214 // Note: This struct is used only when statically initializing the
1215 // global scalar constants in bli_const.c.
1216 typedef struct constdata_s
1217 {
1218 float s;
1219 double d;
1220 scomplex c;
1221 dcomplex z;
1222 gint_t i;
1223
1224 } constdata_t;
1225
1226
1227 //
1228 // -- BLIS object type definitions ---------------------------------------------
1229 //
1230
1231 typedef struct obj_s
1232 {
1233 // Basic fields
1234 struct obj_s* root;
1235
1236 dim_t off[2];
1237 dim_t dim[2];
1238 doff_t diag_off;
1239
1240 objbits_t info;
1241 objbits_t info2;
1242 siz_t elem_size;
1243
1244 void* buffer;
1245 inc_t rs;
1246 inc_t cs;
1247 inc_t is;
1248
1249 // Bufferless scalar storage
1250 atom_t scalar;
1251
1252 // Pack-related fields
1253 dim_t m_padded; // m dimension of matrix, including any padding
1254 dim_t n_padded; // n dimension of matrix, including any padding
1255 inc_t ps; // panel stride (distance to next panel)
1256 inc_t pd; // panel dimension (the "width" of a panel:
1257 // usually MR or NR)
1258 dim_t m_panel; // m dimension of a "full" panel
1259 dim_t n_panel; // n dimension of a "full" panel
1260 } obj_t;
1261
// Pre-initializers. Things that must be set afterwards:
// - root object pointer
// - info bitfields: dt, target_dt, exec_dt, comp_dt
// - info2 bitfields: scalar_dt
// - elem_size
// - dims, strides
// - buffer
// - internal scalar buffer (must always set imaginary component)
1270
// Designated initializer for an obj_t with 0x0 dimensions. Everything is
// zero/NULL except: info (dense + general bits), elem_size (a float-sized
// placeholder) and is (1).
#define BLIS_OBJECT_INITIALIZER \
{ \
    /* No root yet; zero offsets, zero dims, zero diagonal offset. */ \
    .root      = NULL, \
    .off       = { 0, 0 }, \
    .dim       = { 0, 0 }, \
    .diag_off  = 0, \
    /* Dense storage, general structure; no other info bits set. */ \
    .info      = ( 0x0 | BLIS_BITVAL_DENSE | BLIS_BITVAL_GENERAL ), \
    .info2     = 0x0, \
    .elem_size = sizeof( float ), /* placeholder; this is changed later. */ \
    /* No buffer; zero row/column strides, unit imaginary stride. */ \
    .buffer    = NULL, \
    .rs        = 0, \
    .cs        = 0, \
    .is        = 1, \
    /* Internal scalar. */ \
    .scalar    = { 0.0, 0.0 }, \
    /* Pack-related fields. */ \
    .m_padded  = 0, \
    .n_padded  = 0, \
    .ps        = 0, \
    .pd        = 0, \
    .m_panel   = 0, \
    .n_panel   = 0 \
}
1298
// Designated initializer for a 1x1 obj_t. Everything is zero/NULL except:
// dim (1x1), info (dense + general bits), elem_size (a float-sized
// placeholder) and is (1).
#define BLIS_OBJECT_INITIALIZER_1X1 \
{ \
    /* No root yet; zero offsets, 1x1 dims, zero diagonal offset. */ \
    .root      = NULL, \
    .off       = { 0, 0 }, \
    .dim       = { 1, 1 }, \
    .diag_off  = 0, \
    /* Dense storage, general structure; no other info bits set. */ \
    .info      = ( 0x0 | BLIS_BITVAL_DENSE | BLIS_BITVAL_GENERAL ), \
    .info2     = 0x0, \
    .elem_size = sizeof( float ), /* placeholder; this is changed later. */ \
    /* No buffer; zero row/column strides, unit imaginary stride. */ \
    .buffer    = NULL, \
    .rs        = 0, \
    .cs        = 0, \
    .is        = 1, \
    /* Internal scalar. */ \
    .scalar    = { 0.0, 0.0 }, \
    /* Pack-related fields. */ \
    .m_padded  = 0, \
    .n_padded  = 0, \
    .ps        = 0, \
    .pd        = 0, \
    .m_panel   = 0, \
    .n_panel   = 0 \
}
1326
// Define these inline functions here since they must be kept in sync with
// the contents of obj_t whenever it changes.
1329
bli_obj_init_full_shallow_copy_of(obj_t * a,obj_t * b)1330 BLIS_INLINE void bli_obj_init_full_shallow_copy_of( obj_t* a, obj_t* b )
1331 {
1332 b->root = a->root;
1333
1334 b->off[0] = a->off[0];
1335 b->off[1] = a->off[1];
1336 b->dim[0] = a->dim[0];
1337 b->dim[1] = a->dim[1];
1338 b->diag_off = a->diag_off;
1339
1340 b->info = a->info;
1341 b->info2 = a->info2;
1342 b->elem_size = a->elem_size;
1343
1344 b->buffer = a->buffer;
1345 b->rs = a->rs;
1346 b->cs = a->cs;
1347 b->is = a->is;
1348
1349 b->scalar = a->scalar;
1350
1351 //b->pack_mem = a->pack_mem;
1352 b->m_padded = a->m_padded;
1353 b->n_padded = a->n_padded;
1354 b->ps = a->ps;
1355 b->pd = a->pd;
1356 b->m_panel = a->m_panel;
1357 b->n_panel = a->n_panel;
1358 }
1359
bli_obj_init_subpart_from(obj_t * a,obj_t * b)1360 BLIS_INLINE void bli_obj_init_subpart_from( obj_t* a, obj_t* b )
1361 {
1362 b->root = a->root;
1363
1364 b->off[0] = a->off[0];
1365 b->off[1] = a->off[1];
1366 // Avoid copying m and n since they will be overwritten.
1367 //b->dim[0] = a->dim[0];
1368 //b->dim[1] = a->dim[1];
1369 b->diag_off = a->diag_off;
1370
1371 b->info = a->info;
1372 b->info2 = a->info2;
1373 b->elem_size = a->elem_size;
1374
1375 b->buffer = a->buffer;
1376 b->rs = a->rs;
1377 b->cs = a->cs;
1378 b->is = a->is;
1379
1380 b->scalar = a->scalar;
1381
1382 // Avoid copying pack_mem entry.
1383 // FGVZ: You should probably make sure this is right.
1384 //b->pack_mem = a->pack_mem;
1385 b->m_padded = a->m_padded;
1386 b->n_padded = a->n_padded;
1387 b->ps = a->ps;
1388 b->pd = a->pd;
1389 b->m_panel = a->m_panel;
1390 b->n_panel = a->n_panel;
1391 }
1392
// Initializers for global scalar constants.
// NOTE: These must remain cpp macros since they are initializer
// expressions, not functions.
1396
// Expands to a designated initializer for a 1x1 constant obj_t whose buffer
// is buffer0. The info bits are const-type + dense + general, elem_size is
// sizeof( constdata_t ), and all three strides are 1. Members not named
// here (scalar and the pack-related fields) are left to the language's
// implicit zero-initialization.
#define bli_obj_init_const( buffer0 ) \
{ \
    /* No root; zero offsets, 1x1 dims, zero diagonal offset. */ \
    .root      = NULL, \
    .off       = { 0, 0 }, \
    .dim       = { 1, 1 }, \
    .diag_off  = 0, \
    /* Constant datatype, dense storage, general structure. */ \
    .info      = ( 0x0 | BLIS_BITVAL_CONST_TYPE \
                       | BLIS_BITVAL_DENSE \
                       | BLIS_BITVAL_GENERAL ), \
    .info2     = 0x0, \
    .elem_size = sizeof( constdata_t ), \
    /* Caller-supplied buffer with unit strides. */ \
    .buffer    = ( buffer0 ), \
    .rs        = 1, \
    .cs        = 1, \
    .is        = 1 \
}
1416
// Expands to a designated initializer for a constdata_t in which every
// member holds the same value: val converted to float (.s), double (.d),
// the real parts of the complex members (.c, .z; imaginary parts are zero),
// and gint_t (.i).
//
// val is parenthesized before each cast so that an expression argument is
// evaluated first and converted afterwards. Without the parentheses the
// cast binds only to the first operand, so e.g. an argument of 5 / 2 would
// yield 2.5 in the floating-point members but 2 in the integer member.
//
// NOTE: val is expanded (and therefore evaluated) once per member, so it
// must be free of side effects.
#define bli_obj_init_constdata( val ) \
{ \
    .s = ( float  )( val ), \
    .d = ( double )( val ), \
    .c = { .real = ( float  )( val ), .imag = 0.0f }, \
    .z = { .real = ( double )( val ), .imag = 0.0 }, \
    .i = ( gint_t )( val ), \
}
1425
1426
1427 // -- Context type --
1428
1429 typedef struct cntx_s
1430 {
1431 blksz_t blkszs[ BLIS_NUM_BLKSZS ];
1432 bszid_t bmults[ BLIS_NUM_BLKSZS ];
1433
1434 func_t l3_vir_ukrs[ BLIS_NUM_LEVEL3_UKRS ];
1435 func_t l3_nat_ukrs[ BLIS_NUM_LEVEL3_UKRS ];
1436 mbool_t l3_nat_ukrs_prefs[ BLIS_NUM_LEVEL3_UKRS ];
1437
1438 blksz_t l3_sup_thresh[ BLIS_NUM_THRESH ];
1439 void* l3_sup_handlers[ BLIS_NUM_LEVEL3_OPS ];
1440 blksz_t l3_sup_blkszs[ BLIS_NUM_BLKSZS ];
1441 func_t l3_sup_kers[ BLIS_NUM_3OP_RC_COMBOS ];
1442 mbool_t l3_sup_kers_prefs[ BLIS_NUM_3OP_RC_COMBOS ];
1443
1444 func_t l1f_kers[ BLIS_NUM_LEVEL1F_KERS ];
1445 func_t l1v_kers[ BLIS_NUM_LEVEL1V_KERS ];
1446
1447 func_t packm_kers[ BLIS_NUM_PACKM_KERS ];
1448 func_t unpackm_kers[ BLIS_NUM_UNPACKM_KERS ];
1449
1450 ind_t method;
1451 pack_t schema_a_block;
1452 pack_t schema_b_panel;
1453 pack_t schema_c_panel;
1454
1455 } cntx_t;
1456
1457
1458 // -- Runtime type --
1459
1460 // NOTE: The order of these fields must be kept consistent with the definition
1461 // of the BLIS_RNTM_INITIALIZER macro in bli_rntm.h.
1462
1463 typedef struct rntm_s
1464 {
1465 // "External" fields: these may be queried by the end-user.
1466 bool auto_factor;
1467
1468 dim_t num_threads;
1469 dim_t thrloop[ BLIS_NUM_LOOPS ];
1470 bool pack_a; // enable/disable packing of left-hand matrix A.
1471 bool pack_b; // enable/disable packing of right-hand matrix B.
1472 bool l3_sup; // enable/disable small matrix handling in level-3 ops.
1473
1474 // "Internal" fields: these should not be exposed to the end-user.
1475
1476 // The small block pool, which is attached in the l3 thread decorator.
1477 pool_t* sba_pool;
1478
1479 // The packing block allocator, which is attached in the l3 thread decorator.
1480 membrk_t* membrk;
1481
1482 } rntm_t;
1483
1484
1485 // -- Error types --
1486
// Error checking levels.
typedef enum
{
    BLIS_NO_ERROR_CHECKING   = 0,
    BLIS_FULL_ERROR_CHECKING = 1
} errlev_t;
1492
// Error codes. Every code is negative, and each category occupies its own
// block of ten values. Note that BLIS_ERROR_CODE_MIN (-9) is numerically
// the larger bound and BLIS_ERROR_CODE_MAX (-170) the smaller one; all
// codes other than BLIS_SUCCESS and BLIS_FAILURE lie strictly between them.
typedef enum
{
    // Generic error codes
    BLIS_SUCCESS                              = -1,
    BLIS_FAILURE                              = -2,

    BLIS_ERROR_CODE_MIN                       = -9,

    // General errors
    BLIS_INVALID_ERROR_CHECKING_LEVEL         = -10,
    BLIS_UNDEFINED_ERROR_CODE                 = -11,
    BLIS_NULL_POINTER                         = -12,
    BLIS_NOT_YET_IMPLEMENTED                  = -13,

    // Parameter-specific errors
    BLIS_INVALID_SIDE                         = -20,
    BLIS_INVALID_UPLO                         = -21,
    BLIS_INVALID_TRANS                        = -22,
    BLIS_INVALID_CONJ                         = -23,
    BLIS_INVALID_DIAG                         = -24,
    BLIS_INVALID_MACHVAL                      = -25,
    BLIS_EXPECTED_NONUNIT_DIAG                = -26,

    // Datatype-specific errors
    BLIS_INVALID_DATATYPE                     = -30,
    BLIS_EXPECTED_FLOATING_POINT_DATATYPE     = -31,
    BLIS_EXPECTED_NONINTEGER_DATATYPE         = -32,
    BLIS_EXPECTED_NONCONSTANT_DATATYPE        = -33,
    BLIS_EXPECTED_REAL_DATATYPE               = -34,
    BLIS_EXPECTED_INTEGER_DATATYPE            = -35,
    BLIS_INCONSISTENT_DATATYPES               = -36,
    BLIS_EXPECTED_REAL_PROJ_OF                = -37,
    BLIS_EXPECTED_REAL_VALUED_OBJECT          = -38,
    BLIS_INCONSISTENT_PRECISIONS              = -39,

    // Dimension-specific errors
    BLIS_NONCONFORMAL_DIMENSIONS              = -40,
    BLIS_EXPECTED_SCALAR_OBJECT               = -41,
    BLIS_EXPECTED_VECTOR_OBJECT               = -42,
    BLIS_UNEQUAL_VECTOR_LENGTHS               = -43,
    BLIS_EXPECTED_SQUARE_OBJECT               = -44,
    BLIS_UNEXPECTED_OBJECT_LENGTH             = -45,
    BLIS_UNEXPECTED_OBJECT_WIDTH              = -46,
    BLIS_UNEXPECTED_VECTOR_DIM                = -47,
    BLIS_UNEXPECTED_DIAG_OFFSET               = -48,
    BLIS_NEGATIVE_DIMENSION                   = -49,

    // Stride-specific errors
    BLIS_INVALID_ROW_STRIDE                   = -50,
    BLIS_INVALID_COL_STRIDE                   = -51,
    BLIS_INVALID_DIM_STRIDE_COMBINATION       = -52,

    // Structure-specific errors
    BLIS_EXPECTED_GENERAL_OBJECT              = -60,
    BLIS_EXPECTED_HERMITIAN_OBJECT            = -61,
    BLIS_EXPECTED_SYMMETRIC_OBJECT            = -62,
    BLIS_EXPECTED_TRIANGULAR_OBJECT           = -63,

    // Storage-specific errors
    BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT       = -70,

    // Partitioning-specific errors
    BLIS_INVALID_3x1_SUBPART                  = -80,
    BLIS_INVALID_1x3_SUBPART                  = -81,
    BLIS_INVALID_3x3_SUBPART                  = -82,

    // Control tree-specific errors
    BLIS_UNEXPECTED_NULL_CONTROL_TREE         = -90,

    // Packing-specific errors
    BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = -100,

    // Buffer-specific errors
    BLIS_EXPECTED_NONNULL_OBJECT_BUFFER       = -110,

    // Memory errors
    BLIS_MALLOC_RETURNED_NULL                 = -120,

    // Internal memory pool errors
    BLIS_INVALID_PACKBUF                      = -130,
    BLIS_EXHAUSTED_CONTIG_MEMORY_POOL         = -131,
    BLIS_INSUFFICIENT_STACK_BUF_SIZE          = -132,
    BLIS_ALIGNMENT_NOT_POWER_OF_TWO           = -133,
    BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE       = -134,

    // Object-related errors
    BLIS_EXPECTED_OBJECT_ALIAS                = -140,

    // Architecture-related errors
    BLIS_INVALID_ARCH_ID                      = -150,
    BLIS_UNINITIALIZED_GKS_CNTX               = -151,

    // Blocksize-related errors
    BLIS_MC_DEF_NONMULTIPLE_OF_MR             = -160,
    BLIS_MC_MAX_NONMULTIPLE_OF_MR             = -161,
    BLIS_NC_DEF_NONMULTIPLE_OF_NR             = -162,
    BLIS_NC_MAX_NONMULTIPLE_OF_NR             = -163,
    BLIS_KC_DEF_NONMULTIPLE_OF_KR             = -164,
    BLIS_KC_MAX_NONMULTIPLE_OF_KR             = -165,

    BLIS_ERROR_CODE_MAX                       = -170
} err_t;
1595
1596 #endif
1597