1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8    Copyright (C) 2016, Hewlett Packard Enterprise Development LP
9    Copyright (C) 2020, Advanced Micro Devices, Inc.
10 
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are
13    met:
14     - Redistributions of source code must retain the above copyright
15       notice, this list of conditions and the following disclaimer.
16     - Redistributions in binary form must reproduce the above copyright
17       notice, this list of conditions and the following disclaimer in the
18       documentation and/or other materials provided with the distribution.
19     - Neither the name(s) of the copyright holder(s) nor the names of its
20       contributors may be used to endorse or promote products derived
21       from this software without specific prior written permission.
22 
23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 */
36 
37 #ifndef BLIS_TYPE_DEFS_H
38 #define BLIS_TYPE_DEFS_H
39 
40 
41 //
42 // -- BLIS basic types ---------------------------------------------------------
43 //
44 
45 #ifdef __cplusplus
46   // For C++, include stdint.h.
47   #include <stdint.h>
48 #elif __STDC_VERSION__ >= 199901L
49   // For C99 (or later), include stdint.h.
50   #include <stdint.h>
51   #include <stdbool.h>
52 #else
53   // When stdint.h is not available, manually typedef the types we will use.
54   #ifdef _WIN32
55     typedef          __int32  int32_t;
56     typedef unsigned __int32 uint32_t;
57     typedef          __int64  int64_t;
58     typedef unsigned __int64 uint64_t;
59   #else
60     #error "Attempting to compile on pre-C99 system without stdint.h."
61   #endif
62 #endif
63 
// -- General-purpose integers --

// If BLAS integers are 64 bits, mandate that BLIS integers also be 64 bits.
// NOTE: This cpp guard will only meaningfully change BLIS's behavior on
// systems where the BLIS integer size would have been automatically selected
// to be 32 bits, since explicit selection of 32 bits is prohibited at
// configure-time (and explicit or automatic selection of 64 bits is fine
// and would have had the same result).
// NOTE: The BLAS integer size is read from BLIS_BLAS_INT_TYPE_SIZE, which is
// the macro this file also uses to select f77_int. The older spelling
// BLIS_BLAS_INT_SIZE is still honored for backward compatibility. Both are
// checked with defined() because an undefined macro silently evaluates to 0
// inside an #if, which would otherwise disable this guard without any
// diagnostic.
#if ( defined(BLIS_BLAS_INT_TYPE_SIZE) && BLIS_BLAS_INT_TYPE_SIZE == 64 ) || \
    ( defined(BLIS_BLAS_INT_SIZE)      && BLIS_BLAS_INT_SIZE      == 64 )
  #undef  BLIS_INT_TYPE_SIZE
  #define BLIS_INT_TYPE_SIZE 64
#endif

// Define integer types depending on what size integer was requested. Any
// request other than 32 or 64 falls back to the platform's long int.
#if   BLIS_INT_TYPE_SIZE == 32
typedef           int32_t  gint_t;
typedef          uint32_t guint_t;
#elif BLIS_INT_TYPE_SIZE == 64
typedef           int64_t  gint_t;
typedef          uint64_t guint_t;
#else
typedef   signed long int  gint_t;
typedef unsigned long int guint_t;
#endif
88 
// -- Boolean type --

// NOTE: The former bool_t typedef is no longer used; C99's bool type has
// replaced it.

// TRUE and FALSE are kept as BLIS's boolean constants. Each one expands to
// the corresponding C99 stdbool.h name (true / false), and each is defined
// here only when it has not already been defined.
#if !defined(TRUE)
  #define TRUE  true
#endif

#if !defined(FALSE)
  #define FALSE false
#endif
104 
105 // -- Special-purpose integers --
106 
107 // This cpp guard provides a temporary hack to allow libflame
108 // interoperability with BLIS.
109 #ifndef _DEFINED_DIM_T
110 #define _DEFINED_DIM_T
111 typedef   gint_t dim_t;      // dimension type
112 #endif
113 typedef   gint_t inc_t;      // increment/stride type
114 typedef   gint_t doff_t;     // diagonal offset type
115 typedef  guint_t siz_t;      // byte size type
116 typedef uint32_t objbits_t;  // object information bit field
117 
// -- Real types --

// How many floating-point types are supported, and the size of the largest
// one (dcomplex).
#define BLIS_NUM_FP_TYPES   4
#define BLIS_MAX_TYPE_SIZE  sizeof(dcomplex)

// sizeof() is not usable inside a C preprocessor #if conditional, so the
// sizes of the individual types are also spelled out as plain integer
// literals for the places that need them there.
#define BLIS_SIZEOF_S      4  // sizeof(float)
#define BLIS_SIZEOF_D      8  // sizeof(double)
#define BLIS_SIZEOF_C      8  // sizeof(scomplex)
#define BLIS_SIZEOF_Z      16 // sizeof(dcomplex)
132 
// -- Complex types --

#if defined(BLIS_ENABLE_C99_COMPLEX)

	// C99 complex types were requested; refuse to continue if the compiler
	// does not report C99 (or later) support.
	#if __STDC_VERSION__ < 199901L
		#error "Configuration requested C99 complex types, but C99 does not appear to be supported."
	#else
		#include <complex.h>

		// Map the official complex types onto the BLIS complex type names.
		typedef  float complex scomplex;
		typedef double complex dcomplex;
	#endif

#else // !defined(BLIS_ENABLE_C99_COMPLEX)

	// Each struct below is wrapped in its own cpp guard as a temporary hack
	// that allows libflame interoperability with BLIS.

	// Single-precision complex: real part followed by imaginary part.
	#if !defined(_DEFINED_SCOMPLEX)
	#define _DEFINED_SCOMPLEX
	typedef struct
	{
		float  real;
		float  imag;
	} scomplex;
	#endif

	// Double-precision complex: real part followed by imaginary part.
	#if !defined(_DEFINED_DCOMPLEX)
	#define _DEFINED_DCOMPLEX
	typedef struct
	{
		double real;
		double imag;
	} dcomplex;
	#endif

#endif // BLIS_ENABLE_C99_COMPLEX

// -- Atom type --

// Note: an atom holds the value of a "bufferless" scalar object, so it must
// be at least as large as the largest scalar value we might want to store.
// For now that means it is a dcomplex.
typedef dcomplex atom_t;
179 
180 // -- Fortran-77 types --
181 
182 // Note: These types are typically only used by BLAS compatibility layer, but
183 // we must define them even when the compatibility layer isn't being built
184 // because they also occur in bli_slamch() and bli_dlamch().
185 
186 // Define f77_int depending on what size of integer was requested.
187 #if   BLIS_BLAS_INT_TYPE_SIZE == 32
188 typedef int32_t   f77_int;
189 #elif BLIS_BLAS_INT_TYPE_SIZE == 64
190 typedef int64_t   f77_int;
191 #else
192 typedef long int  f77_int;
193 #endif
194 
195 typedef char      f77_char;
196 typedef float     f77_float;
197 typedef double    f77_double;
198 typedef scomplex  f77_scomplex;
199 typedef dcomplex  f77_dcomplex;
200 
// -- Void function pointer types --

// Note: Use void_fp wherever the address of a *function* is conveyed or
// stored before being typecast back to its correct function type. It is not
// needed when conveying or storing the address of *data* (such as an array
// of float or double).
typedef void *void_fp;
210 
211 
212 //
213 // -- BLIS info bit field offsets ----------------------------------------------
214 //
215 
216 /*
217   info field description
218 
219   bit(s)   purpose
220   -------  -------
221    2 ~ 0   Stored numerical datatype
222            - 0: domain    (0 == real, 1 == complex)
223            - 1: precision (0 == single, 1 == double)
224            - 2: special   (100 = int; 101 = const)
225        3   Transposition required [during pack]?
226        4   Conjugation required [during pack]?
227    7 ~ 5   Part of matrix stored:
228            - 5: strictly upper triangular
229            - 6: diagonal
230            - 7: strictly lower triangular
231        8   Implicit unit diagonal?
232        9   Invert diagonal required [during pack]?
233   12 ~ 10  Target numerical datatype
234            - 10: domain    (0 == real, 1 == complex)
235            - 11: precision (0 == single, 1 == double)
236            - 12: used to encode integer, constant types
237   15 ~ 13  Execution numerical datatype
238            - 13: domain    (0 == real, 1 == complex)
239            - 14: precision (0 == single, 1 == double)
240            - 15: used to encode integer, constant types
241   22 ~ 16  Packed type/status
242            - 0 0000 00: not packed
243            - 1 0000 00: packed (unspecified; by rows, columns, or vector)
244            - 1 0000 00: packed by rows
245            - 1 0000 01: packed by columns
246            - 1 0000 10: packed by row panels
247            - 1 0000 11: packed by column panels
248            - 1 0001 10: packed by 4m interleaved row panels
249            - 1 0001 11: packed by 4m interleaved column panels
250            - 1 0010 10: packed by 3m interleaved row panels
251            - 1 0010 11: packed by 3m interleaved column panels
252            - 1 0011 10: packed by 4m separated row panels (not used)
253            - 1 0011 11: packed by 4m separated column panels (not used)
254            - 1 0100 10: packed by 3m separated row panels
255            - 1 0100 11: packed by 3m separated column panels
256            - 1 0101 10: packed real-only row panels
257            - 1 0101 11: packed real-only column panels
258            - 1 0110 10: packed imag-only row panels
259            - 1 0110 11: packed imag-only column panels
260            - 1 0111 10: packed real+imag row panels
261            - 1 0111 11: packed real+imag column panels
262            - 1 1000 10: packed by 1m expanded row panels
263            - 1 1000 11: packed by 1m expanded column panels
264            - 1 1001 10: packed by 1m reordered row panels
265            - 1 1001 11: packed by 1m reordered column panels
266        23  Packed panel order if upper-stored
267            - 0 == forward order if upper
268            - 1 == reverse order if upper
269        24  Packed panel order if lower-stored
270            - 0 == forward order if lower
271            - 1 == reverse order if lower
272   26 ~ 25  Packed buffer type
273            - 0 == block of A
274            - 1 == panel of B
275            - 2 == panel of C
276            - 3 == general use
277   28 ~ 27  Structure type
278            - 0 == general
279            - 1 == Hermitian
280            - 2 == symmetric
281            - 3 == triangular
282   31 ~ 29  Computation numerical datatype
283            - 29: domain    (0 == real, 1 == complex)
284            - 30: precision (0 == single, 1 == double)
285            - 31: used to encode integer, constant types
286 
287   info2 field description
288 
289   bit(s)   purpose
290   -------  -------
291     2 ~ 0  Scalar storage numerical datatype
292            -  0: domain    (0 == real, 1 == complex)
293            -  1: precision (0 == single, 1 == double)
294            -  2: used to encode integer, constant types
295 */
296 
// info
//
// Bit offsets of the fields within the info bit field (bits 0 through 31).
// An indented name is a sub-field located inside the field named on the
// nearest less-indented line above it.
#define BLIS_DATATYPE_SHIFT                0
#define   BLIS_DOMAIN_SHIFT                0
#define   BLIS_PRECISION_SHIFT             1
#define BLIS_CONJTRANS_SHIFT               3
#define   BLIS_TRANS_SHIFT                 3
#define   BLIS_CONJ_SHIFT                  4
#define BLIS_UPLO_SHIFT                    5
#define   BLIS_UPPER_SHIFT                 5
#define   BLIS_DIAG_SHIFT                  6
#define   BLIS_LOWER_SHIFT                 7
#define BLIS_UNIT_DIAG_SHIFT               8
#define BLIS_INVERT_DIAG_SHIFT             9
#define BLIS_TARGET_DT_SHIFT               10
#define   BLIS_TARGET_DOMAIN_SHIFT         10
#define   BLIS_TARGET_PREC_SHIFT           11
#define BLIS_EXEC_DT_SHIFT                 13
#define   BLIS_EXEC_DOMAIN_SHIFT           13
#define   BLIS_EXEC_PREC_SHIFT             14
#define BLIS_PACK_SCHEMA_SHIFT             16
#define   BLIS_PACK_RC_SHIFT               16
#define   BLIS_PACK_PANEL_SHIFT            17
#define   BLIS_PACK_FORMAT_SHIFT           18
#define   BLIS_PACK_SHIFT                  22
#define BLIS_PACK_REV_IF_UPPER_SHIFT       23
#define BLIS_PACK_REV_IF_LOWER_SHIFT       24
#define BLIS_PACK_BUFFER_SHIFT             25
#define BLIS_STRUC_SHIFT                   27
#define BLIS_COMP_DT_SHIFT                 29
#define   BLIS_COMP_DOMAIN_SHIFT           29
#define   BLIS_COMP_PREC_SHIFT             30

// info2
//
// Bit offsets of the fields within the info2 bit field.
#define BLIS_SCALAR_DT_SHIFT                0
#define   BLIS_SCALAR_DOMAIN_SHIFT          0
#define   BLIS_SCALAR_PREC_SHIFT            1

//
// -- BLIS info bit field masks ------------------------------------------------
//

// info
//
// Each mask is a run of one-bits shifted to the offset of its field.
#define BLIS_DATATYPE_BITS                 ( 0x7  << BLIS_DATATYPE_SHIFT )
#define   BLIS_DOMAIN_BIT                  ( 0x1  << BLIS_DOMAIN_SHIFT )
#define   BLIS_PRECISION_BIT               ( 0x1  << BLIS_PRECISION_SHIFT )
#define BLIS_CONJTRANS_BITS                ( 0x3  << BLIS_CONJTRANS_SHIFT )
#define   BLIS_TRANS_BIT                   ( 0x1  << BLIS_TRANS_SHIFT )
#define   BLIS_CONJ_BIT                    ( 0x1  << BLIS_CONJ_SHIFT )
#define BLIS_UPLO_BITS                     ( 0x7  << BLIS_UPLO_SHIFT )
#define   BLIS_UPPER_BIT                   ( 0x1  << BLIS_UPPER_SHIFT )
#define   BLIS_DIAG_BIT                    ( 0x1  << BLIS_DIAG_SHIFT )
#define   BLIS_LOWER_BIT                   ( 0x1  << BLIS_LOWER_SHIFT )
#define BLIS_UNIT_DIAG_BIT                 ( 0x1  << BLIS_UNIT_DIAG_SHIFT )
#define BLIS_INVERT_DIAG_BIT               ( 0x1  << BLIS_INVERT_DIAG_SHIFT )
#define BLIS_TARGET_DT_BITS                ( 0x7  << BLIS_TARGET_DT_SHIFT )
#define   BLIS_TARGET_DOMAIN_BIT           ( 0x1  << BLIS_TARGET_DOMAIN_SHIFT )
#define   BLIS_TARGET_PREC_BIT             ( 0x1  << BLIS_TARGET_PREC_SHIFT )
#define BLIS_EXEC_DT_BITS                  ( 0x7  << BLIS_EXEC_DT_SHIFT )
#define   BLIS_EXEC_DOMAIN_BIT             ( 0x1  << BLIS_EXEC_DOMAIN_SHIFT )
#define   BLIS_EXEC_PREC_BIT               ( 0x1  << BLIS_EXEC_PREC_SHIFT )
#define BLIS_PACK_SCHEMA_BITS              ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
#define   BLIS_PACK_RC_BIT                 ( 0x1  << BLIS_PACK_RC_SHIFT )
#define   BLIS_PACK_PANEL_BIT              ( 0x1  << BLIS_PACK_PANEL_SHIFT )
#define   BLIS_PACK_FORMAT_BITS            ( 0xF  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_PACK_BIT                    ( 0x1  << BLIS_PACK_SHIFT )
#define BLIS_PACK_REV_IF_UPPER_BIT         ( 0x1  << BLIS_PACK_REV_IF_UPPER_SHIFT )
#define BLIS_PACK_REV_IF_LOWER_BIT         ( 0x1  << BLIS_PACK_REV_IF_LOWER_SHIFT )
#define BLIS_PACK_BUFFER_BITS              ( 0x3  << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_STRUC_BITS                    ( 0x3  << BLIS_STRUC_SHIFT )
// NOTE: This mask covers bits 29 through 31. Shifting the signed int literal
// 0x7 left by 29 would overflow int, which is undefined behavior in C, so an
// unsigned literal is used here. The resulting bit pattern (0xE0000000) is
// unchanged.
#define BLIS_COMP_DT_BITS                  ( 0x7u << BLIS_COMP_DT_SHIFT )
#define   BLIS_COMP_DOMAIN_BIT             ( 0x1  << BLIS_COMP_DOMAIN_SHIFT )
#define   BLIS_COMP_PREC_BIT               ( 0x1  << BLIS_COMP_PREC_SHIFT )

// info2
#define BLIS_SCALAR_DT_BITS                ( 0x7  << BLIS_SCALAR_DT_SHIFT )
#define   BLIS_SCALAR_DOMAIN_BIT           ( 0x1  << BLIS_SCALAR_DOMAIN_SHIFT )
#define   BLIS_SCALAR_PREC_BIT             ( 0x1  << BLIS_SCALAR_PREC_SHIFT )
374 
375 
376 //
377 // -- BLIS enumerated type value definitions -----------------------------------
378 //
379 
// Domain and precision values, followed by the datatype values formed by
// combining them, plus the two special (integer and constant) datatype codes.
#define BLIS_BITVAL_REAL                      0x0
#define BLIS_BITVAL_COMPLEX                   BLIS_DOMAIN_BIT
#define BLIS_BITVAL_SINGLE_PREC               0x0
#define BLIS_BITVAL_DOUBLE_PREC               BLIS_PRECISION_BIT
#define   BLIS_BITVAL_FLOAT_TYPE            ( BLIS_BITVAL_REAL    | BLIS_BITVAL_SINGLE_PREC )
#define   BLIS_BITVAL_SCOMPLEX_TYPE         ( BLIS_BITVAL_COMPLEX | BLIS_BITVAL_SINGLE_PREC )
#define   BLIS_BITVAL_DOUBLE_TYPE           ( BLIS_BITVAL_REAL    | BLIS_BITVAL_DOUBLE_PREC )
#define   BLIS_BITVAL_DCOMPLEX_TYPE         ( BLIS_BITVAL_COMPLEX | BLIS_BITVAL_DOUBLE_PREC )
#define   BLIS_BITVAL_INT_TYPE                0x04
#define   BLIS_BITVAL_CONST_TYPE              0x05

// Transposition and conjugation values.
#define BLIS_BITVAL_NO_TRANS                  0x0
#define BLIS_BITVAL_TRANS                     BLIS_TRANS_BIT
#define BLIS_BITVAL_NO_CONJ                   0x0
#define BLIS_BITVAL_CONJ                      BLIS_CONJ_BIT
#define BLIS_BITVAL_CONJ_TRANS              ( BLIS_BITVAL_CONJ | BLIS_BITVAL_TRANS )

// Stored-part values. Upper and lower each include the diagonal bit; dense
// sets all three uplo bits.
#define BLIS_BITVAL_ZEROS                     0x0
#define BLIS_BITVAL_UPPER                   ( BLIS_DIAG_BIT | BLIS_UPPER_BIT )
#define BLIS_BITVAL_LOWER                   ( BLIS_DIAG_BIT | BLIS_LOWER_BIT )
#define BLIS_BITVAL_DENSE                     BLIS_UPLO_BITS

// Diagonal values.
#define BLIS_BITVAL_NONUNIT_DIAG              0x0
#define BLIS_BITVAL_UNIT_DIAG                 BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG               BLIS_INVERT_DIAG_BIT

// Pack format codes, positioned in the pack format sub-field.
#define BLIS_BITVAL_NOT_PACKED                0x0
#define   BLIS_BITVAL_4MI                   ( 0x1  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_3MI                   ( 0x2  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_4MS                   ( 0x3  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_3MS                   ( 0x4  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_RO                    ( 0x5  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_IO                    ( 0x6  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_RPI                   ( 0x7  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_1E                    ( 0x8  << BLIS_PACK_FORMAT_SHIFT )
#define   BLIS_BITVAL_1R                    ( 0x9  << BLIS_PACK_FORMAT_SHIFT )

// Basic pack schemas. Every packed schema sets the pack bit; the row/column
// bit selects columns and the panel bit selects panel storage.
#define   BLIS_BITVAL_PACKED_UNSPEC         ( BLIS_PACK_BIT )
#define   BLIS_BITVAL_PACKED_ROWS           ( BLIS_PACK_BIT )
#define   BLIS_BITVAL_PACKED_COLUMNS        ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
#define   BLIS_BITVAL_PACKED_ROW_PANELS     ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
#define   BLIS_BITVAL_PACKED_COL_PANELS     ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )

// Panel schemas for each pack format: the basic row- or column-panel schema
// combined with the format code.
#define   BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_4MI )
#define   BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_4MI )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_3MI )
#define   BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_3MI )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_4MS ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_4MS )
#define   BLIS_BITVAL_PACKED_COL_PANELS_4MS ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_4MS )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_3MS ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_3MS )
#define   BLIS_BITVAL_PACKED_COL_PANELS_3MS ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_3MS )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_RO  ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_RO  )
#define   BLIS_BITVAL_PACKED_COL_PANELS_RO  ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_RO  )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_IO  ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_IO  )
#define   BLIS_BITVAL_PACKED_COL_PANELS_IO  ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_IO  )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_RPI )
#define   BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_RPI )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_1E  ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_1E  )
#define   BLIS_BITVAL_PACKED_COL_PANELS_1E  ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_1E  )
#define   BLIS_BITVAL_PACKED_ROW_PANELS_1R  ( BLIS_BITVAL_PACKED_ROW_PANELS | BLIS_BITVAL_1R  )
#define   BLIS_BITVAL_PACKED_COL_PANELS_1R  ( BLIS_BITVAL_PACKED_COL_PANELS | BLIS_BITVAL_1R  )

// Packed panel order values.
#define BLIS_BITVAL_PACK_FWD_IF_UPPER         0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER         BLIS_PACK_REV_IF_UPPER_BIT
#define BLIS_BITVAL_PACK_FWD_IF_LOWER         0x0
#define BLIS_BITVAL_PACK_REV_IF_LOWER         BLIS_PACK_REV_IF_LOWER_BIT

// Packed buffer type values.
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK        0x0
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL      ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL      ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE      ( 0x3 << BLIS_PACK_BUFFER_SHIFT )

// Structure type values.
#define BLIS_BITVAL_GENERAL                   0x0
#define BLIS_BITVAL_HERMITIAN               ( 0x1 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_SYMMETRIC               ( 0x2 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_TRIANGULAR              ( 0x3 << BLIS_STRUC_SHIFT )
447 
448 
449 //
450 // -- BLIS enumerated type definitions -----------------------------------------
451 //
452 
453 // -- Operational parameter types --
454 
455 typedef enum
456 {
457 	BLIS_NO_TRANSPOSE      = 0x0,
458 	BLIS_TRANSPOSE         = BLIS_BITVAL_TRANS,
459 	BLIS_CONJ_NO_TRANSPOSE = BLIS_BITVAL_CONJ,
460 	BLIS_CONJ_TRANSPOSE    = BLIS_BITVAL_CONJ_TRANS
461 } trans_t;
462 
463 typedef enum
464 {
465 	BLIS_NO_CONJUGATE      = 0x0,
466 	BLIS_CONJUGATE         = BLIS_BITVAL_CONJ
467 } conj_t;
468 
469 typedef enum
470 {
471 	BLIS_ZEROS             = BLIS_BITVAL_ZEROS,
472 	BLIS_LOWER             = BLIS_BITVAL_LOWER,
473 	BLIS_UPPER             = BLIS_BITVAL_UPPER,
474 	BLIS_DENSE             = BLIS_BITVAL_DENSE
475 } uplo_t;
476 
// Side values (left or right).
typedef enum
{
	BLIS_LEFT  = 0,
	BLIS_RIGHT = 1
} side_t;
482 
483 typedef enum
484 {
485 	BLIS_NONUNIT_DIAG      = 0x0,
486 	BLIS_UNIT_DIAG         = BLIS_BITVAL_UNIT_DIAG
487 } diag_t;
488 
489 typedef enum
490 {
491 	BLIS_NO_INVERT_DIAG    = 0x0,
492 	BLIS_INVERT_DIAG       = BLIS_BITVAL_INVERT_DIAG
493 } invdiag_t;
494 
495 typedef enum
496 {
497 	BLIS_GENERAL           = BLIS_BITVAL_GENERAL,
498 	BLIS_HERMITIAN         = BLIS_BITVAL_HERMITIAN,
499 	BLIS_SYMMETRIC         = BLIS_BITVAL_SYMMETRIC,
500 	BLIS_TRIANGULAR        = BLIS_BITVAL_TRIANGULAR
501 } struc_t;
502 
503 
504 // -- Data type --
505 
506 typedef enum
507 {
508 	BLIS_FLOAT             = BLIS_BITVAL_FLOAT_TYPE,
509 	BLIS_DOUBLE            = BLIS_BITVAL_DOUBLE_TYPE,
510 	BLIS_SCOMPLEX          = BLIS_BITVAL_SCOMPLEX_TYPE,
511 	BLIS_DCOMPLEX          = BLIS_BITVAL_DCOMPLEX_TYPE,
512 	BLIS_INT               = BLIS_BITVAL_INT_TYPE,
513 	BLIS_CONSTANT          = BLIS_BITVAL_CONST_TYPE,
514 	BLIS_DT_LO             = BLIS_FLOAT,
515 	BLIS_DT_HI             = BLIS_DCOMPLEX
516 } num_t;
517 
518 typedef enum
519 {
520 	BLIS_REAL              = BLIS_BITVAL_REAL,
521 	BLIS_COMPLEX           = BLIS_BITVAL_COMPLEX
522 } dom_t;
523 
524 typedef enum
525 {
526 	BLIS_SINGLE_PREC       = BLIS_BITVAL_SINGLE_PREC,
527 	BLIS_DOUBLE_PREC       = BLIS_BITVAL_DOUBLE_PREC
528 } prec_t;
529 
530 
531 // -- Pack schema type --
532 
533 typedef enum
534 {
535 	BLIS_NOT_PACKED            = BLIS_BITVAL_NOT_PACKED,
536 	BLIS_PACKED_UNSPEC         = BLIS_BITVAL_PACKED_UNSPEC,
537 	BLIS_PACKED_VECTOR         = BLIS_BITVAL_PACKED_UNSPEC,
538 	BLIS_PACKED_ROWS           = BLIS_BITVAL_PACKED_ROWS,
539 	BLIS_PACKED_COLUMNS        = BLIS_BITVAL_PACKED_COLUMNS,
540 	BLIS_PACKED_ROW_PANELS     = BLIS_BITVAL_PACKED_ROW_PANELS,
541 	BLIS_PACKED_COL_PANELS     = BLIS_BITVAL_PACKED_COL_PANELS,
542 	BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
543 	BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
544 	BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
545 	BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
546 	BLIS_PACKED_ROW_PANELS_4MS = BLIS_BITVAL_PACKED_ROW_PANELS_4MS,
547 	BLIS_PACKED_COL_PANELS_4MS = BLIS_BITVAL_PACKED_COL_PANELS_4MS,
548 	BLIS_PACKED_ROW_PANELS_3MS = BLIS_BITVAL_PACKED_ROW_PANELS_3MS,
549 	BLIS_PACKED_COL_PANELS_3MS = BLIS_BITVAL_PACKED_COL_PANELS_3MS,
550 	BLIS_PACKED_ROW_PANELS_RO  = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
551 	BLIS_PACKED_COL_PANELS_RO  = BLIS_BITVAL_PACKED_COL_PANELS_RO,
552 	BLIS_PACKED_ROW_PANELS_IO  = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
553 	BLIS_PACKED_COL_PANELS_IO  = BLIS_BITVAL_PACKED_COL_PANELS_IO,
554 	BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
555 	BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
556 	BLIS_PACKED_ROW_PANELS_1E  = BLIS_BITVAL_PACKED_ROW_PANELS_1E,
557 	BLIS_PACKED_COL_PANELS_1E  = BLIS_BITVAL_PACKED_COL_PANELS_1E,
558 	BLIS_PACKED_ROW_PANELS_1R  = BLIS_BITVAL_PACKED_ROW_PANELS_1R,
559 	BLIS_PACKED_COL_PANELS_1R  = BLIS_BITVAL_PACKED_COL_PANELS_1R
560 } pack_t;
561 
562 // We combine row and column packing into one "type", and we start
563 // with BLIS_PACKED_ROW_PANELS, _COLUMN_PANELS. We also count the
564 // schema pair for "4ms" (4m separated), because its bit value has
565 // been reserved, even though we don't use it.
566 #define BLIS_NUM_PACK_SCHEMA_TYPES 10
567 
568 
569 // -- Pack order type --
570 
571 typedef enum
572 {
573 	BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER,
574 	BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER,
575 
576 	BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER,
577 	BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER
578 } packord_t;
579 
580 
581 // -- Pack buffer type --
582 
583 typedef enum
584 {
585 	BLIS_BUFFER_FOR_A_BLOCK = BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
586 	BLIS_BUFFER_FOR_B_PANEL = BLIS_BITVAL_BUFFER_FOR_B_PANEL,
587 	BLIS_BUFFER_FOR_C_PANEL = BLIS_BITVAL_BUFFER_FOR_C_PANEL,
588 	BLIS_BUFFER_FOR_GEN_USE = BLIS_BITVAL_BUFFER_FOR_GEN_USE
589 } packbuf_t;
590 
591 
592 // -- Partitioning direction --
593 
// Partitioning direction values.
typedef enum
{
	BLIS_FWD = 0,
	BLIS_BWD = 1
} dir_t;
599 
600 
601 // -- Subpartition type --
602 
// Subpartition identifiers, numbered consecutively from 0.
typedef enum
{
	// Single-index subpartitions and their combinations.
	BLIS_SUBPART0     = 0,
	BLIS_SUBPART1     = 1,
	BLIS_SUBPART2     = 2,
	BLIS_SUBPART1AND0 = 3,
	BLIS_SUBPART1AND2 = 4,
	BLIS_SUBPART1A    = 5,
	BLIS_SUBPART1B    = 6,

	// Two-index subpartitions.
	BLIS_SUBPART00    = 7,
	BLIS_SUBPART10    = 8,
	BLIS_SUBPART20    = 9,
	BLIS_SUBPART01    = 10,
	BLIS_SUBPART11    = 11,
	BLIS_SUBPART21    = 12,
	BLIS_SUBPART02    = 13,
	BLIS_SUBPART12    = 14,
	BLIS_SUBPART22    = 15
} subpart_t;
622 
623 
624 // -- Matrix dimension type --
625 
// Matrix dimension identifiers: m is 0 and n is 1.
typedef enum
{
	BLIS_M,
	BLIS_N
} mdim_t;
631 
632 
633 // -- Machine parameter types --
634 
// Machine parameter identifiers, numbered consecutively from 0.
typedef enum
{
	BLIS_MACH_EPS      = 0,
	BLIS_MACH_SFMIN    = 1,
	BLIS_MACH_BASE     = 2,
	BLIS_MACH_PREC     = 3,
	BLIS_MACH_NDIGMANT = 4,
	BLIS_MACH_RND      = 5,
	BLIS_MACH_EMIN     = 6,
	BLIS_MACH_RMIN     = 7,
	BLIS_MACH_EMAX     = 8,
	BLIS_MACH_RMAX     = 9,
	BLIS_MACH_EPS2     = 10
} machval_t;

// The number of machine parameters, and the first and last identifiers in
// the range.
#define BLIS_NUM_MACH_PARAMS   11
#define BLIS_MACH_PARAM_FIRST  BLIS_MACH_EPS
#define BLIS_MACH_PARAM_LAST   BLIS_MACH_EPS2
653 
654 
655 // -- Induced method types --
656 
// Induced method identifiers, numbered consecutively from 0.
// BLIS_IND_FIRST and BLIS_IND_LAST alias the ends of the range.
typedef enum
{
	BLIS_3MH       = 0,
	BLIS_3M1       = 1,
	BLIS_4MH       = 2,
	BLIS_4M1B      = 3,
	BLIS_4M1A      = 4,
	BLIS_1M        = 5,
	BLIS_NAT       = 6,

	BLIS_IND_FIRST = 0,
	BLIS_IND_LAST  = BLIS_NAT
} ind_t;

// The number of induced methods, derived from the last identifier.
#define BLIS_NUM_IND_METHODS (BLIS_NAT+1)

// bli_*_oapi.c uses the following names to construct ind_t values from the
// induced method substrings that go into function names.
#define bli_3mh  BLIS_3MH
#define bli_3m1  BLIS_3M1
#define bli_4mh  BLIS_4MH
#define bli_4mb  BLIS_4M1B
#define bli_4m1  BLIS_4M1A
#define bli_1m   BLIS_1M
#define bli_nat  BLIS_NAT
681 
682 
683 // -- Kernel ID types --
684 
// Level-1v kernel IDs, numbered consecutively from 0.
typedef enum
{
	BLIS_ADDV_KER    = 0,
	BLIS_AMAXV_KER   = 1,
	BLIS_AXPBYV_KER  = 2,
	BLIS_AXPYV_KER   = 3,
	BLIS_COPYV_KER   = 4,
	BLIS_DOTV_KER    = 5,
	BLIS_DOTXV_KER   = 6,
	BLIS_INVERTV_KER = 7,
	BLIS_SCALV_KER   = 8,
	BLIS_SCAL2V_KER  = 9,
	BLIS_SETV_KER    = 10,
	BLIS_SUBV_KER    = 11,
	BLIS_SWAPV_KER   = 12,
	BLIS_XPBYV_KER   = 13
} l1vkr_t;

// The number of level-1v kernel IDs listed above.
#define BLIS_NUM_LEVEL1V_KERS 14
704 
705 
// Level-1f kernel IDs, numbered consecutively from 0.
typedef enum
{
	BLIS_AXPY2V_KER    = 0,
	BLIS_DOTAXPYV_KER  = 1,
	BLIS_AXPYF_KER     = 2,
	BLIS_DOTXF_KER     = 3,
	BLIS_DOTXAXPYF_KER = 4
} l1fkr_t;

// The number of level-1f kernel IDs listed above.
#define BLIS_NUM_LEVEL1F_KERS 5
716 
717 
// Packm and unpackm kernel IDs. Each family is numbered consecutively from 0
// through 31, so BLIS_PACKM_<n>XK_KER and BLIS_UNPACKM_<n>XK_KER both have
// the value n.
typedef enum
{
	// Packm kernel IDs: 0 through 31.
	BLIS_PACKM_0XK_KER = 0,
	BLIS_PACKM_1XK_KER,
	BLIS_PACKM_2XK_KER,
	BLIS_PACKM_3XK_KER,
	BLIS_PACKM_4XK_KER,
	BLIS_PACKM_5XK_KER,
	BLIS_PACKM_6XK_KER,
	BLIS_PACKM_7XK_KER,
	BLIS_PACKM_8XK_KER,
	BLIS_PACKM_9XK_KER,
	BLIS_PACKM_10XK_KER,
	BLIS_PACKM_11XK_KER,
	BLIS_PACKM_12XK_KER,
	BLIS_PACKM_13XK_KER,
	BLIS_PACKM_14XK_KER,
	BLIS_PACKM_15XK_KER,
	BLIS_PACKM_16XK_KER,
	BLIS_PACKM_17XK_KER,
	BLIS_PACKM_18XK_KER,
	BLIS_PACKM_19XK_KER,
	BLIS_PACKM_20XK_KER,
	BLIS_PACKM_21XK_KER,
	BLIS_PACKM_22XK_KER,
	BLIS_PACKM_23XK_KER,
	BLIS_PACKM_24XK_KER,
	BLIS_PACKM_25XK_KER,
	BLIS_PACKM_26XK_KER,
	BLIS_PACKM_27XK_KER,
	BLIS_PACKM_28XK_KER,
	BLIS_PACKM_29XK_KER,
	BLIS_PACKM_30XK_KER,
	BLIS_PACKM_31XK_KER,

	// Unpackm kernel IDs: numbering restarts at 0 and again runs through 31.
	BLIS_UNPACKM_0XK_KER = 0,
	BLIS_UNPACKM_1XK_KER,
	BLIS_UNPACKM_2XK_KER,
	BLIS_UNPACKM_3XK_KER,
	BLIS_UNPACKM_4XK_KER,
	BLIS_UNPACKM_5XK_KER,
	BLIS_UNPACKM_6XK_KER,
	BLIS_UNPACKM_7XK_KER,
	BLIS_UNPACKM_8XK_KER,
	BLIS_UNPACKM_9XK_KER,
	BLIS_UNPACKM_10XK_KER,
	BLIS_UNPACKM_11XK_KER,
	BLIS_UNPACKM_12XK_KER,
	BLIS_UNPACKM_13XK_KER,
	BLIS_UNPACKM_14XK_KER,
	BLIS_UNPACKM_15XK_KER,
	BLIS_UNPACKM_16XK_KER,
	BLIS_UNPACKM_17XK_KER,
	BLIS_UNPACKM_18XK_KER,
	BLIS_UNPACKM_19XK_KER,
	BLIS_UNPACKM_20XK_KER,
	BLIS_UNPACKM_21XK_KER,
	BLIS_UNPACKM_22XK_KER,
	BLIS_UNPACKM_23XK_KER,
	BLIS_UNPACKM_24XK_KER,
	BLIS_UNPACKM_25XK_KER,
	BLIS_UNPACKM_26XK_KER,
	BLIS_UNPACKM_27XK_KER,
	BLIS_UNPACKM_28XK_KER,
	BLIS_UNPACKM_29XK_KER,
	BLIS_UNPACKM_30XK_KER,
	BLIS_UNPACKM_31XK_KER

} l1mkr_t;

// The number of kernel IDs in each of the two families above.
#define BLIS_NUM_PACKM_KERS   32
#define BLIS_NUM_UNPACKM_KERS 32
790 
791 
// Level-3 microkernel IDs, numbered consecutively from 0.
typedef enum
{
	BLIS_GEMM_UKR       = 0,
	BLIS_GEMMTRSM_L_UKR = 1,
	BLIS_GEMMTRSM_U_UKR = 2,
	BLIS_TRSM_L_UKR     = 3,
	BLIS_TRSM_U_UKR     = 4
} l3ukr_t;

// The number of level-3 microkernel IDs listed above.
#define BLIS_NUM_LEVEL3_UKRS 5
802 
803 
// Microkernel implementation type IDs, numbered consecutively from 0.
typedef enum
{
	BLIS_REFERENCE_UKERNEL = 0,
	BLIS_VIRTUAL_UKERNEL   = 1,
	BLIS_OPTIMIZED_UKERNEL = 2,
	BLIS_NOTAPPLIC_UKERNEL = 3
} kimpl_t;

// The number of implementation type IDs listed above.
#define BLIS_NUM_UKR_IMPL_TYPES 4
813 
814 
// NOTE: Everything between this #if 0 and its matching #endif is compiled
// out, so neither l3sup_t nor BLIS_NUM_LEVEL3_SUP_UKRS is defined. The sup
// kernel arrays in cntx_t are sized by BLIS_NUM_3OP_RC_COMBOS (see stor3_t)
// rather than by this count.
#if 0
typedef enum
{
	// RV = row-stored, contiguous vector-loading
	// RG = row-stored, non-contiguous gather-loading
	// CV = column-stored, contiguous vector-loading
	// CG = column-stored, non-contiguous gather-loading

	// RD = row-stored, dot-based
	// CD = col-stored, dot-based

	// RC = row-stored, column-times-column
	// CR = column-stored, row-times-row

	// GX = general-stored generic implementation

	BLIS_GEMMSUP_RV_UKR = 0,
	BLIS_GEMMSUP_RG_UKR,
	BLIS_GEMMSUP_CV_UKR,
	BLIS_GEMMSUP_CG_UKR,

	BLIS_GEMMSUP_RD_UKR,
	BLIS_GEMMSUP_CD_UKR,

	BLIS_GEMMSUP_RC_UKR,
	BLIS_GEMMSUP_CR_UKR,

	BLIS_GEMMSUP_GX_UKR,
} l3sup_t;

#define BLIS_NUM_LEVEL3_SUP_UKRS 9
#endif
847 
848 
// IDs for the storage combinations of three operands. The values are
// zero-based and consecutive; cntx_t sizes its l3_sup_kers[] and
// l3_sup_kers_prefs[] arrays with BLIS_NUM_3OP_RC_COMBOS.
// NOTE(review): each letter presumably names the storage of one operand
// (R = row, C = column, G = general) -- confirm against the sup code.
typedef enum
{
	// 3-operand storage combinations
	BLIS_RRR = 0,
	BLIS_RRC = 1,
	BLIS_RCR = 2,
	BLIS_RCC = 3,
	BLIS_CRR = 4,
	BLIS_CRC = 5,
	BLIS_CCR = 6,
	BLIS_CCC = 7,
	BLIS_XXX = 8,

	// The combinations involving 'G' are currently compiled out.
#if 0
	BLIS_RRG,
	BLIS_RCG,
	BLIS_RGR,
	BLIS_RGC,
	BLIS_RGG,
	BLIS_CRG,
	BLIS_CCG,
	BLIS_CGR,
	BLIS_CGC,
	BLIS_CGG,
	BLIS_GRR,
	BLIS_GRC,
	BLIS_GRG,
	BLIS_GCR,
	BLIS_GCC,
	BLIS_GCG,
	BLIS_GGR,
	BLIS_GGC,
	BLIS_GGG,
#endif
} stor3_t;

// Must equal the number of enabled enumerators in stor3_t.
#define BLIS_NUM_3OP_RC_COMBOS 9
//#define BLIS_NUM_3OP_RCG_COMBOS 27
887 
888 
// NOTE: The thridx_t enum below is disabled via #if 0 and is therefore not
// compiled; only BLIS_NUM_LOOPS is live.
#if 0
typedef enum
{
	BLIS_JC_IDX = 0,
	BLIS_PC_IDX,
	BLIS_IC_IDX,
	BLIS_JR_IDX,
	BLIS_IR_IDX,
	BLIS_PR_IDX
} thridx_t;
#endif

// Equals the number of enumerators in the disabled thridx_t above. Used to
// size the thrloop[] array in rntm_t.
#define BLIS_NUM_LOOPS 6
902 
903 
904 // -- Operation ID type --
905 
typedef enum
{
	//
	// NOTE: The level-3 operation IDs (BLIS_GEMM through BLIS_TRSM) are
	// used in bli_l3_ind.c to index into arrays. If additional values
	// are ever added to this enum, you must either:
	// - keep the level-3 values beginning at index 0; or
	// - if the range is moved so that it no longer begins at index 0,
	//   implement something like a BLIS_OPID_LEVEL3_RANGE_START value
	//   that can be subtracted from an opid_t value to map it back to
	//   a zero-based range.
	//
	BLIS_GEMM  = 0,
	BLIS_GEMMT = 1,
	BLIS_HEMM  = 2,
	BLIS_HERK  = 3,
	BLIS_HER2K = 4,
	BLIS_SYMM  = 5,
	BLIS_SYRK  = 6,
	BLIS_SYR2K = 7,
	BLIS_TRMM3 = 8,
	BLIS_TRMM  = 9,
	BLIS_TRSM  = 10,

	// Sits immediately past the level-3 range and is not counted by
	// BLIS_NUM_LEVEL3_OPS.
	BLIS_NOID  = 11
} opid_t;

// Number of level-3 operation IDs (BLIS_GEMM through BLIS_TRSM).
#define BLIS_NUM_LEVEL3_OPS 11
936 
937 
938 // -- Blocksize ID type --
939 
typedef enum
{
	// NOTE: the level-3 blocksizes MUST be indexed starting at zero.
	// At one point, this assumption was made in bli_cntx_set_blkszs()
	// and friends.

	// Level-3 blocksizes.
	BLIS_KR      = 0,
	BLIS_MR      = 1,
	BLIS_NR      = 2,
	BLIS_MC      = 3,
	BLIS_KC      = 4,
	BLIS_NC      = 5,

	// Level-2 blocksizes.
	BLIS_M2      = 6,  // level-2 blocksize in m dimension
	BLIS_N2      = 7,  // level-2 blocksize in n dimension

	// Level-1f fusing factors.
	BLIS_AF      = 8,  // level-1f axpyf fusing factor
	BLIS_DF      = 9,  // level-1f dotxf fusing factor
	BLIS_XF      = 10, // level-1f dotxaxpyf fusing factor

	// Used as a placeholder when blocksizes are not applicable. Not
	// counted by BLIS_NUM_BLKSZS.
	BLIS_NO_PART = 11
} bszid_t;

// Number of real blocksize IDs (BLIS_KR through BLIS_XF).
#define BLIS_NUM_BLKSZS 11
964 
965 
966 // -- Threshold ID type --
967 
// IDs for the level-3 small/unpacked matrix thresholds, one per dimension.
// cntx_t sizes its l3_sup_thresh[] array with BLIS_NUM_THRESH.
typedef enum
{
	BLIS_MT = 0, // threshold in the m dimension
	BLIS_NT = 1, // threshold in the n dimension
	BLIS_KT = 2  // threshold in the k dimension
} threshid_t;

// Must equal the number of enumerators in threshid_t.
#define BLIS_NUM_THRESH 3
977 
978 
979 // -- Architecture ID type --
980 
981 // NOTE: This typedef enum must be kept up-to-date with the arch_t
982 // string array in bli_arch.c. Whenever values are added/inserted
983 // OR if values are rearranged, be sure to update the string array
984 // in bli_arch.c.
985 
typedef enum
{
	// NOTE: The C language standard guarantees that the first enum value
	// starts at 0, and that each subsequent enumerator without an explicit
	// value is one greater than its predecessor.

	// Intel
	BLIS_ARCH_SKX,
	BLIS_ARCH_KNL,
	BLIS_ARCH_KNC,
	BLIS_ARCH_HASWELL,
	BLIS_ARCH_SANDYBRIDGE,
	BLIS_ARCH_PENRYN,

	// AMD
	BLIS_ARCH_ZEN2,
	BLIS_ARCH_ZEN,
	BLIS_ARCH_EXCAVATOR,
	BLIS_ARCH_STEAMROLLER,
	BLIS_ARCH_PILEDRIVER,
	BLIS_ARCH_BULLDOZER,

	// ARM
	BLIS_ARCH_THUNDERX2,
	BLIS_ARCH_CORTEXA57,
	BLIS_ARCH_CORTEXA53,
	BLIS_ARCH_CORTEXA15,
	BLIS_ARCH_CORTEXA9,

	// IBM/Power
	BLIS_ARCH_POWER10,
	BLIS_ARCH_POWER9,
	BLIS_ARCH_POWER7,
	BLIS_ARCH_BGQ,

	// Generic architecture/configuration
	BLIS_ARCH_GENERIC,

	// The total number of defined architectures. This must be last in the
	// list of enums since its definition assumes that the previous enum
	// value (BLIS_ARCH_GENERIC) is given index num_archs-1.
	BLIS_NUM_ARCHS

} arch_t;

// NOTE: BLIS_NUM_ARCHS is provided solely by the final arch_t enumerator
// above, so it tracks the number of architectures automatically (currently
// 22). Do not also define it as a preprocessor macro: a macro of the same
// name would shadow the enumerator for all code that follows and would go
// stale as soon as an architecture is added or removed.
1033 
1034 
1035 //
1036 // -- BLIS misc. structure types -----------------------------------------------
1037 //
1038 
1039 // These headers must be included here (or earlier) because definitions they
1040 // provide are needed in the pool_t and related structs.
1041 #include "bli_pthread.h"
1042 #include "bli_malloc.h"
1043 
1044 // -- Pool block type --
1045 
// A pool block: an untyped buffer pointer paired with a size value.
// NOTE(review): block_size is presumably the size of buf in bytes -- confirm
// against the pool implementation.
typedef struct
{
	void*     buf;
	siz_t     block_size;

} pblk_t;
1052 
1053 
1054 // -- Pool type --
1055 
// A pool of blocks, together with the allocation parameters and the
// malloc()/free()-style function pointers associated with the pool.
typedef struct
{
	// Untyped pointer to the pool's block pointers, and its length.
	// NOTE(review): presumably an array of pblk_t -- confirm against the
	// pool implementation.
	void*     block_ptrs;
	dim_t     block_ptrs_len;

	// Bookkeeping indices/counts for the blocks in the pool.
	// NOTE(review): top_index presumably marks the next available block --
	// confirm against the pool implementation.
	dim_t     top_index;
	dim_t     num_blocks;

	// Size, alignment, and offset parameters for the pool's blocks.
	siz_t     block_size;
	siz_t     align_size;
	siz_t     offset_size;

	// Allocation and deallocation function pointers for this pool.
	malloc_ft malloc_fp;
	free_ft   free_fp;

} pool_t;
1072 
1073 
1074 // -- Array type --
1075 
// A generic array descriptor: an untyped buffer plus an element count and a
// per-element size.
// NOTE(review): elem_size is presumably in bytes -- confirm against the array
// implementation.
typedef struct
{
	void*     buf;

	siz_t     num_elem;
	siz_t     elem_size;

} array_t;
1084 
1085 
1086 // -- Locked pool-of-arrays-of-pools type --
1087 
// A pool_t bundled with a mutex and a default array length.
// NOTE(review): the mutex presumably guards all access to pool -- confirm
// against the apool implementation.
typedef struct
{
	bli_pthread_mutex_t mutex;
	pool_t              pool;

	// NOTE(review): presumably the length given to newly created arrays --
	// confirm against the apool implementation.
	siz_t               def_array_len;

} apool_t;
1096 
1097 
1098 // -- packing block allocator: Locked set of pools type --
1099 
// The packing block allocator: a fixed set of three pools bundled with a
// mutex, plus parameters used for general-purpose allocation.
// NOTE(review): the three pools presumably correspond to the packbuf_t
// buffer types -- confirm against the membrk implementation.
typedef struct membrk_s
{
	pool_t              pools[3];
	bli_pthread_mutex_t mutex;

	// These fields are used for general-purpose allocation.
	siz_t               align_size;
	malloc_ft           malloc_fp;
	free_ft             free_fp;

} membrk_t;
1111 
1112 
1113 // -- Memory object type --
1114 
// A memory object: a pool block (stored by value) together with its buffer
// type, a pointer to a pool, and a size.
// NOTE(review): pool is presumably the pool from which pblk was acquired, and
// size the number of bytes requested -- confirm against the memory allocator
// implementation.
typedef struct mem_s
{
	pblk_t    pblk;
	packbuf_t buf_type;
	pool_t*   pool;
	siz_t     size;
} mem_t;
1122 
1123 
1124 // -- Control tree node type --
1125 
// A node in a control tree. Each node records an operation family, a
// blocksize ID, and a function pointer, and links to up to two further nodes
// of the same type (sub_prenode and sub_node).
struct cntl_s
{
	// Basic fields (usually required).
	opid_t         family;
	bszid_t        bszid;
	void_fp        var_func;
	struct cntl_s* sub_prenode;
	struct cntl_s* sub_node;

	// Optional fields (needed only by some operations such as packm).
	// NOTE: first field of params must be a uint64_t containing the size
	// of the struct.
	void*          params;

	// Internal fields that track "cached" data.
	mem_t          pack_mem;
};
// cntl_t is the typedef name by which struct cntl_s is normally referenced.
typedef struct cntl_s cntl_t;
1144 
1145 
1146 // -- Blocksize object type --
1147 
// A blocksize object: for each of the BLIS_NUM_FP_TYPES floating-point
// datatypes, one primary blocksize value and one blocksize extension.
// NOTE(review): the arrays are presumably indexed by num_t datatype ID --
// confirm against the blksz accessor functions.
typedef struct blksz_s
{
	// Primary blocksize values.
	dim_t  v[BLIS_NUM_FP_TYPES];

	// Blocksize extensions.
	dim_t  e[BLIS_NUM_FP_TYPES];

} blksz_t;
1157 
1158 
1159 // -- Function pointer object type --
1160 
// A function pointer object: one kernel function address for each of the
// BLIS_NUM_FP_TYPES floating-point datatypes.
// NOTE(review): the array is presumably indexed by num_t datatype ID --
// confirm against the func accessor functions.
typedef struct func_s
{
	// Kernel function address.
	void_fp ptr[BLIS_NUM_FP_TYPES];

} func_t;
1167 
1168 
1169 // -- Multi-boolean object type --
1170 
// A multi-boolean object: one bool for each of the BLIS_NUM_FP_TYPES
// floating-point datatypes.
// NOTE(review): the array is presumably indexed by num_t datatype ID --
// confirm against the mbool accessor functions.
typedef struct mbool_s
{
	bool v[BLIS_NUM_FP_TYPES];

} mbool_t;
1176 
1177 
1178 // -- Auxiliary kernel info type --
1179 
1180 // Note: This struct is used by macro-kernels to package together extra
1181 // parameter values that may be of use to the micro-kernel without
1182 // cluttering up the micro-kernel interface itself.
1183 
// Auxiliary information passed alongside the micro-kernel arguments: pack
// schemas, next-micropanel pointers, imaginary strides, and panel strides
// for the A and B operands.
typedef struct
{
	// The pack schemas of A and B.
	pack_t schema_a;
	pack_t schema_b;

	// Pointers to the micro-panels of A and B which will be used by the
	// next call to the micro-kernel.
	void*  a_next;
	void*  b_next;

	// The imaginary strides of A and B.
	inc_t  is_a;
	inc_t  is_b;

	// The panel strides of A and B.
	// NOTE: These are only used in situations where iteration over the
	// micropanels takes place in part within the kernel code (e.g. sup
	// millikernels).
	inc_t  ps_a;
	inc_t  ps_b;

	// The type to convert to on output.
	// NOTE: This field is currently disabled (commented out), so it is
	// not part of the struct.
	//num_t  dt_on_output;

} auxinfo_t;
1210 
1211 
1212 // -- Global scalar constant data struct --
1213 
1214 // Note: This struct is used only when statically initializing the
1215 // global scalar constants in bli_const.c.
// Holds one representation of a constant in each of five types: float,
// double, scomplex, dcomplex, and gint_t. Instances are populated by the
// bli_obj_init_constdata() initializer macro, and sizeof( constdata_t ) is
// used as the element size by bli_obj_init_const().
typedef struct constdata_s
{
	float    s; // single-precision real representation
	double   d; // double-precision real representation
	scomplex c; // single-precision complex representation
	dcomplex z; // double-precision complex representation
	gint_t   i; // integer representation

} constdata_t;
1225 
1226 
1227 //
1228 // -- BLIS object type definitions ---------------------------------------------
1229 //
1230 
// The BLIS object: a descriptor that bundles a buffer pointer with offsets,
// dimensions, strides, property bitfields, an internal scalar, and
// pack-related metadata.
// NOTE: If fields are added, removed, or reordered, the initializer macros
// (BLIS_OBJECT_INITIALIZER, BLIS_OBJECT_INITIALIZER_1X1, bli_obj_init_const)
// and the copy functions (bli_obj_init_full_shallow_copy_of,
// bli_obj_init_subpart_from) defined below must be updated to match.
typedef struct obj_s
{
	// Basic fields
	// NOTE(review): root presumably points to the object from which this
	// one was ultimately partitioned -- confirm against the obj API.
	struct obj_s* root;

	// Two-element offsets and dimensions, and the diagonal offset.
	dim_t         off[2];
	dim_t         dim[2];
	doff_t        diag_off;

	// Property bitfields. Per the pre-initializer notes below, info holds
	// the dt, target_dt, exec_dt, and comp_dt bitfields (among others), and
	// info2 holds the scalar_dt bitfield.
	objbits_t     info;
	objbits_t     info2;
	siz_t         elem_size;

	// Data buffer and strides.
	// NOTE(review): rs/cs/is are presumably the row, column, and imaginary
	// strides -- confirm against the obj API.
	void*         buffer;
	inc_t         rs;
	inc_t         cs;
	inc_t         is;

	// Bufferless scalar storage
	atom_t        scalar;

	// Pack-related fields
	dim_t         m_padded; // m dimension of matrix, including any padding
	dim_t         n_padded; // n dimension of matrix, including any padding
	inc_t         ps;       // panel stride (distance to next panel)
	inc_t         pd;       // panel dimension (the "width" of a panel:
	                        // usually MR or NR)
	dim_t         m_panel;  // m dimension of a "full" panel
	dim_t         n_panel;  // n dimension of a "full" panel
} obj_t;
1261 
1262 // Pre-initializors. Things that must be set afterwards:
1263 // - root object pointer
1264 // - info bitfields: dt, target_dt, exec_dt, comp_dt
1265 // - info2 bitfields: scalar_dt
1266 // - elem_size
1267 // - dims, strides
1268 // - buffer
1269 // - internal scalar buffer (must always set imaginary component)
1270 
// Brace-enclosed static initializer for an obj_t with 0 x 0 dimensions. All
// offsets, strides (other than is, which is 1), and pack-related fields are
// zero; the buffer and root pointers are NULL; and the info bitfield is set
// to dense, general.
#define BLIS_OBJECT_INITIALIZER \
{ \
	/* Basic fields */ \
	.root      = NULL, \
	.off       = { 0, 0 }, \
	.dim       = { 0, 0 }, \
	.diag_off  = 0, \
\
	/* Property bitfields and element size */ \
	.info      = ( BLIS_BITVAL_DENSE | BLIS_BITVAL_GENERAL ), \
	.info2     = 0x0, \
	.elem_size = sizeof( float ), /* placeholder; overwritten later. */ \
\
	/* Buffer and strides */ \
	.buffer    = NULL, \
	.rs        = 0, \
	.cs        = 0, \
	.is        = 1, \
\
	/* Bufferless scalar storage */ \
	.scalar    = { 0.0, 0.0 }, \
\
	/* Pack-related fields */ \
	.m_padded  = 0, \
	.n_padded  = 0, \
	.ps        = 0, \
	.pd        = 0, \
	.m_panel   = 0, \
	.n_panel   = 0 \
}
1298 
// Brace-enclosed static initializer for an obj_t with 1 x 1 dimensions. All
// offsets, strides (other than is, which is 1), and pack-related fields are
// zero; the buffer and root pointers are NULL; and the info bitfield is set
// to dense, general.
#define BLIS_OBJECT_INITIALIZER_1X1 \
{ \
	/* Basic fields */ \
	.root      = NULL, \
	.off       = { 0, 0 }, \
	.dim       = { 1, 1 }, \
	.diag_off  = 0, \
\
	/* Property bitfields and element size */ \
	.info      = ( BLIS_BITVAL_DENSE | BLIS_BITVAL_GENERAL ), \
	.info2     = 0x0, \
	.elem_size = sizeof( float ), /* placeholder; overwritten later. */ \
\
	/* Buffer and strides */ \
	.buffer    = NULL, \
	.rs        = 0, \
	.cs        = 0, \
	.is        = 1, \
\
	/* Bufferless scalar storage */ \
	.scalar    = { 0.0, 0.0 }, \
\
	/* Pack-related fields */ \
	.m_padded  = 0, \
	.n_padded  = 0, \
	.ps        = 0, \
	.pd        = 0, \
	.m_panel   = 0, \
	.n_panel   = 0 \
}
1326 
// Define these inline functions here since they must be updated if the
// contents of obj_t change.
1329 
bli_obj_init_full_shallow_copy_of(obj_t * a,obj_t * b)1330 BLIS_INLINE void bli_obj_init_full_shallow_copy_of( obj_t* a, obj_t* b )
1331 {
1332 	b->root      = a->root;
1333 
1334 	b->off[0]    = a->off[0];
1335 	b->off[1]    = a->off[1];
1336 	b->dim[0]    = a->dim[0];
1337 	b->dim[1]    = a->dim[1];
1338 	b->diag_off  = a->diag_off;
1339 
1340 	b->info      = a->info;
1341 	b->info2     = a->info2;
1342 	b->elem_size = a->elem_size;
1343 
1344 	b->buffer    = a->buffer;
1345 	b->rs        = a->rs;
1346 	b->cs        = a->cs;
1347 	b->is        = a->is;
1348 
1349 	b->scalar    = a->scalar;
1350 
1351 	//b->pack_mem  = a->pack_mem;
1352 	b->m_padded  = a->m_padded;
1353 	b->n_padded  = a->n_padded;
1354 	b->ps        = a->ps;
1355 	b->pd        = a->pd;
1356 	b->m_panel   = a->m_panel;
1357 	b->n_panel   = a->n_panel;
1358 }
1359 
bli_obj_init_subpart_from(obj_t * a,obj_t * b)1360 BLIS_INLINE void bli_obj_init_subpart_from( obj_t* a, obj_t* b )
1361 {
1362 	b->root      = a->root;
1363 
1364 	b->off[0]    = a->off[0];
1365 	b->off[1]    = a->off[1];
1366 	// Avoid copying m and n since they will be overwritten.
1367 	//b->dim[0]    = a->dim[0];
1368 	//b->dim[1]    = a->dim[1];
1369 	b->diag_off  = a->diag_off;
1370 
1371 	b->info      = a->info;
1372 	b->info2     = a->info2;
1373 	b->elem_size = a->elem_size;
1374 
1375 	b->buffer    = a->buffer;
1376 	b->rs        = a->rs;
1377 	b->cs        = a->cs;
1378 	b->is        = a->is;
1379 
1380 	b->scalar    = a->scalar;
1381 
1382 	// Avoid copying pack_mem entry.
1383 	// FGVZ: You should probably make sure this is right.
1384 	//b->pack_mem  = a->pack_mem;
1385 	b->m_padded  = a->m_padded;
1386 	b->n_padded  = a->n_padded;
1387 	b->ps        = a->ps;
1388 	b->pd        = a->pd;
1389 	b->m_panel   = a->m_panel;
1390 	b->n_panel   = a->n_panel;
1391 }
1392 
1393 // Initializors for global scalar constants.
1394 // NOTE: These must remain cpp macros since they are initializor
1395 // expressions, not functions.
1396 
// Brace-enclosed static initializer for a 1 x 1 constant obj_t whose buffer
// is buffer0. The info bitfield is set to constant-type, dense, general; the
// element size is sizeof( constdata_t ); and all three strides are 1. Fields
// not named here (scalar and the pack-related fields) are left to the
// language's implicit zero-initialization.
#define bli_obj_init_const( buffer0 ) \
{ \
	/* Basic fields */ \
	.root      = NULL, \
	.off       = { 0, 0 }, \
	.dim       = { 1, 1 }, \
	.diag_off  = 0, \
\
	/* Property bitfields and element size */ \
	.info      = ( BLIS_BITVAL_CONST_TYPE | \
	               BLIS_BITVAL_DENSE      | \
	               BLIS_BITVAL_GENERAL ), \
	.info2     = 0x0, \
	.elem_size = sizeof( constdata_t ), \
\
	/* Buffer and strides */ \
	.buffer    = buffer0, \
	.rs        = 1, \
	.cs        = 1, \
	.is        = 1 \
}
1416 
// Brace-enclosed static initializer for a constdata_t in which every member
// represents the same value val: the real members receive val converted to
// their type, and the complex members receive val as the real part with a
// zero imaginary part.
// NOTE: val is parenthesized at each use so that the cast applies to the
// whole argument expression rather than only to its first operand (e.g.
// passing 1.5 + 1.5 must yield 3 for the integer member, not 2).
#define bli_obj_init_constdata( val ) \
{ \
	.s =           ( float  )( val ), \
	.d =           ( double )( val ), \
	.c = { .real = ( float  )( val ), .imag = 0.0f }, \
	.z = { .real = ( double )( val ), .imag = 0.0 }, \
	.i =           ( gint_t )( val ), \
}
1425 
1426 
1427 // -- Context type --
1428 
// The context: tables of blocksizes, kernel function pointers, and kernel
// preferences, plus an execution method and pack schemas. Each array is
// sized by the BLIS_NUM_* count that accompanies the corresponding ID type.
typedef struct cntx_s
{
	// Blocksizes, one per blocksize ID, and a blocksize ID for each.
	// NOTE(review): bmults presumably names, for each blocksize, the
	// blocksize of which it must be a multiple -- confirm against the
	// cntx implementation.
	blksz_t   blkszs[ BLIS_NUM_BLKSZS ];
	bszid_t   bmults[ BLIS_NUM_BLKSZS ];

	// Level-3 micro-kernels (virtual and native) and the native
	// micro-kernels' preferences, one entry per level-3 micro-kernel ID.
	func_t    l3_vir_ukrs[ BLIS_NUM_LEVEL3_UKRS ];
	func_t    l3_nat_ukrs[ BLIS_NUM_LEVEL3_UKRS ];
	mbool_t   l3_nat_ukrs_prefs[ BLIS_NUM_LEVEL3_UKRS ];

	// Level-3 sup (small/unpacked) thresholds, per-operation handlers,
	// blocksizes, kernels, and kernel preferences.
	blksz_t   l3_sup_thresh[ BLIS_NUM_THRESH ];
	void*     l3_sup_handlers[ BLIS_NUM_LEVEL3_OPS ];
	blksz_t   l3_sup_blkszs[ BLIS_NUM_BLKSZS ];
	func_t    l3_sup_kers[ BLIS_NUM_3OP_RC_COMBOS ];
	mbool_t   l3_sup_kers_prefs[ BLIS_NUM_3OP_RC_COMBOS ];

	// Level-1f and level-1v kernels.
	func_t    l1f_kers[ BLIS_NUM_LEVEL1F_KERS ];
	func_t    l1v_kers[ BLIS_NUM_LEVEL1V_KERS ];

	// Packing and unpacking kernels.
	func_t    packm_kers[ BLIS_NUM_PACKM_KERS ];
	func_t    unpackm_kers[ BLIS_NUM_UNPACKM_KERS ];

	// Execution method and the pack schemas.
	// NOTE(review): the three schemas presumably apply to blocks of A,
	// panels of B, and panels of C respectively -- confirm against the
	// cntx implementation.
	ind_t     method;
	pack_t    schema_a_block;
	pack_t    schema_b_panel;
	pack_t    schema_c_panel;

} cntx_t;
1456 
1457 
1458 // -- Runtime type --
1459 
1460 // NOTE: The order of these fields must be kept consistent with the definition
1461 // of the BLIS_RNTM_INITIALIZER macro in bli_rntm.h.
1462 
// The runtime object: threading parameters and packing/sup switches that
// the end-user may query, plus pointers to allocators that are attached
// internally.
typedef struct rntm_s
{
	// "External" fields: these may be queried by the end-user.
	bool      auto_factor;

	// Total thread count and one value per loop (BLIS_NUM_LOOPS entries).
	// NOTE(review): thrloop presumably holds the number of ways of
	// parallelism for each loop -- confirm against bli_rntm.h.
	dim_t     num_threads;
	dim_t     thrloop[ BLIS_NUM_LOOPS ];
	bool      pack_a; // enable/disable packing of left-hand matrix A.
	bool      pack_b; // enable/disable packing of right-hand matrix B.
	bool      l3_sup; // enable/disable small matrix handling in level-3 ops.

	// "Internal" fields: these should not be exposed to the end-user.

	// The small block pool, which is attached in the l3 thread decorator.
	pool_t*   sba_pool;

	// The packing block allocator, which is attached in the l3 thread decorator.
	membrk_t* membrk;

} rntm_t;
1483 
1484 
1485 // -- Error types --
1486 
// Error-checking levels.
typedef enum
{
	BLIS_NO_ERROR_CHECKING   = 0,
	BLIS_FULL_ERROR_CHECKING = 1
} errlev_t;
1492 
// Error codes. All values are negative. Every specific error code lies
// strictly between BLIS_ERROR_CODE_MIN (-9) and BLIS_ERROR_CODE_MAX (-170),
// with each category occupying its own block of ten; the generic
// BLIS_SUCCESS and BLIS_FAILURE codes lie outside that range.
typedef enum
{
	// Generic error codes
	BLIS_SUCCESS = -1,
	BLIS_FAILURE = -2,

	// Bound of the error code range nearest zero.
	BLIS_ERROR_CODE_MIN = -9,

	// General errors
	BLIS_INVALID_ERROR_CHECKING_LEVEL = -10,
	BLIS_UNDEFINED_ERROR_CODE         = -11,
	BLIS_NULL_POINTER                 = -12,
	BLIS_NOT_YET_IMPLEMENTED          = -13,

	// Parameter-specific errors
	BLIS_INVALID_SIDE          = -20,
	BLIS_INVALID_UPLO          = -21,
	BLIS_INVALID_TRANS         = -22,
	BLIS_INVALID_CONJ          = -23,
	BLIS_INVALID_DIAG          = -24,
	BLIS_INVALID_MACHVAL       = -25,
	BLIS_EXPECTED_NONUNIT_DIAG = -26,

	// Datatype-specific errors
	BLIS_INVALID_DATATYPE                 = -30,
	BLIS_EXPECTED_FLOATING_POINT_DATATYPE = -31,
	BLIS_EXPECTED_NONINTEGER_DATATYPE     = -32,
	BLIS_EXPECTED_NONCONSTANT_DATATYPE    = -33,
	BLIS_EXPECTED_REAL_DATATYPE           = -34,
	BLIS_EXPECTED_INTEGER_DATATYPE        = -35,
	BLIS_INCONSISTENT_DATATYPES           = -36,
	BLIS_EXPECTED_REAL_PROJ_OF            = -37,
	BLIS_EXPECTED_REAL_VALUED_OBJECT      = -38,
	BLIS_INCONSISTENT_PRECISIONS          = -39,

	// Dimension-specific errors
	BLIS_NONCONFORMAL_DIMENSIONS  = -40,
	BLIS_EXPECTED_SCALAR_OBJECT   = -41,
	BLIS_EXPECTED_VECTOR_OBJECT   = -42,
	BLIS_UNEQUAL_VECTOR_LENGTHS   = -43,
	BLIS_EXPECTED_SQUARE_OBJECT   = -44,
	BLIS_UNEXPECTED_OBJECT_LENGTH = -45,
	BLIS_UNEXPECTED_OBJECT_WIDTH  = -46,
	BLIS_UNEXPECTED_VECTOR_DIM    = -47,
	BLIS_UNEXPECTED_DIAG_OFFSET   = -48,
	BLIS_NEGATIVE_DIMENSION       = -49,

	// Stride-specific errors
	BLIS_INVALID_ROW_STRIDE             = -50,
	BLIS_INVALID_COL_STRIDE             = -51,
	BLIS_INVALID_DIM_STRIDE_COMBINATION = -52,

	// Structure-specific errors
	BLIS_EXPECTED_GENERAL_OBJECT    = -60,
	BLIS_EXPECTED_HERMITIAN_OBJECT  = -61,
	BLIS_EXPECTED_SYMMETRIC_OBJECT  = -62,
	BLIS_EXPECTED_TRIANGULAR_OBJECT = -63,

	// Storage-specific errors
	BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT = -70,

	// Partitioning-specific errors
	BLIS_INVALID_3x1_SUBPART = -80,
	BLIS_INVALID_1x3_SUBPART = -81,
	BLIS_INVALID_3x3_SUBPART = -82,

	// Control tree-specific errors
	BLIS_UNEXPECTED_NULL_CONTROL_TREE = -90,

	// Packing-specific errors
	BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = -100,

	// Buffer-specific errors
	BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = -110,

	// Memory errors
	BLIS_MALLOC_RETURNED_NULL = -120,

	// Internal memory pool errors
	BLIS_INVALID_PACKBUF                = -130,
	BLIS_EXHAUSTED_CONTIG_MEMORY_POOL   = -131,
	BLIS_INSUFFICIENT_STACK_BUF_SIZE    = -132,
	BLIS_ALIGNMENT_NOT_POWER_OF_TWO     = -133,
	BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = -134,

	// Object-related errors
	BLIS_EXPECTED_OBJECT_ALIAS = -140,

	// Architecture-related errors
	BLIS_INVALID_ARCH_ID        = -150,
	BLIS_UNINITIALIZED_GKS_CNTX = -151,

	// Blocksize-related errors
	BLIS_MC_DEF_NONMULTIPLE_OF_MR = -160,
	BLIS_MC_MAX_NONMULTIPLE_OF_MR = -161,
	BLIS_NC_DEF_NONMULTIPLE_OF_NR = -162,
	BLIS_NC_MAX_NONMULTIPLE_OF_NR = -163,
	BLIS_KC_DEF_NONMULTIPLE_OF_KR = -164,
	BLIS_KC_MAX_NONMULTIPLE_OF_KR = -165,

	// Bound of the error code range farthest from zero.
	BLIS_ERROR_CODE_MAX = -170
} err_t;
1595 
1596 #endif
1597