1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #ifndef KERNEL_EXTRA_H_
19 #define KERNEL_EXTRA_H_
20 
21 #include <cltypes.h>
22 
/** Size, in chars, of the CLBLASKernExtra::solverPriv array. */
enum { MAX_SOLVER_PRIVATE_SIZE = 16 };
26 
27 //
28 // Moving BUILD_OPTS_MAXLEN here. Originally in clblas-internal.h
29 //      Including "clblas-internal.h"
enum {
    /** Size, in chars, of the CLBLASKernExtra::buildOptions array */
    BUILD_OPTS_MAXLEN = 256,
    /*
     * NOTE(review): presumably the number of memory patterns kept per
     * BLAS function -- not visible from this header, confirm at the users
     */
    MEMPAT_PER_BLASFN = 8
};
34 
/**
 * @internal
 * @brief Identifiers of the BLAS kernel types
 *
 * The enumerators are numbered consecutively from zero, so the trailing
 * MAX_CLBLAS_KERNELS_PER_STEP equals the number of kernel types listed
 * before it.
 *
 * @ingroup BLAS_SOLVERIF_SPEC
 */
typedef enum CLBlasKernelType {
    /** Main computing kernel */
    CLBLAS_COMPUTING_KERNEL = 0,
    /** Kernel preparing matrix A */
    CLBLAS_PREP_A_KERNEL,
    /** Kernel preparing matrix B */
    CLBLAS_PREP_B_KERNEL,
    /** Count of the kernel types above; not a kernel type itself */
    MAX_CLBLAS_KERNELS_PER_STEP
} CLBlasKernelType;
47 
48 /**
49  * @internal
50  * @defgroup BLAS_SOLVERIF_SPEC BLAS specifics
51  * @ingroup SOLVERIF
52  */
53 
54 /*@{*/
55 
/**
 * @brief BLAS kernel flags
 *
 * Each flag occupies its own bit, so flags can be combined with bitwise OR.
 * Taken together, they identify the problem options a kernel is
 * generated for.
 */
typedef enum KernelExtraFlags {
    /** Matches a problem without any options */
    KEXTRA_NO_FLAGS = 0,

    /** Matrix A should be transposed */
    KEXTRA_TRANS_A = 1 << 0,
    /** Matrix A should be taken in the conjugate form */
    KEXTRA_CONJUGATE_A = 1 << 1,
    /** Matrix B should be transposed */
    KEXTRA_TRANS_B = 1 << 2,
    /** Matrix B should be taken in the conjugate form */
    KEXTRA_CONJUGATE_B = 1 << 3,
    /** Order is column major */
    KEXTRA_COLUMN_MAJOR = 1 << 4,
    /** Matrix A is upper triangular; lower triangular when not set */
    KEXTRA_UPPER_TRIANG = 1 << 5,
    /** Matrix A is placed on the right; on the left when not set */
    KEXTRA_SIDE_RIGHT = 1 << 6,
    /** Unit diagonal matrix */
    KEXTRA_UNIT_DIAGONAL = 1 << 7,

    /** Kernel should process tails of upper level blocks in M dimension */
    KEXTRA_TAILS_M = 1 << 8,
    /** Kernel should process tails of upper level blocks in N dimension */
    KEXTRA_TAILS_N = 1 << 9,
    /** Kernel should process tails of upper level blocks in K dimension */
    KEXTRA_TAILS_K = 1 << 10,

    /** Beta multiplier is zero */
    KEXTRA_BETA_ZERO = 1 << 11,

    /** Disable vectorization at block copying for matrix A */
    KEXTRA_NO_COPY_VEC_A = 1 << 12,
    /** Disable vectorization at block copying for matrix B */
    KEXTRA_NO_COPY_VEC_B = 1 << 13,
    /** Disable vectorization at block copying for matrix C */
    KEXTRA_NO_COPY_VEC_C = 1 << 14,

    /* SYRXK specific flags */

    /** Diagonal solution blocks are evaluated in a separate kernel */
    KEXTRA_SYRK_SEPARATE_DIAGONAL = 1 << 15,
    /** Evaluate diagonal solution blocks for a SYRXK function */
    KEXTRA_SYRK_EVALUATE_DIAGONAL = 1 << 16,
    /** 2k rank update */
    KEXTRA_SYRK_2K_RANK = 1 << 17,

    /* BLAS2 specific flags */

    /** Incx increment is one */
    KEXTRA_INCX_ONE = 1 << 18,
    /** Incy increment is one */
    KEXTRA_INCY_ONE = 1 << 19,

    /* Generator specific flags */

    /** MAD function can be used */
    /* FIXME: throw this kludge away */
    KEXTRA_ENABLE_MAD = 1 << 20,
    /*
     * FIXME: It's a kludge, pass further DeviceIndent structure to generators
     * NOTE(review): presumably marks an AMD device as the target -- confirm
     */
    KEXTRA_VENDOR_AMD = 1 << 21,

    /* Flags showing not zero starting offsets for kernels */

    KEXTRA_STARTM_NOT_ZERO = 1 << 22,
    KEXTRA_STARTN_NOT_ZERO = 1 << 23,
    /* No KEXTRA_STARTK_NOT_ZERO flag is defined; it is disabled. */

    /** Matrix A offset in a memory object is not zero */
    KEXTRA_A_OFF_NOT_ZERO = 1 << 24,
    /** Matrix B or vector X offset in a memory object is not zero */
    KEXTRA_BX_OFF_NOT_ZERO = 1 << 25,
    /** Matrix C or vector Y offset in a memory object is not zero */
    KEXTRA_CY_OFF_NOT_ZERO = 1 << 26,

    /** Kernel should process tails of lower level blocks in M dimension */
    KEXTRA_TAILS_M_LOWER = 1 << 27,
    /** Kernel should process tails of lower level blocks in N dimension */
    KEXTRA_TAILS_N_LOWER = 1 << 28,
    /** Kernel should process tails of lower level blocks in K dimension */
    KEXTRA_TAILS_K_LOWER = 1 << 29
} KernelExtraFlags;
134 
135 /**
136  * @internal
137  * @brief extra information CLBLAS kernel generator
138  * @ingroup BLAS_SOLVERIF_SPEC
139  */
140 typedef struct CLBLASKernExtra {
141     DataType dtype;             /**< Data type */
142     KernelExtraFlags flags;     /**< Kernel flags identifying a problem */
143     CLBlasKernelType kernType;  /**< Kernel type */
144     // Fixme: Deprecate it; now it is just for backward compatibility
145     unsigned int vecLen;        /**< vector length to evaluate with */
146     /** vector length for matrix A elements to evaluate with */
147     unsigned int vecLenA;
148     /** vector length for matrix B elements to evaluate with */
149     unsigned int vecLenB;
150     /*
151      * FIXME: remove this kludge; vectorization for the result should be
152      *        autodetected
153      */
154     unsigned int vecLenC;
155     char solverPriv[MAX_SOLVER_PRIVATE_SIZE];
156     char buildOptions[BUILD_OPTS_MAXLEN]; // Build Flags used for the kernel call
157 } CLBLASKernExtra;
158 
/**
 * @brief Compare BLAS kernels extra information
 *
 * @param[in] extra     first operand of the comparison
 * @param[in] extraKey  second operand of the comparison
 *
 * NOTE(review): both arguments presumably point to CLBLASKernExtra objects,
 * and the meaning of the returned value is defined by the implementation,
 * which is not visible from this header -- confirm before relying on it.
 *
 * @return result of the comparison as an int
 */
int clblasKernelExtraCmp(const void *extra, const void *extraKey);
164 
165 /*@}*/
166 
167 #endif /* KERNEL_EXTRA_H_ */
168