1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #ifndef GEN_HELPER_H_
19 #define GEN_HELPER_H_
20 
21 #include <kerngen.h>
22 #include <dblock_kgen.h>
23 #include <matrix_props.h>
24 
25 #include "blas_kgen.h"
26 
/*
 * Flags controlling which buffer copy helpers get generated.
 * Every value occupies its own bit, so several of them can be OR-ed.
 */
typedef enum BufCopyHelperFlags {
    /* copy functions are required for blocks of matrix A */
    BCHF_MATRIX_A             = 1 << 0,
    /* copy functions are required for blocks of matrix B */
    BCHF_MATRIX_B             = 1 << 1,
    /* a block of the output matrix (either B or C) is read */
    BCHF_READ_OUTPUT          = 1 << 2,
    /* a block of the output matrix is written */
    BCHF_WRITE_OUTPUT         = 1 << 3,
    /*
     * do not unroll loops in the transposing versions
     * of the customized generators
     */
    BCHF_NOT_UNROLL_TRANSPOSE = 1 << 4,
    /* the output goes to an image */
    BCHF_IMAGE_WRITE          = 1 << 5
} BufCopyHelperFlags;
44 
/*
 * Flags accepted by generateZeroingFuncs(). Every value occupies its
 * own bit, so several of them can be OR-ed.
 *
 * NOTE(review): judging by the names, each flag selects the matrix
 * (A, B or C) a zeroing function is wanted for -- confirm against
 * the implementation.
 */
typedef enum ZeroGenHelperFlags {
    ZF_MATRIX_A = 1 << 0,
    ZF_MATRIX_B = 1 << 1,
    ZF_MATRIX_C = 1 << 2
} ZeroGenHelperFlags;
50 
51 /*
52  * Name of functions copying matrix blocks between the global
53  * and the local memory. Contains customized and generic transposing
54  * or not transposing variants for reading and writing back depending on
55  * generator flags, for all the matrices.
56  *
57  * A function name contained in a 'read*' field matches to a function
58  * copying data from the global memory to the local, and this one
59  * contained in a 'write*' field matches to a function copying in
60  * inverse direction.
61  */
62 typedef struct CopyBufFuncs {
63     char read[MATRIX_ROLES_NUMBER][FUNC_NAME_MAXLEN];
64     char write[FUNC_NAME_MAXLEN];
65     char readGeneric[MATRIX_ROLES_NUMBER][FUNC_NAME_MAXLEN];
66     char writeGeneric[FUNC_NAME_MAXLEN];
67 } CopyBufFuncs;
68 
69 /*
70  * Generate all needed functions copying matrix
71  * blocks between the global and the local memory
72  *
73  * @funcs: function names structure
74  * @ctx: generator context
75  * @funcID: function ID
76  * @gset: generator settings
77  * @flags: helper flags
78  *
79  * The 'flags' field of the 'gset' structure must store flags from
80  * the 'BufCopyHelperFlags' enumeration
81  *
82  * Name of functions dealing with blocks of the output matrix
83  * are always stored to 'MATRIX_C' name fields.
84  *
85  * On success returns 0. If generation fails due
86  * to buffer overflowing, returns -1.
87  */
88 int
89 generateBufCopyFuncs(
90     CopyBufFuncs *funcNames,
91     struct KgenContext *ctx,
92     BlasFunctionID funcID,
93     const BlasGenSettings *gset,
94     BufCopyHelperFlags flags);
95 
96 /*
97  * Have the same semantics as the previous helper,
98  * but generate functions for zeroing local buffers.
99  */
100 int
101 generateZeroingFuncs(
102     ZeroFuncs *funcNames,
103     struct KgenContext *ctx,
104     const SubproblemDim *blasDim,
105     const PGranularity *pgran,
106     DataType dtype,
107     ZeroGenHelperFlags flags);
108 
109 UpdateResultFlags
110 kextraToUpresFlags(BlasFunctionID, KernelExtraFlags kflags);
111 
112 int
113 generateResultUpdate(
114     struct KgenContext *ctx,
115     BlasFunctionID funcID,
116     const BlasGenSettings *gset,
117     const char *optFuncName,
118     const char *genericFuncName);
119 
120 int
121 genResultUpdateWithFlags(
122     struct KgenContext *ctx,
123     BlasFunctionID funcID,
124     const BlasGenSettings *gset,
125     UpdateResultFlags flags,
126     const char *optFuncName,
127     const char *genericFuncName,
128     const char *cachedName);
129 
130 void checkGenBeginHitMatrixBlock(
131     struct KgenContext *ctx,
132     KernelExtraFlags kflags);
133 
134 void checkGenEndHitMatrixBlock(
135     struct KgenContext *ctx,
136     KernelExtraFlags kflags);
137 
138 #endif /* GEN_HELPER_H_ */
139