1 /* ************************************************************************ 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * ************************************************************************/ 16 17 18 #ifndef GEN_HELPER_H_ 19 #define GEN_HELPER_H_ 20 21 #include <kerngen.h> 22 #include <dblock_kgen.h> 23 #include <matrix_props.h> 24 25 #include "blas_kgen.h" 26 27 typedef enum BufCopyHelperFlags { 28 // buffer copy functions are needed for matrix A blocks 29 BCHF_MATRIX_A = 0x01, 30 // buffer copy functions are needed for matrix B blocks 31 BCHF_MATRIX_B = 0x02, 32 /* 33 * read block of output matrix 34 * (either B or C) 35 */ 36 BCHF_READ_OUTPUT = 0x04, 37 // write block of output matrix 38 BCHF_WRITE_OUTPUT = 0x08, 39 // not unroll loops in transposing versions of customized generators 40 BCHF_NOT_UNROLL_TRANSPOSE = 0x10, 41 // output to image 42 BCHF_IMAGE_WRITE = 0x20 43 } BufCopyHelperFlags; 44 45 typedef enum ZeroGenHelperFlags { 46 ZF_MATRIX_A = 0x01, 47 ZF_MATRIX_B = 0x02, 48 ZF_MATRIX_C = 0x04 49 } ZeroGenHelperFlags; 50 51 /* 52 * Name of functions copying matrix blocks between the global 53 * and the local memory. Contains customized and generic transposing 54 * or not transposing variants for reading and writing back depending on 55 * generator flags, for all the matrices. 56 * 57 * A function name contained in a 'read*' field matches to a function 58 * copying data from the global memory to the local, and this one 59 * contained in a 'write*' field matches to a function copying in 60 * inverse direction. 61 */ 62 typedef struct CopyBufFuncs { 63 char read[MATRIX_ROLES_NUMBER][FUNC_NAME_MAXLEN]; 64 char write[FUNC_NAME_MAXLEN]; 65 char readGeneric[MATRIX_ROLES_NUMBER][FUNC_NAME_MAXLEN]; 66 char writeGeneric[FUNC_NAME_MAXLEN]; 67 } CopyBufFuncs; 68 69 /* 70 * Generate all needed functions copying matrix 71 * blocks between the global and the local memory 72 * 73 * @funcs: function names structure 74 * @ctx: generator context 75 * @funcID: function ID 76 * @gset: generator settings 77 * @flags: helper flags 78 * 79 * The 'flags' field of the 'gset' structure must store flags from 80 * the 'BufCopyHelperFlags' enumeration 81 * 82 * Name of functions dealing with blocks of the output matrix 83 * are always stored to 'MATRIX_C' name fields. 84 * 85 * On success returns 0. If generation fails due 86 * to buffer overflowing, returns -1. 87 */ 88 int 89 generateBufCopyFuncs( 90 CopyBufFuncs *funcNames, 91 struct KgenContext *ctx, 92 BlasFunctionID funcID, 93 const BlasGenSettings *gset, 94 BufCopyHelperFlags flags); 95 96 /* 97 * Have the same semantics as the previous helper, 98 * but generate functions for zeroing local buffers. 99 */ 100 int 101 generateZeroingFuncs( 102 ZeroFuncs *funcNames, 103 struct KgenContext *ctx, 104 const SubproblemDim *blasDim, 105 const PGranularity *pgran, 106 DataType dtype, 107 ZeroGenHelperFlags flags); 108 109 UpdateResultFlags 110 kextraToUpresFlags(BlasFunctionID, KernelExtraFlags kflags); 111 112 int 113 generateResultUpdate( 114 struct KgenContext *ctx, 115 BlasFunctionID funcID, 116 const BlasGenSettings *gset, 117 const char *optFuncName, 118 const char *genericFuncName); 119 120 int 121 genResultUpdateWithFlags( 122 struct KgenContext *ctx, 123 BlasFunctionID funcID, 124 const BlasGenSettings *gset, 125 UpdateResultFlags flags, 126 const char *optFuncName, 127 const char *genericFuncName, 128 const char *cachedName); 129 130 void checkGenBeginHitMatrixBlock( 131 struct KgenContext *ctx, 132 KernelExtraFlags kflags); 133 134 void checkGenEndHitMatrixBlock( 135 struct KgenContext *ctx, 136 KernelExtraFlags kflags); 137 138 #endif /* GEN_HELPER_H_ */ 139