"""Tile and non-tile parameter tables used for kernel selection."""
import copy
import KernelParameters

################################################################################
# Tile Parameters for Kernel Selection Data
################################################################################

# Each table maps a precision key ("s", "d", "c", "z") to a list of rows:
#   [ size, fallback tile, [ valid tiles ] ]
# A tile is [workGroupNumRows, workGroupNumCols,
#            microTileNumRows, microTileNumCols]
# (see how getTilesForPrecision unpacks it).  Rows are listed by descending
# size and end with a size-0 row.
# NOTE(review): "size" is presumably a problem-size threshold; its exact
# meaning is defined by the consumer of these tables - confirm there.
kernelSelectionDataHawaii = {
    "s": [
        [4000, [16, 16, 6, 6], [[16, 16, 6, 6]]],
        [2496, [16, 16, 4, 4], [[16, 16, 6, 6], [16, 16, 4, 4]]],
        [2448, [16, 16, 6, 6], [[16, 16, 6, 6]]],
        [1600, [16, 16, 6, 6], [[16, 16, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5]]],
        [1008, [16, 16, 6, 6], [[16, 16, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3]]],
        [960, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 2, 2]]],
        [896, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 2, 2]]],
        [864, [16, 16, 2, 2], [[16, 16, 6, 6], [16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 4, 4], [16, 16, 2, 2]]],
        [784, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 1, 1]]],
        [768, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 1, 1]]],
        # NOTE(review): [16, 16, 4, 4] appears twice in this row's valid
        # tiles; kept as-is because this is tuning data.
        [720, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 3, 3]]],
        [464, [16, 16, 3, 3], [[16, 16, 3, 3], [16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 5, 5]]],
        [304, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
    "d": [
        [5408, [8, 8, 6, 6], [[8, 8, 6, 6], [16, 16, 4, 4]]],
        [2800, [16, 16, 4, 4], [[8, 8, 6, 6], [16, 16, 4, 4]]],
        [1536, [16, 16, 4, 4], [[8, 8, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5]]],
        [1136, [16, 16, 4, 4], [[8, 8, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 2, 2]]],
        [576, [16, 16, 2, 2], [[16, 16, 4, 4], [8, 8, 6, 6], [16, 16, 5, 5], [16, 16, 2, 2]]],
        [384, [16, 16, 2, 2], [[16, 16, 4, 4], [8, 8, 6, 6], [16, 16, 5, 5], [16, 16, 2, 2], [16, 16, 1, 1]]],
        [256, [16, 16, 1, 1], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
    "c": [
        [3840, [16, 16, 4, 4], [[16, 16, 4, 4]]],
        [2592, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 3, 3]]],
        [2224, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 2, 2]]],
        [720, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 5, 5]]],
        [432, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 1, 1]]],
        [288, [16, 16, 1, 1], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
    "z": [
        [3008, [16, 16, 3, 3], [[16, 16, 3, 3]]],
        [1344, [16, 16, 3, 3], [[16, 16, 3, 3], [16, 16, 4, 4]]],
        [1040, [16, 16, 3, 3], [[16, 16, 3, 3], [16, 16, 4, 4], [16, 16, 2, 2]]],
        [832, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 4, 4], [16, 16, 2, 2]]],
        [544, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 2, 2]]],
        [336, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 1, 1]]],
        [192, [16, 16, 1, 1], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
}

kernelSelectionDataFiji = {
    "s": [
        [3072, [16, 16, 6, 6], [[16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 4, 4]]],
        [2240, [16, 16, 6, 6], [[16, 16, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3]]],
        [1760, [16, 16, 4, 4], [[16, 16, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3]]],
        [1600, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 3, 3]]],
        [1056, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 3, 3], [16, 16, 2, 2]]],
        [960, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3], [16, 16, 2, 2]]],
        [736, [16, 16, 3, 3], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 2, 2]]],
        [528, [16, 16, 3, 3], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 1, 1]]],
        [432, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 1, 1]]],
        [320, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
    "d": [
        [3200, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 2, 2], [8, 8, 6, 6]]],
        [1632, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 5, 5], [8, 8, 6, 6]]],
        [1280, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 5, 5], [8, 8, 6, 6], [16, 16, 1, 1]]],
        [1056, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [672, [16, 16, 2, 2], [[16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
    "c": [
        [2240, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6]]],
        [1440, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 2, 2]]],
        [1088, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 5, 5]]],
        [704, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 5, 5]]],
        [528, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 1, 1]]],
        [336, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
    "z": [
        [2528, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 3, 3]]],
        [1872, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 1, 1]]],
        [1040, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [768, [16, 16, 1, 1], [[16, 16, 2, 2], [16, 16, 1, 1]]],
        [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
    ],
}

# Table currently in effect; Hawaii until setArchitecture() says otherwise.
kernelSelectionData = kernelSelectionDataHawaii


def setArchitecture(architecture):
    """Select which kernel selection table the module-level helpers use.

    "Fiji" selects kernelSelectionDataFiji; any other value selects
    kernelSelectionDataHawaii.  Rebinds the module global
    kernelSelectionData.
    """
    # Only the name being rebound needs a global declaration.
    global kernelSelectionData

    if architecture == "Fiji":
        kernelSelectionData = kernelSelectionDataFiji
    else:
        kernelSelectionData = kernelSelectionDataHawaii


################################################################################
# Non-Tile Parameters
################################################################################
precisions = ["s", "d", "c", "z"]

orders = ["clblasColumnMajor"]

transposes = {
    "s": ["N", "T"],
    "d": ["N", "T"],
    "c": ["N", "T", "C"],
    "z": ["N", "T", "C"],
}

unrolls = {"s": [16, 8, 1], "d": [8, 1], "c": [8, 1], "z": [8, 1]}

betas = [0, 1]


def _appendTileVariants(tiles, tile, tileParams, precision):
    """Append one copy of tile per valid unroll of precision to tiles.

    tile is a scratch KernelParameters.TileParameters object that is
    overwritten in place with tileParams
    ([workGroupNumRows, workGroupNumCols, microTileNumRows,
    microTileNumCols]) before each unroll is tried.  Combinations rejected
    by tile.isValid() are reported on stdout and skipped.
    """
    tile.workGroupNumRows = tileParams[0]
    tile.workGroupNumCols = tileParams[1]
    tile.microTileNumRows = tileParams[2]
    tile.microTileNumCols = tileParams[3]
    tile.macroTileNumRows = tile.workGroupNumRows*tile.microTileNumRows
    tile.macroTileNumCols = tile.workGroupNumCols*tile.microTileNumCols
    for unroll in unrolls[precision]:
        tile.unroll = unroll
        if tile.isValid():
            # Copy, because the scratch tile keeps being mutated.
            tiles.append(copy.copy(tile))
        else:
            print(tile.getName() + " - SKIPPING - ")


def getTilesForPrecision(precision):
    """Return the sorted, de-duplicated tiles usable for precision.

    Walks every row of the active kernelSelectionData[precision] table and
    collects each valid tile and the row's fallback tile, once per unroll
    listed in unrolls[precision].

    De-duplication and sorting rely on the hashing/ordering that
    KernelParameters.TileParameters provides (defined outside this file).
    """
    tiles = []
    tile = KernelParameters.TileParameters()
    for sizeData in kernelSelectionData[precision]:
        fallbackTile = sizeData[1]
        validTiles = sizeData[2]
        # add valid tiles
        for tileParams in validTiles:
            _appendTileVariants(tiles, tile, tileParams, precision)
        # add fallback tile
        _appendTileVariants(tiles, tile, fallbackTile, precision)

    tiles = list(set(tiles))
    tiles.sort()
    return tiles


def getTransposeChoices():
    """Return every transA+transB pair built from the known transposes.

    The single transposes are gathered across all precisions and
    de-duplicated in first-seen order, so the result is the same on every
    run (iterating a set of strings would make the order depend on string
    hashing).
    """
    singleTransposes = []
    for precision in precisions:
        for transpose in transposes[precision]:
            if transpose not in singleTransposes:
                singleTransposes.append(transpose)

    transposeChoices = []
    for transA in singleTransposes:
        for transB in singleTransposes:
            transposePair = transA+transB
            if transposePair not in transposeChoices:
                transposeChoices.append(transposePair)
    return transposeChoices


def getTileChoices():
    """Return the distinct "<rows>x<cols>" macro-tile names, first-seen order.

    Rows and columns are workGroupNum* times microTileNum* of every tile
    returned by getTilesForPrecision, over all precisions.
    """
    tileChoices = []
    for precision in precisions:
        for t in getTilesForPrecision(precision):
            tile = (str(t.workGroupNumRows*t.microTileNumRows) + "x"
                    + str(t.workGroupNumCols*t.microTileNumCols))
            if tile not in tileChoices:
                tileChoices.append(tile)
    return tileChoices