1import copy
2import KernelParameters
3
4################################################################################
5# Tile Parameters for Kernel Selection Data
6################################################################################
7
# Kernel selection table used by default and for every architecture other
# than "Fiji" (see setArchitecture).
#
# Keyed by precision ("s", "d", "c", "z"). Every row has the form
#   [ size, fallback tile, [ valid tiles ] ]
# and every tile is
#   [ workGroupNumRows, workGroupNumCols, microTileNumRows, microTileNumCols ]
# which is the order getTilesForPrecision() unpacks the four values in.
# Within each precision the rows are listed by descending size and end with
# a size-0 row.
# NOTE(review): size is presumably the problem-size threshold from which a
# row applies - confirm against the code that consumes this table.
kernelSelectionDataHawaii = {
# [ size, fallback tile, [ valid tiles ] ],
  "s":[
    [ 4000, [ 16, 16,  6,  6], [ [ 16, 16,  6,  6] ] ],
    [ 2496, [ 16, 16,  4,  4], [ [ 16, 16,  6,  6], [ 16, 16,  4,  4] ] ],
    [ 2448, [ 16, 16,  6,  6], [ [ 16, 16,  6,  6] ] ],
    [ 1600, [ 16, 16,  6,  6], [ [ 16, 16,  6,  6], [ 16, 16,  4,  4], [ 16, 16,  5,  5] ] ],
    [ 1008, [ 16, 16,  6,  6], [ [ 16, 16,  6,  6], [ 16, 16,  4,  4], [ 16, 16,  5,  5], [ 16, 16,  3,  3] ] ],
    [  960, [ 16, 16,  2,  2], [ [ 16, 16,  4,  4], [ 16, 16,  3,  3], [ 16, 16,  5,  5], [ 16, 16,  2,  2] ] ],
    [  896, [ 16, 16,  2,  2], [ [ 16, 16,  4,  4], [ 16, 16,  6,  6], [ 16, 16,  3,  3], [ 16, 16,  5,  5], [ 16, 16,  2,  2] ] ],
    [  864, [ 16, 16,  2,  2], [ [ 16, 16,  6,  6], [ 16, 16,  3,  3], [ 16, 16,  5,  5], [ 16, 16,  4,  4], [ 16, 16,  2,  2] ] ],
    [  784, [ 16, 16,  2,  2], [ [ 16, 16,  3,  3], [ 16, 16,  5,  5], [ 16, 16,  4,  4], [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [  768, [ 16, 16,  2,  2], [ [ 16, 16,  3,  3], [ 16, 16,  5,  5], [ 16, 16,  4,  4], [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    # Each valid tile is listed once; a second, later [ 16, 16,  4,  4]
    # entry in this row was redundant and has been dropped. The remaining
    # tiles keep their relative order.
    [  720, [ 16, 16,  2,  2], [ [ 16, 16,  4,  4], [ 16, 16,  5,  5], [ 16, 16,  6,  6], [ 16, 16,  3,  3] ] ],
    [  464, [ 16, 16,  3,  3], [ [ 16, 16,  3,  3], [ 16, 16,  4,  4], [ 16, 16,  2,  2], [ 16, 16,  5,  5] ] ],
    [  304, [ 16, 16,  2,  2], [ [ 16, 16,  3,  3], [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [    0, [ 16, 16,  1,  1], [ [ 16, 16,  1,  1] ] ],
    ],
  "d":[
    [ 5408, [  8,  8,  6,  6], [ [  8,  8,  6,  6], [ 16, 16,  4,  4] ] ],
    [ 2800, [ 16, 16,  4,  4], [ [  8,  8,  6,  6], [ 16, 16,  4,  4] ] ],
    [ 1536, [ 16, 16,  4,  4], [ [  8,  8,  6,  6], [ 16, 16,  4,  4], [ 16, 16,  5,  5] ] ],
    [ 1136, [ 16, 16,  4,  4], [ [  8,  8,  6,  6], [ 16, 16,  4,  4], [ 16, 16,  5,  5], [ 16, 16,  2,  2] ] ],
    [  576, [ 16, 16,  2,  2], [ [ 16, 16,  4,  4], [  8,  8,  6,  6], [ 16, 16,  5,  5], [ 16, 16,  2,  2] ] ],
    [  384, [ 16, 16,  2,  2], [ [ 16, 16,  4,  4], [  8,  8,  6,  6], [ 16, 16,  5,  5], [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [  256, [ 16, 16,  1,  1], [ [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [    0, [ 16, 16,  1,  1], [ [ 16, 16,  1,  1] ] ],
    ],
  "c":[
    [ 3840, [ 16, 16,  4,  4], [ [ 16, 16,  4,  4] ] ],
    [ 2592, [ 16, 16,  4,  4], [ [ 16, 16,  4,  4], [ 16, 16,  6,  6], [ 16, 16,  3,  3] ] ],
    [ 2224, [ 16, 16,  4,  4], [ [ 16, 16,  4,  4], [ 16, 16,  3,  3], [ 16, 16,  2,  2] ] ],
    [  720, [ 16, 16,  2,  2], [ [ 16, 16,  4,  4], [ 16, 16,  3,  3], [ 16, 16,  2,  2], [ 16, 16,  5,  5] ] ],
    [  432, [ 16, 16,  2,  2], [ [ 16, 16,  2,  2], [ 16, 16,  3,  3], [ 16, 16,  1,  1] ] ],
    [  288, [ 16, 16,  1,  1], [ [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [    0, [ 16, 16,  1,  1], [ [ 16, 16,  1,  1] ] ],
    ],
  "z":[
    [ 3008, [ 16, 16,  3,  3], [ [ 16, 16,  3,  3] ] ],
    [ 1344, [ 16, 16,  3,  3], [ [ 16, 16,  3,  3], [ 16, 16,  4,  4] ] ],
    [ 1040, [ 16, 16,  3,  3], [ [ 16, 16,  3,  3], [ 16, 16,  4,  4], [ 16, 16,  2,  2] ] ],
    [  832, [ 16, 16,  2,  2], [ [ 16, 16,  3,  3], [ 16, 16,  4,  4], [ 16, 16,  2,  2] ] ],
    [  544, [ 16, 16,  2,  2], [ [ 16, 16,  3,  3], [ 16, 16,  2,  2] ] ],
    [  336, [ 16, 16,  2,  2], [ [ 16, 16,  3,  3], [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [  192, [ 16, 16,  1,  1], [ [ 16, 16,  2,  2], [ 16, 16,  1,  1] ] ],
    [    0, [ 16, 16,  1,  1], [ [ 16, 16,  1,  1] ] ],
    ],
  }
56
# Kernel selection table installed by setArchitecture("Fiji").
# Keyed by precision; each row reads
#   [size, fallback tile, [valid tiles]]
# with a tile given as
#   [workGroupNumRows, workGroupNumCols, microTileNumRows, microTileNumCols].
kernelSelectionDataFiji = {
  "s": [
    [3072, [16, 16, 6, 6], [[16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 4, 4]]],
    [2240, [16, 16, 6, 6], [[16, 16, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3]]],
    [1760, [16, 16, 4, 4], [[16, 16, 6, 6], [16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3]]],
    [1600, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 3, 3]]],
    [1056, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 3, 3], [16, 16, 2, 2]]],
    [960, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 3, 3], [16, 16, 2, 2]]],
    [736, [16, 16, 3, 3], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 5, 5], [16, 16, 2, 2]]],
    [528, [16, 16, 3, 3], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 1, 1]]],
    [432, [16, 16, 2, 2], [[16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 1, 1]]],
    [320, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
    [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
  ],
  "d": [
    [3200, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 5, 5], [16, 16, 2, 2], [8, 8, 6, 6]]],
    [1632, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 5, 5], [8, 8, 6, 6]]],
    [1280, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 5, 5], [8, 8, 6, 6], [16, 16, 1, 1]]],
    [1056, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
    [672, [16, 16, 2, 2], [[16, 16, 1, 1]]],
    [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
  ],
  "c": [
    [2240, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6]]],
    [1440, [16, 16, 4, 4], [[16, 16, 4, 4], [16, 16, 6, 6], [16, 16, 5, 5], [16, 16, 2, 2]]],
    [1088, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 3, 3], [16, 16, 2, 2], [16, 16, 5, 5]]],
    [704, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 5, 5]]],
    [528, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 1, 1]]],
    [336, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
    [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
  ],
  "z": [
    [2528, [16, 16, 2, 2], [[16, 16, 4, 4], [16, 16, 2, 2], [16, 16, 3, 3]]],
    [1872, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 3, 3], [16, 16, 1, 1]]],
    [1040, [16, 16, 2, 2], [[16, 16, 2, 2], [16, 16, 1, 1]]],
    [768, [16, 16, 1, 1], [[16, 16, 2, 2], [16, 16, 1, 1]]],
    [0, [16, 16, 1, 1], [[16, 16, 1, 1]]],
  ],
}
96
# Table in effect until setArchitecture() selects one explicitly.
kernelSelectionData = kernelSelectionDataHawaii
def setArchitecture(architecture):
  """Point the module-level kernelSelectionData at an architecture's table.

  architecture: "Fiji" selects kernelSelectionDataFiji; every other value
  selects kernelSelectionDataHawaii.
  """
  # Only the name that gets rebound has to be declared global.
  global kernelSelectionData

  useFiji = (architecture == "Fiji")
  kernelSelectionData = kernelSelectionDataFiji if useFiji else kernelSelectionDataHawaii
105
106
107################################################################################
108# Non-Tile Parameters
109################################################################################
# Precision codes; they key transposes and unrolls below as well as the
# kernel selection tables.
precisions = list("sdcz")

# Matrix storage orders covered.
orders = ["clblasColumnMajor"]

# Transpose options available for each precision ("C" only for "c" and "z").
transposes = {
  "s": ["N", "T"],
  "d": ["N", "T"],
  "c": ["N", "T", "C"],
  "z": ["N", "T", "C"],
}

# Unroll values tried for each precision when tiles are enumerated.
unrolls = {
  "s": [16, 8, 1],
  "d": [8, 1],
  "c": [8, 1],
  "z": [8, 1],
}

betas = [0, 1]
120
def getTilesForPrecision(precision):
  """Return the sorted, de-duplicated tiles listed for one precision.

  Every row of kernelSelectionData[precision] is visited. For each of the
  row's valid tiles, and then for its fallback tile, one candidate is built
  per value in unrolls[precision]. Candidates whose isValid() is false are
  reported on stdout and left out.
  """
  collected = []
  # A single scratch object is refilled for each candidate; copies are kept.
  scratch = KernelParameters.TileParameters()
  for row in kernelSelectionData[precision]:
    # row is [ size, fallback tile, [ valid tiles ] ]:
    # handle the valid tiles first, the fallback tile last.
    for params in list(row[2]) + [row[1]]:
      scratch.workGroupNumRows = params[0]
      scratch.workGroupNumCols = params[1]
      scratch.microTileNumRows = params[2]
      scratch.microTileNumCols = params[3]
      # macro tile = work-group dimension times micro-tile dimension
      scratch.macroTileNumRows = scratch.workGroupNumRows*scratch.microTileNumRows
      scratch.macroTileNumCols = scratch.workGroupNumCols*scratch.microTileNumCols
      for depth in unrolls[precision]:
        scratch.unroll = depth
        if not scratch.isValid():
          print(scratch.getName() + " - SKIPPING - ")
          continue
        collected.append( copy.copy(scratch) )

  # The same tile can be listed in several rows; keep each one once.
  return sorted( set(collected) )
163
def getTransposeChoices():
  """Return every ordered pair of transpose options as concatenated strings.

  The single options are collected from transposes[precision] for each entry
  of precisions and de-duplicated; each pair is transA+transB (e.g. "NT").

  Options are kept in first-seen order instead of being passed through a
  set: the iteration order of a set of strings can change from one
  interpreter run to the next when string hash randomization is active,
  which made the order of the returned list unpredictable. The set of
  returned pairs is unchanged.
  """
  singleTransposes = []
  for precision in precisions:
    for transpose in transposes[precision]:
      if transpose not in singleTransposes:
        singleTransposes.append( transpose )
  transposeChoices = []
  for transA in singleTransposes:
    for transB in singleTransposes:
      transposePair = transA+transB
      # guard kept in case two different pairs concatenate to the same text
      if transposePair not in transposeChoices:
        transposeChoices.append(transposePair)
  return transposeChoices
178
def getTileChoices():
  """Return the distinct "<rows>x<cols>" tile labels over all precisions.

  rows and cols are workGroupNum*microTileNum for each tile returned by
  getTilesForPrecision(). Labels appear in the order they are first met
  while walking precisions.
  """
  labels = []
  for precision in precisions:
    for candidate in getTilesForPrecision(precision):
      numRows = candidate.workGroupNumRows*candidate.microTileNumRows
      numCols = candidate.workGroupNumCols*candidate.microTileNumCols
      label = "x".join( [str(numRows), str(numCols)] )
      if label in labels:
        continue
      labels.append(label)
  return labels
188