#
# Copyright © 2021 Google, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

from mako.template import Template
import sys

# This script builds a table of GPU ids and their device-info structs, then
# renders it through the mako template at the bottom of the file and prints
# the resulting C source to stdout.

def max_bitfield_val(high, low, shift):
    """Return the value ((1 << (high - low)) - 1) << shift.

    NOTE(review): the field width used here is (high - low), not
    (high - low + 1), and several "# max_bitfield_val(...)" comments in
    the tables below do not match what this function returns for those
    arguments.  The existing tile_max_h values depend on the current
    formula, so confirm against the register definitions before changing
    it.
    """
    return ((1 << (high - low)) - 1) << shift

class State(object):
    """Accumulates the device tables that are handed to the template."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of gpu_info within self.gpu_infos.

        Raises ValueError if gpu_info was never registered.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        raise ValueError("invalid info")

s = State()

def add_gpus(ids, info):
    """Map every GPUId in ids to the (shared) device-info struct info."""
    for gpu_id in ids:
        s.gpus[gpu_id] = info

class GPUId(object):
    """Identifies a GPU by gpu_id and/or chip_id, plus a display name.

    If chip_id is not given it is derived from the decimal digits of
    gpu_id (which is then required): hundreds -> bits 24+, tens -> bits
    16+, units -> bits 8+, with the low byte set to 0xff.  If gpu_id is
    not given it defaults to 0, in which case name must be provided;
    otherwise name defaults to "FD<gpu_id>".
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Split eg. 630 into core=6, major=3, minor=0
            core, rest = divmod(gpu_id, 100)
            major, minor = divmod(rest, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            # No sensible default name without a gpu_id
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name

class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer
    """
    def __str__(self):
        # One ".name=value," designated initializer per instance
        # attribute, in attribute-assignment order.  Nested Struct values
        # are stringified recursively via str().
        fields = "".join(
            "." + name + "=" + str(value) + ","
            for name, value in vars(self).items())
        return "{" + fields + "}"

class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        self.gmem_align_w  = gmem_align_w
        self.gmem_align_h  = gmem_align_h
        self.tile_align_w  = tile_align_w
        self.tile_align_h  = tile_align_h
        self.tile_max_w    = tile_max_w
        self.tile_max_h    = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes

        # Every info struct registers itself so that the template can
        # emit it and refer to it by index
        s.gpu_infos.append(self)


class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
        super().__init__(gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = 32, tile_align_h = 32,
                         tile_max_w   = 1024, # max_bitfield_val(5, 0, 5)
                         tile_max_h   = max_bitfield_val(14, 8, 4),
                         num_vsc_pipes = 32)
        assert(num_sp_cores == num_ccu)

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU
        if num_ccu == 3:
            self.tile_align_w = 96

        self.a6xx = Struct()
        self.a6xx.magic = Struct()

        for name, val in template["magic"].items():
            setattr(self.a6xx.magic, name, val)

        # Various "magic" register values:
        self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        self.a6xx.magic.PC_POWER_CNTL = PC_POWER_CNTL

        # Things that earlier gens have and later gens remove, provide
        # defaults here and let them be overridden by sub-gen template:
        self.a6xx.has_cp_reg_write = True
        self.a6xx.has_8bpp_ubwc = True

        for name, val in template.items():
            if name == "magic": # handled above
                continue
            setattr(self.a6xx, name, val)

# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 512,
        tile_max_h   = ~0, # TODO
        num_vsc_pipes = 8,
    ))

add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 992, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
    ))

add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
    ))

add_gpus([
        GPUId(508),
        GPUId(509),
        GPUId(510),
        GPUId(512),
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
    ))

# a6xx can be divided into distinct sub-generations, where certain device-
# info parameters are keyed to the sub-generation.  These templates reduce
# the copypaste

# a615, a618, a630:
a6xx_gen1 = dict(
        fibers_per_sp = 128 * 16,
        reg_size_vec4 = 96,
        instr_cache_size = 64,
        ccu_cntl_gmem_unk2 = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True,
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0x100000,
        )
    )

# a640, a680:
a6xx_gen2 = dict(
        fibers_per_sp = 128 * 4 * 16,
        reg_size_vec4 = 96,
        instr_cache_size = 64, # TODO
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True, # TODO: check if true
        has_dp2acc = False, # TODO: check if true
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0,
        ),
    )

# a650:
a6xx_gen3 = dict(
        fibers_per_sp = 128 * 2 * 16,
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_sample_locations = True,
        has_ccu_flush_bug = True,
        has_8bpp_ubwc = False,
        has_dp2acc = True,
        magic = dict(
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL = 0x1000000,
        ),
    )

# a635, a660:
a6xx_gen4 = dict(
        fibers_per_sp = 128 * 2 * 16,
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_sample_locations = True,
        has_ccu_flush_bug = True,
        has_cp_reg_write = False,
        has_8bpp_ubwc = False,
        has_lpac = True,
        has_shading_rate = True,
        has_getfiberid = True,
        has_dp2acc = True,
        has_dp4acc = True,
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0x5008000,
        ),
    )

add_gpus([
        GPUId(615),
        GPUId(618),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 1,
        num_ccu = 1,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 0,
    ))

add_gpus([
        GPUId(630),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x01000000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(640),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(680),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 4,
        num_ccu = 4,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 3,
    ))

add_gpus([
        GPUId(650),
    ], A6xxGPUInfo(
        a6xx_gen3,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))

add_gpus([
        GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
        GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
        # fallback wildcard entry should be last:
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(660),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))

# Mako template for the generated C source.  It emits one fd_dev_info per
# entry of s.gpu_infos, followed by the fd_dev_recs table which maps each
# GPUId to its info struct by index.
template = """\
/* Copyright (C) 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "freedreno_dev_info.h"

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""

print(Template(template).render(s=s))
