1#
2# Copyright © 2021 Google, Inc.
3#
4# Permission is hereby granted, free of charge, to any person obtaining a
5# copy of this software and associated documentation files (the "Software"),
6# to deal in the Software without restriction, including without limitation
7# the rights to use, copy, modify, merge, publish, distribute, sublicense,
8# and/or sell copies of the Software, and to permit persons to whom the
9# Software is furnished to do so, subject to the following conditions:
10#
11# The above copyright notice and this permission notice (including the next
12# paragraph) shall be included in all copies or substantial portions of the
13# Software.
14#
15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21# IN THE SOFTWARE.
22
23from mako.template import Template
24import sys
25
def max_bitfield_val(high, low, shift):
    """Return a mask of (high - low) one-bits, shifted left by shift.

    NOTE(review): the width used here is ``high - low``, not
    ``high - low + 1``.  If high/low are meant as inclusive bit positions
    the result covers one bit fewer than the field -- confirm whether
    that is intended before changing it, since the generated limits
    depend on it.
    """
    mask = (1 << (high - low)) - 1
    return mask << shift
28
class State(object):
    """Registry of device-info structs and of the GPU ids that map to them."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of gpu_info within self.gpu_infos.

        Raises:
            ValueError: if gpu_info is not present in self.gpu_infos.
        """
        for i, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return i
        # Previously this raised the undefined name `Error`, which surfaced
        # as a NameError instead of the intended message.
        raise ValueError("invalid info")
47
# Module-wide registry: GPUInfo.__init__ appends to s.gpu_infos, add_gpus()
# fills s.gpus, and the whole object is handed to the template when rendering.
s = State()
49
def add_gpus(ids, info):
    """Register every GPU id in ids as mapping to the device-info struct info.

    Entries are added to the global s.gpus table in the order given; an id
    that is already present has its mapping replaced.
    """
    s.gpus.update((gpu, info) for gpu in ids)
53
class GPUId(object):
    """Identifies a GPU by gpu_id and/or chip_id, together with a name.

    At least one of gpu_id / chip_id must be given:

    - If chip_id is omitted it is derived from the decimal digits of
      gpu_id (hundreds and above -> core, tens -> major, ones -> minor)
      as ``(core << 24) | (major << 16) | (minor << 8) | 0xff``.
    - If gpu_id is omitted it is stored as 0, and name must be supplied.
    - If name is omitted it defaults to ``"FD<gpu_id>"``.

    Raises:
        AssertionError: if neither gpu_id nor chip_id is given, or if
            no name is given and gpu_id is 0/omitted.
    """
    def __init__(self, gpu_id=None, chip_id=None, name=None):
        if chip_id is None:
            assert gpu_id is not None, "either gpu_id or chip_id is required"
            # Integer arithmetic only; avoids the float round-trip of
            # int(val / 100).
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            assert gpu_id != 0, "a name is required when gpu_id is not given"
            name = "FD%d" % gpu_id
        self.name = name
73
class Struct(object):
    """Attribute bag whose str() is a 'C' struct initializer.

    Each instance attribute is rendered as ``.name=value,`` (value via
    str()), in attribute insertion order, and the result is wrapped in
    braces.  Nested Struct values therefore render as nested initializers.
    """
    def __str__(self):
        fields = [".%s=%s," % (name, str(value))
                  for name, value in vars(self).items()]
        return "{" + "".join(fields) + "}"
82
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       The tile_max_w/tile_max_h limits are normally only constrained by
       the corresponding bitfield size/shift (ie. VSC_BIN_SIZE, or
       similar).  Where a lower limit is used instead (in the tables of
       this file that is tile_max_w), a comment next to the value
       describes the bitfield size/shift.

       Creating an instance also appends it to the global s.gpu_infos list.
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        # Attributes are set in this exact order because Struct.__str__
        # emits the fields in attribute insertion order.
        layout = (
            ("gmem_align_w", gmem_align_w),
            ("gmem_align_h", gmem_align_h),
            ("tile_align_w", tile_align_w),
            ("tile_align_h", tile_align_h),
            ("tile_max_w", tile_max_w),
            ("tile_max_h", tile_max_h),
            ("num_vsc_pipes", num_vsc_pipes),
        )
        for field, value in layout:
            setattr(self, field, value)

        s.gpu_infos.append(self)
104
105
class A6xxGPUInfo(GPUInfo):
    """Device info for the a6xx generation.

       On top of the base GMEM/tile parameters this adds num_sp_cores and
       a nested 'a6xx' struct, which in turn holds a nested 'magic' struct
       of register values.  The template dict carries the values shared by
       a whole sub-generation so they are not repeated for every GPU; its
       "magic" entry feeds the magic struct and every other entry becomes
       a field of the a6xx struct.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
        super().__init__(
            gmem_align_w=16,
            gmem_align_h=4,
            tile_align_w=32,
            tile_align_h=32,
            tile_max_w=1024,  # max_bitfield_val(5, 0, 5)
            tile_max_h=max_bitfield_val(14, 8, 4),
            num_vsc_pipes=32,
        )
        assert(num_sp_cores == num_ccu)

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU
        if num_ccu == 3:
            self.tile_align_w = 96

        a6xx = self.a6xx = Struct()
        magic = a6xx.magic = Struct()

        # Various "magic" register values: the sub-generation ones from the
        # template first, then the per-GPU ones passed by the caller.
        for reg, value in template["magic"].items():
            setattr(magic, reg, value)
        magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        magic.PC_POWER_CNTL = PC_POWER_CNTL

        # Things that earlier gens have and later gens remove, provide
        # defaults here and let them be overridden by sub-gen template:
        a6xx.has_cp_reg_write = True
        a6xx.has_8bpp_ubwc = True

        # Everything except "magic" (already handled above) is copied over.
        for key, value in template.items():
            if key != "magic":
                setattr(a6xx, key, value)
145
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 512,
        tile_max_h   = ~0, # TODO (~0 is -1 in Python, so "-1" is emitted)
        num_vsc_pipes = 8,
    ))
160
# a305, a307, a320, a330 share one device-info struct:
add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 992, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5), # evaluates to 480
        num_vsc_pipes = 8,
    ))
173
# a405, a420, a430 share one device-info struct:
add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5), # evaluates to 480
        num_vsc_pipes = 8,
    ))
185
# a508, a509, a510, a512, a530, a540 share one device-info struct:
add_gpus([
        GPUId(508),
        GPUId(509),
        GPUId(510),
        GPUId(512),
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5), # evaluates to 4064
        num_vsc_pipes = 16,
    ))
200
201# a6xx can be divided into distinct sub-generations, where certain device-
202# info parameters are keyed to the sub-generation.  These templates reduce
203# the copypaste
204
# a615, a618, a630:
# (every key except "magic" becomes a field of the a6xx struct; "magic"
# holds register values for the nested magic struct)
a6xx_gen1 = {
    "fibers_per_sp": 128 * 16,
    "reg_size_vec4": 96,
    "instr_cache_size": 64,
    "ccu_cntl_gmem_unk2": True,
    "indirect_draw_wfm_quirk": True,
    "depth_bounds_require_depth_test_quirk": True,
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0x100000,
    },
}
217
# a640, a680:
# (every key except "magic" becomes a field of the a6xx struct; "magic"
# holds register values for the nested magic struct)
a6xx_gen2 = {
    "fibers_per_sp": 128 * 4 * 16,
    "reg_size_vec4": 96,
    "instr_cache_size": 64,  # TODO
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "indirect_draw_wfm_quirk": True,
    "depth_bounds_require_depth_test_quirk": True,  # TODO: check if true
    "has_dp2acc": False,  # TODO: check if true
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0,
    },
}
232
# a650:
# (every key except "magic" becomes a field of the a6xx struct; "magic"
# holds register values for the nested magic struct)
a6xx_gen3 = {
    "fibers_per_sp": 128 * 2 * 16,
    "reg_size_vec4": 64,
    # Blob limits it to 128 but we hang with 128
    "instr_cache_size": 127,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "tess_use_shared": True,
    "storage_16bit": True,
    "has_tex_filter_cubic": True,
    "has_sample_locations": True,
    "has_ccu_flush_bug": True,
    "has_8bpp_ubwc": False,
    "has_dp2acc": True,
    "magic": {
        # this seems to be a chicken bit that fixes cubic filtering:
        "TPL1_DBG_ECO_CNTL": 0x1000000,
    },
}
253
# a635, a660:
# (every key except "magic" becomes a field of the a6xx struct; "magic"
# holds register values for the nested magic struct)
a6xx_gen4 = {
    "fibers_per_sp": 128 * 2 * 16,
    "reg_size_vec4": 64,
    # Blob limits it to 128 but we hang with 128
    "instr_cache_size": 127,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "tess_use_shared": True,
    "storage_16bit": True,
    "has_tex_filter_cubic": True,
    "has_sample_locations": True,
    "has_ccu_flush_bug": True,
    "has_cp_reg_write": False,
    "has_8bpp_ubwc": False,
    "has_lpac": True,
    "has_shading_rate": True,
    "has_getfiberid": True,
    "has_dp2acc": True,
    "has_dp4acc": True,
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0x5008000,
    },
}
278
# a6xx registrations.  Each A6xxGPUInfo() call appends one struct to
# s.gpu_infos, so the order of these calls fixes the __infoN numbering in
# the generated output.  A6xxGPUInfo asserts num_sp_cores == num_ccu.

# a615, a618 (gen1 template):
add_gpus([
        GPUId(615),
        GPUId(618),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 1,
        num_ccu = 1,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 0,
    ))

# a630 (gen1 template):
add_gpus([
        GPUId(630),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x01000000,
        PC_POWER_CNTL = 1,
    ))

# a640 (gen2 template):
add_gpus([
        GPUId(640),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

# a680 (gen2 template):
add_gpus([
        GPUId(680),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 4,
        num_ccu = 4,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 3,
    ))

# a650 (gen3 template); num_ccu == 3 makes A6xxGPUInfo use tile_align_w = 96:
add_gpus([
        GPUId(650),
    ], A6xxGPUInfo(
        a6xx_gen3,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))

# Entries identified by chip_id only (gen4 template); GPUId stores
# gpu_id = 0 for these, so an explicit name is required:
add_gpus([
        GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
        GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
        # fallback wildcard entry should be last:
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

# a660 (gen4 template); num_ccu == 3 makes A6xxGPUInfo use tile_align_w = 96:
add_gpus([
        GPUId(660),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))
352
# Mako template for the generated C source.  It emits one static
# fd_dev_info initializer per entry of s.gpu_infos (named __info<index>,
# rendered via str(info)), followed by the fd_dev_recs table with one row
# per entry of s.gpus, each row pointing at the matching __info<index>.
# The True/False #defines let Python's bool spelling compile as C.
template = """\
/* Copyright (C) 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "freedreno_dev_info.h"

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""

# Render the template against the populated registry and write the
# generated C source to stdout.
print(Template(template).render(s=s))
394
395