1 /* $NetBSD: amdgpu_gfx_v8_0.c,v 1.6 2021/12/19 12:02:39 riastradh Exp $ */
2
3 /*
4 * Copyright 2014 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26 #include <sys/cdefs.h>
27 __KERNEL_RCSID(0, "$NetBSD: amdgpu_gfx_v8_0.c,v 1.6 2021/12/19 12:02:39 riastradh Exp $");
28
29 #include <linux/delay.h>
30 #include <linux/kernel.h>
31 #include <linux/firmware.h>
32 #include <linux/module.h>
33 #include <linux/pci.h>
34
35 #include "amdgpu.h"
36 #include "amdgpu_gfx.h"
37 #include "vi.h"
38 #include "vi_structs.h"
39 #include "vid.h"
40 #include "amdgpu_ucode.h"
41 #include "amdgpu_atombios.h"
42 #include "atombios_i2c.h"
43 #include "clearstate_vi.h"
44 #include "gfx_v8_0.h"
45
46 #include "gmc/gmc_8_2_d.h"
47 #include "gmc/gmc_8_2_sh_mask.h"
48
49 #include "oss/oss_3_0_d.h"
50 #include "oss/oss_3_0_sh_mask.h"
51
52 #include "bif/bif_5_0_d.h"
53 #include "bif/bif_5_0_sh_mask.h"
54 #include "gca/gfx_8_0_d.h"
55 #include "gca/gfx_8_0_enum.h"
56 #include "gca/gfx_8_0_sh_mask.h"
57
58 #include "dce/dce_10_0_d.h"
59 #include "dce/dce_10_0_sh_mask.h"
60
61 #include "smu/smu_7_1_3_d.h"
62
63 #include "ivsrcid/ivsrcid_vislands30.h"
64
65 #include <linux/nbsd-namespace.h>
66
/* Ring/buffer sizing: one GFX ring; per-queue MEC HPD (EOP) buffer size. */
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 4096

/* Per-ASIC golden values programmed into mmGB_ADDR_CONFIG. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that shift a field value into place for GB_TILE_MODE* /
 * GB_MACROTILE_MODE* tiling-configuration registers. */
#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block override bits in mmRLC_CGTT_MGCG_OVERRIDE (not provided by
 * the generated register headers, hence defined locally). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD: set vs. clear command codes. */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0
95
96 /* BPM Register Address*/
/*
 * BPM register addresses used with the SET/CLE_BPM_SERDES_CMD commands.
 * Values are made explicit so the wire encoding is obvious at a glance;
 * they are identical to the implicitly-numbered originals.
 */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON = 1,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE = 2,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE = 3,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE = 4,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX = 5	/* Number of BPM registers */
};
105
/* Length of a direct-register list entry in the RLC register-list format
 * (NOTE(review): exact consumer is in RLC list parsing later in this file —
 * confirm against gfx_v8_0 RLC setup). */
#define RLC_FormatDirectRegListLength 14
107
/*
 * Firmware images required by each supported VI-family ASIC:
 * CE/PFP/ME (graphics micro engines), MEC/MEC2 (compute micro engines)
 * and RLC.  Polaris parts additionally list "_2" variants (alternate
 * firmware revisions selected at load time).
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
183
/*
 * Per-VMID GDS register offsets.  Each row gives the {BASE, SIZE, GWS, OA}
 * register offsets for one VMID (0-15), as indicated by the register names.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
203
/*
 * Golden register tables for Tonga.  Each table is a flat array of
 * {register offset, mask, value} triples consumed by
 * amdgpu_device_program_register_sequence() (see
 * gfx_v8_0_init_golden_registers below); presumably the masked bits of
 * each register are rewritten with the given value — NOTE(review):
 * confirm exact mask semantics against that helper.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/raster/SPI configuration for Tonga. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* MGCG/CGCG (clock-gating) init sequence for Tonga: CGTT per-block clock
 * controls followed by per-CU CGTS settings. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
314
/*
 * Golden register tables for VegaM: {register offset, mask, value} triples
 * applied via amdgpu_device_program_register_sequence().
 */
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/SPI configuration for VegaM. */
static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
345
/*
 * Golden register tables for Polaris11: {register offset, mask, value}
 * triples applied via amdgpu_device_program_register_sequence().
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/SPI configuration for Polaris11. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
376
/*
 * Golden register tables for Polaris10: {register offset, mask, value}
 * triples applied via amdgpu_device_program_register_sequence().
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common GRBM/raster/SPI configuration for Polaris10. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
409
/*
 * Golden register tables for Fiji: {register offset, mask, value} triples
 * applied via amdgpu_device_program_register_sequence().
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Per-block golden settings for Fiji. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* MGCG/CGCG (clock-gating) init sequence for Fiji. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
477
/*
 * Golden register tables for Iceland/Topaz: {register offset, mask, value}
 * triples applied via amdgpu_device_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Common GRBM/raster/SPI configuration for Iceland/Topaz. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* MGCG/CGCG (clock-gating) init sequence for Iceland/Topaz; per-CU CGTS
 * entries only cover CU0-CU5 (fewer CUs than the larger ASICs). */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
577
/*
 * Golden register tables for Carrizo (CZ): {register offset, mask, value}
 * triples applied via amdgpu_device_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Common GRBM/raster/SPI configuration for Carrizo. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

/* MGCG/CGCG (clock-gating) init sequence for Carrizo: CGTT per-block clock
 * controls followed by per-CU (CU0-CU7) CGTS settings. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
684
/*
 * Golden register tables for Stoney: {register offset, mask, value} triples
 * applied via amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Common GRBM/raster/SPI configuration for Stoney. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

/* MGCG/CGCG (clock-gating) init sequence for Stoney (much shorter than the
 * other ASICs' sequences). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
719
720
/*
 * Human-readable descriptions of SQ EDC (error detection/correction) error
 * sources, one per SQ_EDC_INFO_SOURCE_* value; presumably indexed by the
 * SOURCE field of the SQ EDC info register — NOTE(review): confirm the
 * index mapping against the SQ interrupt handler later in this file.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
730
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
739
gfx_v8_0_init_golden_registers(struct amdgpu_device * adev)740 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
741 {
742 switch (adev->asic_type) {
743 case CHIP_TOPAZ:
744 amdgpu_device_program_register_sequence(adev,
745 iceland_mgcg_cgcg_init,
746 ARRAY_SIZE(iceland_mgcg_cgcg_init));
747 amdgpu_device_program_register_sequence(adev,
748 golden_settings_iceland_a11,
749 ARRAY_SIZE(golden_settings_iceland_a11));
750 amdgpu_device_program_register_sequence(adev,
751 iceland_golden_common_all,
752 ARRAY_SIZE(iceland_golden_common_all));
753 break;
754 case CHIP_FIJI:
755 amdgpu_device_program_register_sequence(adev,
756 fiji_mgcg_cgcg_init,
757 ARRAY_SIZE(fiji_mgcg_cgcg_init));
758 amdgpu_device_program_register_sequence(adev,
759 golden_settings_fiji_a10,
760 ARRAY_SIZE(golden_settings_fiji_a10));
761 amdgpu_device_program_register_sequence(adev,
762 fiji_golden_common_all,
763 ARRAY_SIZE(fiji_golden_common_all));
764 break;
765
766 case CHIP_TONGA:
767 amdgpu_device_program_register_sequence(adev,
768 tonga_mgcg_cgcg_init,
769 ARRAY_SIZE(tonga_mgcg_cgcg_init));
770 amdgpu_device_program_register_sequence(adev,
771 golden_settings_tonga_a11,
772 ARRAY_SIZE(golden_settings_tonga_a11));
773 amdgpu_device_program_register_sequence(adev,
774 tonga_golden_common_all,
775 ARRAY_SIZE(tonga_golden_common_all));
776 break;
777 case CHIP_VEGAM:
778 amdgpu_device_program_register_sequence(adev,
779 golden_settings_vegam_a11,
780 ARRAY_SIZE(golden_settings_vegam_a11));
781 amdgpu_device_program_register_sequence(adev,
782 vegam_golden_common_all,
783 ARRAY_SIZE(vegam_golden_common_all));
784 break;
785 case CHIP_POLARIS11:
786 case CHIP_POLARIS12:
787 amdgpu_device_program_register_sequence(adev,
788 golden_settings_polaris11_a11,
789 ARRAY_SIZE(golden_settings_polaris11_a11));
790 amdgpu_device_program_register_sequence(adev,
791 polaris11_golden_common_all,
792 ARRAY_SIZE(polaris11_golden_common_all));
793 break;
794 case CHIP_POLARIS10:
795 amdgpu_device_program_register_sequence(adev,
796 golden_settings_polaris10_a11,
797 ARRAY_SIZE(golden_settings_polaris10_a11));
798 amdgpu_device_program_register_sequence(adev,
799 polaris10_golden_common_all,
800 ARRAY_SIZE(polaris10_golden_common_all));
801 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
802 if (adev->pdev->revision == 0xc7 &&
803 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
804 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
805 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
806 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
807 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
808 }
809 break;
810 case CHIP_CARRIZO:
811 amdgpu_device_program_register_sequence(adev,
812 cz_mgcg_cgcg_init,
813 ARRAY_SIZE(cz_mgcg_cgcg_init));
814 amdgpu_device_program_register_sequence(adev,
815 cz_golden_settings_a11,
816 ARRAY_SIZE(cz_golden_settings_a11));
817 amdgpu_device_program_register_sequence(adev,
818 cz_golden_common_all,
819 ARRAY_SIZE(cz_golden_common_all));
820 break;
821 case CHIP_STONEY:
822 amdgpu_device_program_register_sequence(adev,
823 stoney_mgcg_cgcg_init,
824 ARRAY_SIZE(stoney_mgcg_cgcg_init));
825 amdgpu_device_program_register_sequence(adev,
826 stoney_golden_settings_a11,
827 ARRAY_SIZE(stoney_golden_settings_a11));
828 amdgpu_device_program_register_sequence(adev,
829 stoney_golden_common_all,
830 ARRAY_SIZE(stoney_golden_common_all));
831 break;
832 default:
833 break;
834 }
835 }
836
gfx_v8_0_scratch_init(struct amdgpu_device * adev)837 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
838 {
839 adev->gfx.scratch.num_reg = 8;
840 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
841 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
842 }
843
gfx_v8_0_ring_test_ring(struct amdgpu_ring * ring)844 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
845 {
846 struct amdgpu_device *adev = ring->adev;
847 uint32_t scratch;
848 uint32_t tmp = 0;
849 unsigned i;
850 int r;
851
852 r = amdgpu_gfx_scratch_get(adev, &scratch);
853 if (r)
854 return r;
855
856 WREG32(scratch, 0xCAFEDEAD);
857 r = amdgpu_ring_alloc(ring, 3);
858 if (r)
859 goto error_free_scratch;
860
861 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
862 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
863 amdgpu_ring_write(ring, 0xDEADBEEF);
864 amdgpu_ring_commit(ring);
865
866 for (i = 0; i < adev->usec_timeout; i++) {
867 tmp = RREG32(scratch);
868 if (tmp == 0xDEADBEEF)
869 break;
870 udelay(1);
871 }
872
873 if (i >= adev->usec_timeout)
874 r = -ETIMEDOUT;
875
876 error_free_scratch:
877 amdgpu_gfx_scratch_free(adev, scratch);
878 return r;
879 }
880
gfx_v8_0_ring_test_ib(struct amdgpu_ring * ring,long timeout)881 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
882 {
883 struct amdgpu_device *adev = ring->adev;
884 struct amdgpu_ib ib;
885 struct dma_fence *f = NULL;
886
887 unsigned int index;
888 uint64_t gpu_addr;
889 uint32_t tmp;
890 long r;
891
892 r = amdgpu_device_wb_get(adev, &index);
893 if (r)
894 return r;
895
896 gpu_addr = adev->wb.gpu_addr + (index * 4);
897 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
898 memset(&ib, 0, sizeof(ib));
899 r = amdgpu_ib_get(adev, NULL, 16, &ib);
900 if (r)
901 goto err1;
902
903 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
904 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
905 ib.ptr[2] = lower_32_bits(gpu_addr);
906 ib.ptr[3] = upper_32_bits(gpu_addr);
907 ib.ptr[4] = 0xDEADBEEF;
908 ib.length_dw = 5;
909
910 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
911 if (r)
912 goto err2;
913
914 r = dma_fence_wait_timeout(f, false, timeout);
915 if (r == 0) {
916 r = -ETIMEDOUT;
917 goto err2;
918 } else if (r < 0) {
919 goto err2;
920 }
921
922 tmp = adev->wb.wb[index];
923 if (tmp == 0xDEADBEEF)
924 r = 0;
925 else
926 r = -EINVAL;
927
928 err2:
929 amdgpu_ib_free(adev, &ib, NULL);
930 dma_fence_put(f);
931 err1:
932 amdgpu_device_wb_free(adev, index);
933 return r;
934 }
935
936
gfx_v8_0_free_microcode(struct amdgpu_device * adev)937 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
938 {
939 release_firmware(adev->gfx.pfp_fw);
940 adev->gfx.pfp_fw = NULL;
941 release_firmware(adev->gfx.me_fw);
942 adev->gfx.me_fw = NULL;
943 release_firmware(adev->gfx.ce_fw);
944 adev->gfx.ce_fw = NULL;
945 release_firmware(adev->gfx.rlc_fw);
946 adev->gfx.rlc_fw = NULL;
947 release_firmware(adev->gfx.mec_fw);
948 adev->gfx.mec_fw = NULL;
949 if ((adev->asic_type != CHIP_STONEY) &&
950 (adev->asic_type != CHIP_TOPAZ))
951 release_firmware(adev->gfx.mec2_fw);
952 adev->gfx.mec2_fw = NULL;
953
954 kfree(adev->gfx.rlc.register_list_format);
955 }
956
gfx_v8_0_init_microcode(struct amdgpu_device * adev)957 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
958 {
959 const char *chip_name;
960 char fw_name[30];
961 int err;
962 struct amdgpu_firmware_info *info = NULL;
963 const struct common_firmware_header *header = NULL;
964 const struct gfx_firmware_header_v1_0 *cp_hdr;
965 const struct rlc_firmware_header_v2_0 *rlc_hdr;
966 unsigned int *tmp = NULL, i;
967
968 DRM_DEBUG("\n");
969
970 switch (adev->asic_type) {
971 case CHIP_TOPAZ:
972 chip_name = "topaz";
973 break;
974 case CHIP_TONGA:
975 chip_name = "tonga";
976 break;
977 case CHIP_CARRIZO:
978 chip_name = "carrizo";
979 break;
980 case CHIP_FIJI:
981 chip_name = "fiji";
982 break;
983 case CHIP_STONEY:
984 chip_name = "stoney";
985 break;
986 case CHIP_POLARIS10:
987 chip_name = "polaris10";
988 break;
989 case CHIP_POLARIS11:
990 chip_name = "polaris11";
991 break;
992 case CHIP_POLARIS12:
993 chip_name = "polaris12";
994 break;
995 case CHIP_VEGAM:
996 chip_name = "vegam";
997 break;
998 default:
999 BUG();
1000 }
1001
1002 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1003 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1004 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1005 if (err == -ENOENT) {
1006 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1007 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1008 }
1009 } else {
1010 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1011 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1012 }
1013 if (err)
1014 goto out;
1015 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1016 if (err)
1017 goto out;
1018 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1019 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1020 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1021
1022 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1023 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1024 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1025 if (err == -ENOENT) {
1026 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1027 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1028 }
1029 } else {
1030 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1031 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1032 }
1033 if (err)
1034 goto out;
1035 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1036 if (err)
1037 goto out;
1038 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1039 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1040
1041 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1042
1043 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1044 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1045 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1046 if (err == -ENOENT) {
1047 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1048 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1049 }
1050 } else {
1051 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1052 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1053 }
1054 if (err)
1055 goto out;
1056 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1057 if (err)
1058 goto out;
1059 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1060 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1061 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1062
1063 /*
1064 * Support for MCBP/Virtualization in combination with chained IBs is
1065 * formal released on feature version #46
1066 */
1067 if (adev->gfx.ce_feature_version >= 46 &&
1068 adev->gfx.pfp_feature_version >= 46) {
1069 adev->virt.chained_ib_support = true;
1070 DRM_INFO("Chained IB support enabled!\n");
1071 } else
1072 adev->virt.chained_ib_support = false;
1073
1074 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1075 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1076 if (err)
1077 goto out;
1078 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1079 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1080 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1081 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1082
1083 adev->gfx.rlc.save_and_restore_offset =
1084 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1085 adev->gfx.rlc.clear_state_descriptor_offset =
1086 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1087 adev->gfx.rlc.avail_scratch_ram_locations =
1088 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1089 adev->gfx.rlc.reg_restore_list_size =
1090 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1091 adev->gfx.rlc.reg_list_format_start =
1092 le32_to_cpu(rlc_hdr->reg_list_format_start);
1093 adev->gfx.rlc.reg_list_format_separate_start =
1094 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1095 adev->gfx.rlc.starting_offsets_start =
1096 le32_to_cpu(rlc_hdr->starting_offsets_start);
1097 adev->gfx.rlc.reg_list_format_size_bytes =
1098 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1099 adev->gfx.rlc.reg_list_size_bytes =
1100 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1101
1102 adev->gfx.rlc.register_list_format =
1103 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1104 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1105
1106 if (!adev->gfx.rlc.register_list_format) {
1107 err = -ENOMEM;
1108 goto out;
1109 }
1110
1111 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1112 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1113 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1114 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1115
1116 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1117
1118 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1119 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1120 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1121 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1122
1123 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1124 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1125 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1126 if (err == -ENOENT) {
1127 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1128 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1129 }
1130 } else {
1131 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1132 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1133 }
1134 if (err)
1135 goto out;
1136 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1137 if (err)
1138 goto out;
1139 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1140 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1141 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1142
1143 if ((adev->asic_type != CHIP_STONEY) &&
1144 (adev->asic_type != CHIP_TOPAZ)) {
1145 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1146 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1147 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1148 if (err == -ENOENT) {
1149 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1150 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1151 }
1152 } else {
1153 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1154 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1155 }
1156 if (!err) {
1157 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1158 if (err)
1159 goto out;
1160 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1161 adev->gfx.mec2_fw->data;
1162 adev->gfx.mec2_fw_version =
1163 le32_to_cpu(cp_hdr->header.ucode_version);
1164 adev->gfx.mec2_feature_version =
1165 le32_to_cpu(cp_hdr->ucode_feature_version);
1166 } else {
1167 err = 0;
1168 adev->gfx.mec2_fw = NULL;
1169 }
1170 }
1171
1172 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1173 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1174 info->fw = adev->gfx.pfp_fw;
1175 header = (const struct common_firmware_header *)info->fw->data;
1176 adev->firmware.fw_size +=
1177 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1178
1179 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1180 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1181 info->fw = adev->gfx.me_fw;
1182 header = (const struct common_firmware_header *)info->fw->data;
1183 adev->firmware.fw_size +=
1184 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1185
1186 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1187 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1188 info->fw = adev->gfx.ce_fw;
1189 header = (const struct common_firmware_header *)info->fw->data;
1190 adev->firmware.fw_size +=
1191 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1194 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1195 info->fw = adev->gfx.rlc_fw;
1196 header = (const struct common_firmware_header *)info->fw->data;
1197 adev->firmware.fw_size +=
1198 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
1200 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1201 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1202 info->fw = adev->gfx.mec_fw;
1203 header = (const struct common_firmware_header *)info->fw->data;
1204 adev->firmware.fw_size +=
1205 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1206
1207 /* we need account JT in */
1208 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1209 adev->firmware.fw_size +=
1210 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1211
1212 if (amdgpu_sriov_vf(adev)) {
1213 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1214 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1215 info->fw = adev->gfx.mec_fw;
1216 adev->firmware.fw_size +=
1217 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1218 }
1219
1220 if (adev->gfx.mec2_fw) {
1221 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1222 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1223 info->fw = adev->gfx.mec2_fw;
1224 header = (const struct common_firmware_header *)info->fw->data;
1225 adev->firmware.fw_size +=
1226 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1227 }
1228
1229 out:
1230 if (err) {
1231 dev_err(adev->dev,
1232 "gfx8: Failed to load firmware \"%s\"\n",
1233 fw_name);
1234 release_firmware(adev->gfx.pfp_fw);
1235 adev->gfx.pfp_fw = NULL;
1236 release_firmware(adev->gfx.me_fw);
1237 adev->gfx.me_fw = NULL;
1238 release_firmware(adev->gfx.ce_fw);
1239 adev->gfx.ce_fw = NULL;
1240 release_firmware(adev->gfx.rlc_fw);
1241 adev->gfx.rlc_fw = NULL;
1242 release_firmware(adev->gfx.mec_fw);
1243 adev->gfx.mec_fw = NULL;
1244 release_firmware(adev->gfx.mec2_fw);
1245 adev->gfx.mec2_fw = NULL;
1246 }
1247 return err;
1248 }
1249
gfx_v8_0_get_csb_buffer(struct amdgpu_device * adev,volatile u32 * buffer)1250 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1251 volatile u32 *buffer)
1252 {
1253 u32 count = 0, i;
1254 const struct cs_section_def *sect = NULL;
1255 const struct cs_extent_def *ext = NULL;
1256
1257 if (adev->gfx.rlc.cs_data == NULL)
1258 return;
1259 if (buffer == NULL)
1260 return;
1261
1262 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1263 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1264
1265 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1266 buffer[count++] = cpu_to_le32(0x80000000);
1267 buffer[count++] = cpu_to_le32(0x80000000);
1268
1269 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1270 for (ext = sect->section; ext->extent != NULL; ++ext) {
1271 if (sect->id == SECT_CONTEXT) {
1272 buffer[count++] =
1273 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1274 buffer[count++] = cpu_to_le32(ext->reg_index -
1275 PACKET3_SET_CONTEXT_REG_START);
1276 for (i = 0; i < ext->reg_count; i++)
1277 buffer[count++] = cpu_to_le32(ext->extent[i]);
1278 } else {
1279 return;
1280 }
1281 }
1282 }
1283
1284 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1285 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1286 PACKET3_SET_CONTEXT_REG_START);
1287 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1288 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1289
1290 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1291 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1292
1293 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1294 buffer[count++] = cpu_to_le32(0);
1295 }
1296
gfx_v8_0_cp_jump_table_num(struct amdgpu_device * adev)1297 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1298 {
1299 if (adev->asic_type == CHIP_CARRIZO)
1300 return 5;
1301 else
1302 return 4;
1303 }
1304
gfx_v8_0_rlc_init(struct amdgpu_device * adev)1305 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1306 {
1307 const struct cs_section_def *cs_data;
1308 int r;
1309
1310 adev->gfx.rlc.cs_data = vi_cs_data;
1311
1312 cs_data = adev->gfx.rlc.cs_data;
1313
1314 if (cs_data) {
1315 /* init clear state block */
1316 r = amdgpu_gfx_rlc_init_csb(adev);
1317 if (r)
1318 return r;
1319 }
1320
1321 if ((adev->asic_type == CHIP_CARRIZO) ||
1322 (adev->asic_type == CHIP_STONEY)) {
1323 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1324 r = amdgpu_gfx_rlc_init_cpt(adev);
1325 if (r)
1326 return r;
1327 }
1328
1329 return 0;
1330 }
1331
gfx_v8_0_mec_fini(struct amdgpu_device * adev)1332 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1333 {
1334 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1335 }
1336
gfx_v8_0_mec_init(struct amdgpu_device * adev)1337 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1338 {
1339 int r;
1340 u32 *hpd;
1341 size_t mec_hpd_size;
1342
1343 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1344
1345 /* take ownership of the relevant compute queues */
1346 amdgpu_gfx_compute_queue_acquire(adev);
1347
1348 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1349
1350 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1351 AMDGPU_GEM_DOMAIN_VRAM,
1352 &adev->gfx.mec.hpd_eop_obj,
1353 &adev->gfx.mec.hpd_eop_gpu_addr,
1354 (void **)&hpd);
1355 if (r) {
1356 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1357 return r;
1358 }
1359
1360 memset(hpd, 0, mec_hpd_size);
1361
1362 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1363 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364
1365 return 0;
1366 }
1367
/*
 * Hand-assembled GCN compute shader (raw machine code dwords) that
 * initializes VGPRs; copied into an IB and dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() as part of the Carrizo EDC workaround.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1404
/*
 * Hand-assembled GCN compute shader (raw machine code dwords) that
 * initializes SGPRs; used for both SGPR dispatches in
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1429
/*
 * {register, value} pairs emitted as PACKET3_SET_SH_REG writes before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1450
/*
 * {register, value} pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread mask 0x0f — lower CUs).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1471
/*
 * {register, value} pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread mask 0xf0 — upper CUs).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1492
/*
 * EDC SEC/DED error-counter registers; read back (read clears, per the
 * usage in gfx_v8_0_do_edc_gpr_workarounds()) after the workaround
 * dispatches complete.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1521
gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device * adev)1522 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1523 {
1524 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1525 struct amdgpu_ib ib;
1526 struct dma_fence *f = NULL;
1527 int r, i;
1528 u32 tmp;
1529 unsigned total_size, vgpr_offset, sgpr_offset;
1530 u64 gpu_addr;
1531
1532 /* only supported on CZ */
1533 if (adev->asic_type != CHIP_CARRIZO)
1534 return 0;
1535
1536 /* bail if the compute ring is not ready */
1537 if (!ring->sched.ready)
1538 return 0;
1539
1540 tmp = RREG32(mmGB_EDC_MODE);
1541 WREG32(mmGB_EDC_MODE, 0);
1542
1543 total_size =
1544 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1545 total_size +=
1546 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1547 total_size +=
1548 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1549 total_size = ALIGN(total_size, 256);
1550 vgpr_offset = total_size;
1551 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1552 sgpr_offset = total_size;
1553 total_size += sizeof(sgpr_init_compute_shader);
1554
1555 /* allocate an indirect buffer to put the commands in */
1556 memset(&ib, 0, sizeof(ib));
1557 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1558 if (r) {
1559 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1560 return r;
1561 }
1562
1563 /* load the compute shaders */
1564 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1565 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1566
1567 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1568 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1569
1570 /* init the ib length to 0 */
1571 ib.length_dw = 0;
1572
1573 /* VGPR */
1574 /* write the register state for the compute dispatch */
1575 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1576 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1577 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1578 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1579 }
1580 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1581 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1582 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1583 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1584 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1585 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1586
1587 /* write dispatch packet */
1588 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1589 ib.ptr[ib.length_dw++] = 8; /* x */
1590 ib.ptr[ib.length_dw++] = 1; /* y */
1591 ib.ptr[ib.length_dw++] = 1; /* z */
1592 ib.ptr[ib.length_dw++] =
1593 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1594
1595 /* write CS partial flush packet */
1596 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1597 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1598
1599 /* SGPR1 */
1600 /* write the register state for the compute dispatch */
1601 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1603 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1604 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1605 }
1606 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1607 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1609 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1610 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1611 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1612
1613 /* write dispatch packet */
1614 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1615 ib.ptr[ib.length_dw++] = 8; /* x */
1616 ib.ptr[ib.length_dw++] = 1; /* y */
1617 ib.ptr[ib.length_dw++] = 1; /* z */
1618 ib.ptr[ib.length_dw++] =
1619 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1620
1621 /* write CS partial flush packet */
1622 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1623 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1624
1625 /* SGPR2 */
1626 /* write the register state for the compute dispatch */
1627 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1630 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1631 }
1632 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638
1639 /* write dispatch packet */
1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641 ib.ptr[ib.length_dw++] = 8; /* x */
1642 ib.ptr[ib.length_dw++] = 1; /* y */
1643 ib.ptr[ib.length_dw++] = 1; /* z */
1644 ib.ptr[ib.length_dw++] =
1645 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646
1647 /* write CS partial flush packet */
1648 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650
	/* schedule the ib on the ring */
1652 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1653 if (r) {
1654 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1655 goto fail;
1656 }
1657
1658 /* wait for the GPU to finish processing the IB */
1659 r = dma_fence_wait(f, false);
1660 if (r) {
1661 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1662 goto fail;
1663 }
1664
1665 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1666 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1667 WREG32(mmGB_EDC_MODE, tmp);
1668
1669 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1670 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1671 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1672
1673
1674 /* read back registers to clear the counters */
1675 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1676 RREG32(sec_ded_counter_registers[i]);
1677
1678 fail:
1679 amdgpu_ib_free(adev, &ib, NULL);
1680 dma_fence_put(f);
1681
1682 return r;
1683 }
1684
/*
 * gfx_v8_0_gpu_early_init - populate per-ASIC GFX configuration limits
 *
 * Fills in adev->gfx.config with the chip-specific shader-engine/CU/
 * backend counts, scan-converter FIFO sizes and the "golden"
 * GB_ADDR_CONFIG value, then derives the DRAM row size — from the DIMM
 * fuse registers on APUs, or from MC_ARB_RAMCFG on discrete parts —
 * and folds it into the saved gb_addr_config.
 *
 * Returns 0 on success, or a negative errno if the atombios gfx-info
 * query fails (Polaris10/11/12 and VegaM read their CU topology from
 * the VBIOS instead of using hard-coded tables).
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* CU/SE topology comes from the VBIOS on Polaris. */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		/* CU/SE topology comes from the VBIOS on Polaris/VegaM. */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		/* conservative fallback for unknown VI parts */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		/* map codes 0, 3, 4 and >12 are treated as "not populated" */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: row size = 4 * 2^(8 + NOOFCOLS) bytes, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1887
gfx_v8_0_compute_ring_init(struct amdgpu_device * adev,int ring_id,int mec,int pipe,int queue)1888 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1889 int mec, int pipe, int queue)
1890 {
1891 int r;
1892 unsigned irq_type;
1893 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1894
1895 ring = &adev->gfx.compute_ring[ring_id];
1896
1897 /* mec0 is me1 */
1898 ring->me = mec + 1;
1899 ring->pipe = pipe;
1900 ring->queue = queue;
1901
1902 ring->ring_obj = NULL;
1903 ring->use_doorbell = true;
1904 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1905 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1906 + (ring_id * GFX8_MEC_HPD_SIZE);
1907 snprintf(ring->name, sizeof ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1908
1909 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1910 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1911 + ring->pipe;
1912
1913 /* type-2 packets are deprecated on MEC, use type-3 instead */
1914 r = amdgpu_ring_init(adev, ring, 1024,
1915 &adev->gfx.eop_irq, irq_type);
1916 if (r)
1917 return r;
1918
1919
1920 return 0;
1921 }
1922
1923 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1924
/*
 * gfx_v8_0_sw_init - software-side initialization for the GFX v8 IP block
 * @handle: opaque IP-block handle; actually a struct amdgpu_device *
 *
 * Registers the GFX interrupt sources (EOP, privileged reg/instr faults,
 * CP ECC, SQ messages), loads microcode, allocates the RLC and MEC
 * buffer objects, then creates the GFX and compute rings, the KIQ ring
 * and the per-queue MQDs, and finally fills in the per-ASIC gfx config.
 * Ordering matters: IRQ sources must exist before rings are created,
 * and MEC BOs must exist before compute rings reference their EOP space.
 *
 * Returns 0 on success or the first negative errno encountered; no
 * explicit unwind is done here (teardown happens in gfx_v8_0_sw_fini()).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs varies by ASIC; each has 4 pipes x 8 queues below */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ messages are handled from process context via this work item */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		snprintf(ring->name, sizeof ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2068
/*
 * gfx_v8_0_sw_fini - tear down everything created by gfx_v8_0_sw_init()
 * @handle: opaque IP-block handle; actually a struct amdgpu_device *
 *
 * Frees resources in roughly the reverse order of creation: rings,
 * MQDs, KIQ, MEC buffers, RLC state (including the clear-state BO and,
 * on Carrizo/Stoney, the CP table BO) and finally the loaded microcode.
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	/* __UNVOLATILE: NetBSD-ism to strip volatile for the void ** cast */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)__UNVOLATILE(&adev->gfx.rlc.cs_ptr));
	/* only Carrizo/Stoney allocate a CP table BO in rlc init */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)__UNVOLATILE(&adev->gfx.rlc.cp_table_ptr));
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2098
gfx_v8_0_tiling_mode_table_init(struct amdgpu_device * adev)2099 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2100 {
2101 uint32_t *modearray, *mod2array;
2102 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2103 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2104 u32 reg_offset;
2105
2106 modearray = adev->gfx.config.tile_mode_array;
2107 mod2array = adev->gfx.config.macrotile_mode_array;
2108
2109 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2110 modearray[reg_offset] = 0;
2111
2112 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2113 mod2array[reg_offset] = 0;
2114
2115 switch (adev->asic_type) {
2116 case CHIP_TOPAZ:
2117 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 PIPE_CONFIG(ADDR_SURF_P2) |
2119 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2120 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2121 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122 PIPE_CONFIG(ADDR_SURF_P2) |
2123 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2125 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2126 PIPE_CONFIG(ADDR_SURF_P2) |
2127 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2128 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2129 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 PIPE_CONFIG(ADDR_SURF_P2) |
2131 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2132 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2133 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134 PIPE_CONFIG(ADDR_SURF_P2) |
2135 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2138 PIPE_CONFIG(ADDR_SURF_P2) |
2139 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2141 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2142 PIPE_CONFIG(ADDR_SURF_P2) |
2143 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2144 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2145 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2146 PIPE_CONFIG(ADDR_SURF_P2));
2147 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 PIPE_CONFIG(ADDR_SURF_P2) |
2149 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2151 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 PIPE_CONFIG(ADDR_SURF_P2) |
2153 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2159 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2171 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2172 PIPE_CONFIG(ADDR_SURF_P2) |
2173 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2175 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2176 PIPE_CONFIG(ADDR_SURF_P2) |
2177 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2179 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2180 PIPE_CONFIG(ADDR_SURF_P2) |
2181 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2183 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2184 PIPE_CONFIG(ADDR_SURF_P2) |
2185 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2187 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2188 PIPE_CONFIG(ADDR_SURF_P2) |
2189 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2191 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2192 PIPE_CONFIG(ADDR_SURF_P2) |
2193 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2195 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2196 PIPE_CONFIG(ADDR_SURF_P2) |
2197 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2199 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2200 PIPE_CONFIG(ADDR_SURF_P2) |
2201 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2203 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2204 PIPE_CONFIG(ADDR_SURF_P2) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2207 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2215 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2219
2220 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2221 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2222 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2223 NUM_BANKS(ADDR_SURF_8_BANK));
2224 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2227 NUM_BANKS(ADDR_SURF_8_BANK));
2228 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2231 NUM_BANKS(ADDR_SURF_8_BANK));
2232 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2235 NUM_BANKS(ADDR_SURF_8_BANK));
2236 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239 NUM_BANKS(ADDR_SURF_8_BANK));
2240 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2243 NUM_BANKS(ADDR_SURF_8_BANK));
2244 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2245 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2246 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2247 NUM_BANKS(ADDR_SURF_8_BANK));
2248 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251 NUM_BANKS(ADDR_SURF_16_BANK));
2252 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2255 NUM_BANKS(ADDR_SURF_16_BANK));
2256 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2259 NUM_BANKS(ADDR_SURF_16_BANK));
2260 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2263 NUM_BANKS(ADDR_SURF_16_BANK));
2264 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267 NUM_BANKS(ADDR_SURF_16_BANK));
2268 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271 NUM_BANKS(ADDR_SURF_16_BANK));
2272 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 NUM_BANKS(ADDR_SURF_8_BANK));
2276
2277 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2278 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2279 reg_offset != 23)
2280 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2281
2282 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2283 if (reg_offset != 7)
2284 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2285
2286 break;
2287 case CHIP_FIJI:
2288 case CHIP_VEGAM:
2289 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2293 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2297 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2301 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2303 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2305 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2309 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2313 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2318 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2321 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2323 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2327 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2331 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2335 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2336 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2339 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2347 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2351 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2355 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2359 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2363 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2367 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2368 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2371 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2372 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2375 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2379 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2380 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2383 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2384 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2387 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2388 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2391 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2392 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2407 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2411
2412 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415 NUM_BANKS(ADDR_SURF_8_BANK));
2416 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2419 NUM_BANKS(ADDR_SURF_8_BANK));
2420 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2423 NUM_BANKS(ADDR_SURF_8_BANK));
2424 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427 NUM_BANKS(ADDR_SURF_8_BANK));
2428 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2431 NUM_BANKS(ADDR_SURF_8_BANK));
2432 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2435 NUM_BANKS(ADDR_SURF_8_BANK));
2436 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2439 NUM_BANKS(ADDR_SURF_8_BANK));
2440 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 NUM_BANKS(ADDR_SURF_8_BANK));
2444 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447 NUM_BANKS(ADDR_SURF_8_BANK));
2448 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451 NUM_BANKS(ADDR_SURF_8_BANK));
2452 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 NUM_BANKS(ADDR_SURF_8_BANK));
2456 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459 NUM_BANKS(ADDR_SURF_8_BANK));
2460 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463 NUM_BANKS(ADDR_SURF_8_BANK));
2464 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 NUM_BANKS(ADDR_SURF_4_BANK));
2468
2469 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2470 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2471
2472 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2473 if (reg_offset != 7)
2474 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2475
2476 break;
	case CHIP_TONGA:
		/*
		 * Tonga tile-mode table.  Every entry uses the 8-pipe
		 * ADDR_SURF_P8_32x32_16x16 pipe configuration, except
		 * entries 7, 12, 17, 23 and 30, which use the 4-pipe
		 * ADDR_SURF_P4_16x16 configuration.
		 */
		/* 0-7: depth/stencil modes (DEPTH_MICRO_TILING), tile
		 * split 64B..2KB, then 1D and PRT variants at 2KB. */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned (no micro tiling / sample split fields) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* 9-12: displayable surfaces (DISPLAY_MICRO_TILING) */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13-17: thin color/texture surfaces (THIN_MICRO_TILING) */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18-26: thick/xthick (3D volume) surfaces */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27-30: rotated surfaces (ROTATED_MICRO_TILING) */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank geometry) table.  Entry 7 is deliberately
		 * never initialized here, and the programming loop below
		 * skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program GB_TILE_MODE0..(num_tile_mode_states - 1). */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODE registers, skipping entry 7,
		 * which was never initialized above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/Polaris12 tile-mode table.  These parts use the
		 * 4-pipe ADDR_SURF_P4_16x16 pipe configuration for every
		 * entry (unlike Tonga/Polaris10, which are 8-pipe).
		 */
		/* 0-7: depth/stencil modes (DEPTH_MICRO_TILING), tile
		 * split 64B..2KB, then 1D and PRT variants at 2KB. */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned (no micro tiling / sample split fields) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		/* 9-12: displayable surfaces (DISPLAY_MICRO_TILING) */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13-17: thin color/texture surfaces (THIN_MICRO_TILING) */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18-26: thick/xthick (3D volume) surfaces */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27-30: rotated surfaces (ROTATED_MICRO_TILING) */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank geometry) table.  Entry 7 is deliberately
		 * never initialized here, and the programming loop below
		 * skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* Entries 8 and 9 are the only ones using BANK_WIDTH_2. */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program GB_TILE_MODE0..(num_tile_mode_states - 1). */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODE registers, skipping entry 7,
		 * which was never initialized above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Polaris10 tile-mode table.  Uses the 8-pipe
		 * ADDR_SURF_P8_32x32_16x16 pipe configuration for every
		 * entry except 7, 12, 17, 23 and 30, which use the 4-pipe
		 * ADDR_SURF_P4_16x16 configuration.
		 */
		/* 0-7: depth/stencil modes (DEPTH_MICRO_TILING), tile
		 * split 64B..2KB, then 1D and PRT variants at 2KB. */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned (no micro tiling / sample split fields) */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* 9-12: displayable surfaces (DISPLAY_MICRO_TILING) */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13-17: thin color/texture surfaces (THIN_MICRO_TILING) */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18-26: thick/xthick (3D volume) surfaces */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27-30: rotated surfaces (ROTATED_MICRO_TILING) */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank geometry) table.  Entry 7 is deliberately
		 * never initialized here, and the programming loop below
		 * skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program GB_TILE_MODE0..(num_tile_mode_states - 1). */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODE registers, skipping entry 7,
		 * which was never initialized above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
3071 case CHIP_STONEY:
3072 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3073 PIPE_CONFIG(ADDR_SURF_P2) |
3074 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3076 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077 PIPE_CONFIG(ADDR_SURF_P2) |
3078 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3080 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081 PIPE_CONFIG(ADDR_SURF_P2) |
3082 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3084 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3085 PIPE_CONFIG(ADDR_SURF_P2) |
3086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3088 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3089 PIPE_CONFIG(ADDR_SURF_P2) |
3090 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3092 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3093 PIPE_CONFIG(ADDR_SURF_P2) |
3094 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3096 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3097 PIPE_CONFIG(ADDR_SURF_P2) |
3098 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3100 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3101 PIPE_CONFIG(ADDR_SURF_P2));
3102 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3103 PIPE_CONFIG(ADDR_SURF_P2) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3106 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107 PIPE_CONFIG(ADDR_SURF_P2) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3110 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3111 PIPE_CONFIG(ADDR_SURF_P2) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3114 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3115 PIPE_CONFIG(ADDR_SURF_P2) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3126 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127 PIPE_CONFIG(ADDR_SURF_P2) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3130 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3134 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3138 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3142 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3143 PIPE_CONFIG(ADDR_SURF_P2) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3146 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3147 PIPE_CONFIG(ADDR_SURF_P2) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3151 PIPE_CONFIG(ADDR_SURF_P2) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3154 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3155 PIPE_CONFIG(ADDR_SURF_P2) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3158 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3159 PIPE_CONFIG(ADDR_SURF_P2) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3163 PIPE_CONFIG(ADDR_SURF_P2) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3166 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3167 PIPE_CONFIG(ADDR_SURF_P2) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3170 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3171 PIPE_CONFIG(ADDR_SURF_P2) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3174
3175 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3178 NUM_BANKS(ADDR_SURF_8_BANK));
3179 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3182 NUM_BANKS(ADDR_SURF_8_BANK));
3183 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3186 NUM_BANKS(ADDR_SURF_8_BANK));
3187 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3190 NUM_BANKS(ADDR_SURF_8_BANK));
3191 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3194 NUM_BANKS(ADDR_SURF_8_BANK));
3195 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3198 NUM_BANKS(ADDR_SURF_8_BANK));
3199 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3202 NUM_BANKS(ADDR_SURF_8_BANK));
3203 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206 NUM_BANKS(ADDR_SURF_16_BANK));
3207 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210 NUM_BANKS(ADDR_SURF_16_BANK));
3211 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214 NUM_BANKS(ADDR_SURF_16_BANK));
3215 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3218 NUM_BANKS(ADDR_SURF_16_BANK));
3219 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3221 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3222 NUM_BANKS(ADDR_SURF_16_BANK));
3223 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3226 NUM_BANKS(ADDR_SURF_16_BANK));
3227 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3230 NUM_BANKS(ADDR_SURF_8_BANK));
3231
3232 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3233 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3234 reg_offset != 23)
3235 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3236
3237 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3238 if (reg_offset != 7)
3239 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3240
3241 break;
3242 default:
3243 dev_warn(adev->dev,
3244 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3245 adev->asic_type);
3246 /* fall through */
3247
3248 case CHIP_CARRIZO:
3249 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3250 PIPE_CONFIG(ADDR_SURF_P2) |
3251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3253 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3257 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258 PIPE_CONFIG(ADDR_SURF_P2) |
3259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3261 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3262 PIPE_CONFIG(ADDR_SURF_P2) |
3263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3265 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266 PIPE_CONFIG(ADDR_SURF_P2) |
3267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3270 PIPE_CONFIG(ADDR_SURF_P2) |
3271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3274 PIPE_CONFIG(ADDR_SURF_P2) |
3275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3278 PIPE_CONFIG(ADDR_SURF_P2));
3279 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3283 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3291 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3315 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3319 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3323 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3324 PIPE_CONFIG(ADDR_SURF_P2) |
3325 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3328 PIPE_CONFIG(ADDR_SURF_P2) |
3329 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3336 PIPE_CONFIG(ADDR_SURF_P2) |
3337 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3340 PIPE_CONFIG(ADDR_SURF_P2) |
3341 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3343 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3344 PIPE_CONFIG(ADDR_SURF_P2) |
3345 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3347 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3348 PIPE_CONFIG(ADDR_SURF_P2) |
3349 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3351
3352 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 NUM_BANKS(ADDR_SURF_8_BANK));
3356 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359 NUM_BANKS(ADDR_SURF_8_BANK));
3360 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3363 NUM_BANKS(ADDR_SURF_8_BANK));
3364 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3367 NUM_BANKS(ADDR_SURF_8_BANK));
3368 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371 NUM_BANKS(ADDR_SURF_8_BANK));
3372 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3375 NUM_BANKS(ADDR_SURF_8_BANK));
3376 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379 NUM_BANKS(ADDR_SURF_8_BANK));
3380 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3383 NUM_BANKS(ADDR_SURF_16_BANK));
3384 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387 NUM_BANKS(ADDR_SURF_16_BANK));
3388 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391 NUM_BANKS(ADDR_SURF_16_BANK));
3392 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395 NUM_BANKS(ADDR_SURF_16_BANK));
3396 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399 NUM_BANKS(ADDR_SURF_16_BANK));
3400 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403 NUM_BANKS(ADDR_SURF_16_BANK));
3404 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407 NUM_BANKS(ADDR_SURF_8_BANK));
3408
3409 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3410 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3411 reg_offset != 23)
3412 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3413
3414 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3415 if (reg_offset != 7)
3416 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3417
3418 break;
3419 }
3420 }
3421
/*
 * gfx_v8_0_select_se_sh - steer indexed register accesses
 *
 * @adev: amdgpu_device pointer
 * @se_num: shader engine to select, or 0xffffffff to broadcast to all
 * @sh_num: shader array to select, or 0xffffffff to broadcast to all
 * @instance: instance to select, or 0xffffffff to broadcast to all
 *
 * Programs GRBM_GFX_INDEX so that subsequent per-SE/SH/instance register
 * accesses target the requested unit, or are broadcast when the magic
 * value 0xffffffff is passed for a field.
 */
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 gfx_index;

	gfx_index = (instance == 0xffffffff) ?
		REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1) :
		REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	gfx_index = (se_num == 0xffffffff) ?
		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1) :
		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SE_INDEX, se_num);

	gfx_index = (sh_num == 0xffffffff) ?
		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1) :
		REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, gfx_index);
}
3444
/*
 * gfx_v8_0_select_me_pipe_q - select a ME/pipe/queue/VM for register access
 *
 * Thin wrapper over the VI SRBM selection helper; routes subsequent
 * per-queue register accesses to the given micro engine, pipe, queue
 * and VMID.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	vi_srbm_select(adev, me, pipe, q, vm);
}
3450
gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device * adev)3451 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3452 {
3453 u32 data, mask;
3454
3455 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3456 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3457
3458 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3459
3460 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3461 adev->gfx.config.max_sh_per_se);
3462
3463 return (~data) & mask;
3464 }
3465
/*
 * gfx_v8_0_raster_config - per-ASIC default raster configuration
 *
 * @adev: amdgpu_device pointer
 * @rconf: PA_SC_RASTER_CONFIG value; the chip's default field values are
 *	OR'd into it
 * @rconf1: PA_SC_RASTER_CONFIG_1 value; the chip's default field values
 *	are OR'd into it
 *
 * ORs the golden RB/PKR/SE mapping fields for the detected ASIC into the
 * caller-provided raster configuration words.  Small parts (Topaz,
 * Carrizo, Stoney) leave most fields at zero.  The magic constants come
 * from AMD's per-chip golden settings and must not be altered.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* Single-SE APU: reset defaults are already correct. */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3506
/*
 * gfx_v8_0_write_harvested_raster_configs - fix up raster configs for
 *	harvested parts
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmask of render backends that are actually enabled
 * @num_rb: total number of RB pipes being configured
 *
 * On parts with fused-off (harvested) render backends, the default
 * SE-pair/SE/packer/RB mapping fields may point at dead units.  For each
 * mapping level, if one side of a pair is completely disabled, redirect
 * the map field to the surviving side, then program the adjusted config
 * into every shader engine individually via GRBM_GFX_INDEX.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into the per-SE bitmask of live RBs. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If an entire SE pair is dead, route SE_PAIR_MAP to the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of this pair is dead, remap SE_MAP to the other. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap PKR_MAP if a packer is dead. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally remap the RB pairs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3615
/*
 * gfx_v8_0_setup_rb - discover and program the render backend layout
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to build a bitmap of active render backends, programs
 * PA_SC_RASTER_CONFIG(_1) — either broadcast, or per-SE via the harvested
 * path when some RBs are fused off — and caches the per-SE/SH register
 * values for userspace queries.  Runs under grbm_idx_mutex because it
 * manipulates the GRBM_GFX_INDEX selection.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* Number of bits each SH contributes to the active_rbs bitmap. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* Nothing harvested (or nothing enabled): broadcast defaults. */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3672
3673 /**
3674 * gfx_v8_0_init_compute_vmid - gart enable
3675 *
3676 * @adev: amdgpu_device pointer
3677 *
3678 * Initialize compute vmid sh_mem registers
3679 *
3680 */
3681 #define DEFAULT_SH_MEM_BASES (0x6000)
3682 #define FIRST_COMPUTE_VMID (8)
3683 #define LAST_COMPUTE_VMID (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same base in both the private and shared halves of SH_MEM_BASES. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Select each compute VMID in turn and program its apertures. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
	}
}
3726
gfx_v8_0_init_gds_vmid(struct amdgpu_device * adev)3727 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3728 {
3729 int vmid;
3730
3731 /*
3732 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3733 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3734 * the driver can enable them for graphics. VMID0 should maintain
3735 * access so that HWS firmware can save/restore entries.
3736 */
3737 for (vmid = 1; vmid < 16; vmid++) {
3738 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3739 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3740 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3741 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3742 }
3743 }
3744
gfx_v8_0_config_init(struct amdgpu_device * adev)3745 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3746 {
3747 switch (adev->asic_type) {
3748 default:
3749 adev->gfx.config.double_offchip_lds_buf = 1;
3750 break;
3751 case CHIP_CARRIZO:
3752 case CHIP_STONEY:
3753 adev->gfx.config.double_offchip_lds_buf = 0;
3754 break;
3755 }
3756 }
3757
/*
 * gfx_v8_0_constants_init - program the static GFX golden state
 *
 * @adev: amdgpu_device pointer
 *
 * One-time GFX block setup: address config, tiling tables, render
 * backends, CU info, per-VMID SH_MEM apertures (VMID0 gets uncached
 * defaults, others the shared aperture base), compute/GDS VMID init,
 * and broadcast writes of the PA_SC FIFO sizes and SPI arbiter
 * priorities.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID0: uncached default/APE1 mtype, zero bases. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: NC default mtype, shared aperture base. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);
	gfx_v8_0_init_gds_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3840
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH (selecting each via
 * GRBM_GFX_INDEX, under grbm_idx_mutex), then polls the non-CU master
 * busy bits.  Each poll is bounded by adev->usec_timeout; on a per-SE/SH
 * timeout the selection is restored, the mutex dropped, and the function
 * returns after logging which SE/SH timed out.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast and bail out on timeout. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3878
/*
 * gfx_v8_0_enable_gui_idle_interrupt - toggle GFX idle/busy interrupts
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable, false to disable
 *
 * Sets or clears the context-busy, context-empty, compute-busy and
 * GFX-idle interrupt enables in CP_INT_CNTL_RING0 as a group.
 */
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 cntl;
	u32 en = enable ? 1 : 0;

	cntl = RREG32(mmCP_INT_CNTL_RING0);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, en);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, en);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, en);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, en);
	WREG32(mmCP_INT_CNTL_RING0, cntl);
}
3891
gfx_v8_0_init_csb(struct amdgpu_device * adev)3892 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3893 {
3894 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3895 /* csib */
3896 WREG32(mmRLC_CSIB_ADDR_HI,
3897 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3898 WREG32(mmRLC_CSIB_ADDR_LO,
3899 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3900 WREG32(mmRLC_CSIB_LENGTH,
3901 adev->gfx.rlc.clear_state_size);
3902 }
3903
/*
 * gfx_v8_0_parse_ind_reg_list - compact the RLC indirect register list
 *
 * @register_list_format: firmware register list; rewritten in place
 * @ind_offset: starting index into the list
 * @list_size: number of dwords in the list
 * @unique_indices: out array of distinct register identifiers found
 * @indices_count: in/out count of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices (BUG on overflow)
 * @ind_start_offsets: out array of offsets where each entry begins
 * @offset_count: in/out count of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets (BUG on overflow)
 *
 * Walks the firmware-provided list, recording where each entry starts.
 * Entries appear to be consumed three dwords at a time (the explicit
 * "+= 2" plus the loop increment), with the third dword naming a
 * register; 0xFFFFFFFF terminates an entry.  Each register name is
 * deduplicated into @unique_indices and replaced in place by its
 * compact index — TODO confirm the exact entry layout against the RLC
 * firmware format documentation.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* Remember where this entry begins. */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* Entry terminator: the next dword starts a new entry. */
			new_entry = true;
			continue;
		}

		/* Skip to the register-name dword of this record. */
		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* First time we see this register: record it. */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* Rewrite the register name as its compact index. */
		register_list_format[ind_offset] = indices;
	}
}
3953
/*
 * Upload the RLC save/restore machine lists: the direct register-restore
 * list into ARAM, the (index-rewritten) indirect format list into GPM
 * scratch, the per-entry start offsets, and the unique register indices
 * into the SRM index control registers.
 *
 * Returns 0 on success, -ENOMEM if the format list copy fails.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parse_ind_reg_list rewrites the blob in place */
	unsigned int *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* ARAM auto-increments after each data write (see AUTO_INCR_ADDR) */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore list size, expressed in pairs of words (hence >> 1) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: low 18 bits go to the ADDR register, the rest
	 * (shifted by 20) to the DATA register.  NOTE(review): the 0x3FFFF
	 * mask vs. the >> 20 shift leaves bits 18-19 unused — presumably
	 * intentional per the register layout; confirm against RLC docs. */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4016
/* Turn on the RLC save/restore machine (lists must be uploaded first). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4021
/*
 * Program power-gating delay/threshold tuning parameters in the RLC.
 * The values are fixed constants for VI-family parts.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	/* CP write-pointer polling idle threshold */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* all four PG delay fields get the same 0x10 value */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4038
/* CZ: enable/disable SMU clock slow-down while powering a block up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4044
/* CZ: enable/disable SMU clock slow-down while powering a block down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4050
/* CZ: enable CP power gating. Note the field is a DISABLE bit, so the
 * polarity is inverted: enable=true clears CP_PG_DISABLE. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4055
gfx_v8_0_init_pg(struct amdgpu_device * adev)4056 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4057 {
4058 if ((adev->asic_type == CHIP_CARRIZO) ||
4059 (adev->asic_type == CHIP_STONEY)) {
4060 gfx_v8_0_init_csb(adev);
4061 gfx_v8_0_init_save_restore_list(adev);
4062 gfx_v8_0_enable_save_restore_machine(adev);
4063 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4064 gfx_v8_0_init_power_gating(adev);
4065 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4066 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4067 (adev->asic_type == CHIP_POLARIS12) ||
4068 (adev->asic_type == CHIP_VEGAM)) {
4069 gfx_v8_0_init_csb(adev);
4070 gfx_v8_0_init_save_restore_list(adev);
4071 gfx_v8_0_enable_save_restore_machine(adev);
4072 gfx_v8_0_init_power_gating(adev);
4073 }
4074
4075 }
4076
/*
 * Halt the RLC firmware: clear the F32 enable bit, mask the GUI idle
 * interrupt, then wait for the serdes to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4084
/* Pulse the RLC soft-reset bit in GRBM, with 50us settle time each side. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4093
/*
 * Start the RLC firmware. On APUs (e.g. Carrizo) the GUI idle interrupt is
 * enabled later, after the CP is initialized; on dGPUs it is enabled here.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4104
/*
 * Bring the RLC back up: stop, reset, reprogram power gating, restart.
 * Under SR-IOV the host owns the RLC, so only the CSB is (re)programmed.
 * Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	adev->gfx.rlc.funcs->reset(adev);
	gfx_v8_0_init_pg(adev);
	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
4119
/*
 * Halt or release the three gfx CP micro-engines (ME, PFP, CE).
 * When halting, every gfx ring is also marked not schedulable.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	const u32 halt = enable ? 0 : 1;
	u32 cntl = RREG32(mmCP_ME_CNTL);
	int i;

	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, ME_HALT, halt);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, PFP_HALT, halt);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, CE_HALT, halt);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].sched.ready = false;
	}
	WREG32(mmCP_ME_CNTL, cntl);
	udelay(50);
}
4139
gfx_v8_0_get_csb_size(struct amdgpu_device * adev)4140 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4141 {
4142 u32 count = 0;
4143 const struct cs_section_def *sect = NULL;
4144 const struct cs_extent_def *ext = NULL;
4145
4146 /* begin clear state */
4147 count += 2;
4148 /* context control state */
4149 count += 3;
4150
4151 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4152 for (ext = sect->section; ext->extent != NULL; ++ext) {
4153 if (sect->id == SECT_CONTEXT)
4154 count += 2 + ext->reg_count;
4155 else
4156 return 0;
4157 }
4158 }
4159 /* pa_sc_raster_config/pa_sc_raster_config1 */
4160 count += 4;
4161 /* end clear state */
4162 count += 2;
4163 /* clear state */
4164 count += 2;
4165
4166 return count;
4167 }
4168
/*
 * Initialize the gfx CP and push the clear-state packet stream onto ring 0.
 * The packet sequence here must stay in sync with gfx_v8_0_get_csb_size(),
 * which computes the allocation for it.
 *
 * Returns 0 on success, or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 for the SET_BASE packet appended after the clear state */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config for SE0/SH0 (same on all SEs for gfx8 bring-up) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the gfx (CPG) ring-buffer doorbell: offset/enable in
 * CP_RB_DOORBELL_CONTROL, plus the doorbell aperture range on dGPUs.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range registers only exist on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					adev->doorbell_index.gfx_ring0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4265
/*
 * Program gfx ring 0 (CP_RB0): size, rptr/wptr writeback addresses, base
 * address and doorbell, then start the ring with the clear-state stream.
 * The register write order follows the HW bring-up sequence; do not reorder.
 * Always returns 0 (gfx_v8_0_cp_gfx_start()'s result is not propagated here).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA lets the subsequent wptr write take effect */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without the RPTR_WR_ENA bit */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* base address is in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->sched.ready = true;

	return 0;
}
4319
/*
 * Release or halt the two MEC micro-engines. When halting, every compute
 * ring and the KIQ ring are also marked not schedulable.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (!enable) {
		WREG32(mmCP_MEC_CNTL,
		       CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].sched.ready = false;
		adev->gfx.kiq.ring.sched.ready = false;
	} else {
		WREG32(mmCP_MEC_CNTL, 0);
	}
	udelay(50);
}
4334
4335 /* KIQ functions */
/*
 * Tell the RLC which hardware queue is the KIQ: the low byte of
 * RLC_CP_SCHEDULERS encodes me/pipe/queue of the kernel interface queue.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write with bit 7 set — presumably a "commit/activate" strobe
	 * for the selection; NOTE(review): confirm against RLC documentation */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4349
/*
 * Use the KIQ to bring up all kernel compute queues (KCQs): emit one
 * SET_RESOURCES packet describing the available queue mask, then one
 * MAP_QUEUES packet per compute ring.
 *
 * Returns 0 on success, or the amdgpu_ring_alloc() error.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of MEC queues usable by the KFD/KGD split */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords for SET_RESOURCES + 8 per MAP_QUEUES packet */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		/* ME field: ring->me is 1-based for MEC1, hence the mapping */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}
4410
/*
 * Deactivate the currently selected HQD (caller must have done the SRBM
 * select): request dequeue with @req, poll until the queue goes inactive,
 * then zero the dequeue request and PQ pointers.
 *
 * Returns 0 if the queue deactivated (or was already inactive),
 * -ETIMEDOUT if it stayed active past adev->usec_timeout microseconds.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear state even on timeout so a retry starts clean */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4431
/*
 * Fill in the memory queue descriptor (MQD) for @ring. The MQD is staged
 * in memory here and later written to the HQD registers by
 * gfx_v8_0_mqd_commit() (or consumed by the KIQ MAP_QUEUES packet).
 * Caller must hold srbm_mutex with the ring's me/pipe/queue selected,
 * since several fields are seeded from the currently selected HQD.
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* fixed header/compute dispatch defaults: all SEs enabled */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation, right after the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining fields from the selected HQD */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4577
/*
 * Write a staged MQD into the HQD registers of the currently selected
 * queue. Caller must hold srbm_mutex with the target me/pipe/queue
 * selected. The MQD is treated as a flat array of register values
 * starting at CP_MQD_BASE_ADDR; CP_HQD_ACTIVE is written last so the
 * queue only goes live once fully programmed.
 *
 * Non-static: also used by the KFD/amdkfd interface. Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4614
/*
 * Initialize (or, after a GPU reset, restore) the KIQ's MQD and commit it
 * to the hardware. On first init the MQD is built from scratch and a
 * backup copy is kept; on reset the backup is replayed instead.
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* the KIQ backup slot sits after the compute-ring slots */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so a later GPU reset can restore this state */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4653
/*
 * Initialize a compute ring's MQD. Unlike the KIQ, the MQD is NOT
 * committed to registers here — the KIQ MAP_QUEUES packet does that.
 * First init builds the MQD and keeps a backup; GPU reset restores the
 * backup and clears the ring; plain suspend/resume only clears the ring.
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so a later GPU reset can restore this state */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: MQD survives, just scrub the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4684
/*
 * Program the MEC doorbell aperture (KIQ through MEC ring 7) and enable
 * compute doorbells. The range registers are only programmed on ASICs
 * newer than Tonga.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4694
gfx_v8_0_kiq_resume(struct amdgpu_device * adev)4695 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4696 {
4697 struct amdgpu_ring *ring;
4698 int r;
4699
4700 ring = &adev->gfx.kiq.ring;
4701
4702 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4703 if (unlikely(r != 0))
4704 return r;
4705
4706 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4707 if (unlikely(r != 0))
4708 return r;
4709
4710 gfx_v8_0_kiq_init_queue(ring);
4711 amdgpu_bo_kunmap(ring->mqd_obj);
4712 ring->mqd_ptr = NULL;
4713 amdgpu_bo_unreserve(ring->mqd_obj);
4714 ring->sched.ready = true;
4715 return 0;
4716 }
4717
gfx_v8_0_kcq_resume(struct amdgpu_device * adev)4718 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4719 {
4720 struct amdgpu_ring *ring = NULL;
4721 int r = 0, i;
4722
4723 gfx_v8_0_cp_compute_enable(adev, true);
4724
4725 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4726 ring = &adev->gfx.compute_ring[i];
4727
4728 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4729 if (unlikely(r != 0))
4730 goto done;
4731 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4732 if (!r) {
4733 r = gfx_v8_0_kcq_init_queue(ring);
4734 amdgpu_bo_kunmap(ring->mqd_obj);
4735 ring->mqd_ptr = NULL;
4736 }
4737 amdgpu_bo_unreserve(ring->mqd_obj);
4738 if (r)
4739 goto done;
4740 }
4741
4742 gfx_v8_0_set_mec_doorbell_range(adev);
4743
4744 r = gfx_v8_0_kiq_kcq_enable(adev);
4745 if (r)
4746 goto done;
4747
4748 done:
4749 return r;
4750 }
4751
/*
 * Ring-test gfx ring 0, the KIQ, and every compute ring. A gfx or KIQ
 * failure aborts and is returned; compute failures are not propagated —
 * the return value is deliberately ignored (presumably the helper marks a
 * failing ring not-ready so the rest of the GPU stays usable; confirm
 * against amdgpu_ring_test_helper()).
 */
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq.ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}
4775
/*
 * Resume the whole command processor: KIQ first (it maps the KCQs), then
 * the gfx ring, then the compute queues, then ring-test everything.
 * GUI idle interrupts are masked during bring-up on dGPUs and re-enabled
 * at the end. Returns 0 on success or the first failing step's error.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4803
/* Enable/disable both halves of the CP: gfx micro-engines and MEC. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4809
/*
 * amd_ip_funcs .hw_init for the gfx v8 IP block: program golden registers
 * and GPU constants, bring up the RLC, then resume the CP.
 * @handle is the struct amdgpu_device. Returns 0 or the failing step's error.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
4826
/*
 * Tear down all kernel compute queues by emitting one UNMAP_QUEUES
 * (RESET_QUEUES action) packet per compute ring on the KIQ, then
 * ring-testing the KIQ to confirm the packets were consumed.
 *
 * NOTE(review): if amdgpu_ring_alloc() fails, the code logs the error but
 * still issues the ring writes below — presumably relying on the write
 * path tolerating an unallocated ring; worth confirming.
 *
 * Returns 0 on success or a negative errno.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;

	/* 6 dwords per UNMAP_QUEUES packet */
	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");

	return r;
}
4856
gfx_v8_0_is_idle(void * handle)4857 static bool gfx_v8_0_is_idle(void *handle)
4858 {
4859 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4860
4861 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4862 || RREG32(mmGRBM_STATUS2) != 0x8)
4863 return false;
4864 else
4865 return true;
4866 }
4867
gfx_v8_0_rlc_is_idle(void * handle)4868 static bool gfx_v8_0_rlc_is_idle(void *handle)
4869 {
4870 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4871
4872 if (RREG32(mmGRBM_STATUS2) != 0x8)
4873 return false;
4874 else
4875 return true;
4876 }
4877
gfx_v8_0_wait_for_rlc_idle(void * handle)4878 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4879 {
4880 unsigned int i;
4881 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4882
4883 for (i = 0; i < adev->usec_timeout; i++) {
4884 if (gfx_v8_0_rlc_is_idle(handle))
4885 return 0;
4886
4887 udelay(1);
4888 }
4889 return -ETIMEDOUT;
4890 }
4891
gfx_v8_0_wait_for_idle(void * handle)4892 static int gfx_v8_0_wait_for_idle(void *handle)
4893 {
4894 unsigned int i;
4895 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4896
4897 for (i = 0; i < adev->usec_timeout; i++) {
4898 if (gfx_v8_0_is_idle(handle))
4899 return 0;
4900
4901 udelay(1);
4902 }
4903 return -ETIMEDOUT;
4904 }
4905
/*
 * amd_ip_funcs .hw_fini callback: release the GFX interrupts, unmap the
 * compute queues, then halt the CP and RLC (unless running as an SR-IOV
 * guest, where the host owns the hardware).  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	/* SR-IOV guests must not halt the shared CP/RLC. */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	/* Halt CP and RLC under RLC safe mode; skip (with a warning) any
	 * engine that refuses to go idle rather than wedging it. */
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
4937
/* amd_ip_funcs .suspend callback — identical to a hardware fini. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4942
/* amd_ip_funcs .resume callback — identical to a hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4947
/*
 * amd_ip_funcs .check_soft_reset callback: inspect GRBM/SRBM status
 * registers and cache the GRBM/SRBM soft-reset masks that a subsequent
 * soft reset should apply.  Returns true when any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy pipeline stage means CP+GFX (and GRBM
	 * via SRBM) need a reset. */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy → reset RLC. */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* Any CP microengine (fetcher/compute/graphics) busy → reset all
	 * three plus GRBM. */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block. */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* Cache the masks for pre/soft/post reset; clear them when no
	 * reset is needed so stale masks don't trigger later. */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5009
/*
 * amd_ip_funcs .pre_soft_reset callback: quiesce the blocks that
 * check_soft_reset() flagged — stop the RLC, halt GFX CP parsing, and
 * deactivate every compute HQD — before the actual reset is applied.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	/* Nothing flagged by check_soft_reset() → nothing to quiesce. */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	/* stop the rlc */
	adev->gfx.rlc.funcs->stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* Deactivate each compute queue's HQD; SRBM select must be
		 * held (and restored to 0,0,0,0) under srbm_mutex. */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
5050
/*
 * amd_ip_funcs .soft_reset callback: pulse the GRBM and SRBM soft-reset
 * bits cached by check_soft_reset(), stalling the memory controller's
 * GFX interface around the pulse.  Each reset is asserted, held ~50us,
 * then deasserted; reads after each write flush/confirm the register.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* Stall GMCON's GFX path so memory traffic is quiet during reset. */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);	/* post the write */

		udelay(50);				/* hold reset */

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);	/* post the write */

		udelay(50);				/* hold reset */

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* Release the GMCON stall. */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5112
/*
 * amd_ip_funcs .post_soft_reset callback: bring back whatever the reset
 * touched — re-deactivate and resume compute queues, resume the GFX CP,
 * retest all rings, and restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* Make sure each HQD is quiescent before re-mapping it;
		 * SRBM select is protected by srbm_mutex. */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
5153
5154 /**
5155 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5156 *
5157 * @adev: amdgpu_device pointer
5158 *
5159 * Fetches a GPU clock counter snapshot.
5160 * Returns the 64 bit clock counter snapshot.
5161 */
gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device * adev)5162 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5163 {
5164 uint64_t clock;
5165
5166 mutex_lock(&adev->gfx.gpu_clock_mutex);
5167 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5168 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5169 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5170 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5171 return clock;
5172 }
5173
/*
 * Emit WRITE_DATA packets that program a VMID's GDS partition: base and
 * size of the GDS window, GWS allocation, and OA mask.  Four 5-dword
 * packets are written to the ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register value. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5212
/*
 * Read one indirectly-addressed SQ register for a given SIMD/wave via
 * the SQ_IND_INDEX/SQ_IND_DATA pair.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	uint32_t index;

	index = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK);
	WREG32(mmSQ_IND_INDEX, index);
	return RREG32(mmSQ_IND_DATA);
}
5222
/*
 * Read `num` consecutive indirectly-addressed SQ registers (starting at
 * `regno`) for a SIMD/wave/thread into `out`, using the auto-increment
 * mode of SQ_IND_INDEX.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	uint32_t i;

	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	for (i = 0; i < num; i++)
		out[i] = RREG32(mmSQ_IND_DATA);
}
5237
/*
 * Dump the fixed set of per-wave status registers into dst, advancing
 * *no_fields for each entry.  The leading 0 and the field order form
 * the "type 0" wave-data layout consumed by debug tooling — do not
 * reorder.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5261
/*
 * Copy `size` SGPRs starting at `start` for the given SIMD/wave into
 * dst.  Thread id 0 is used since SGPRs are shared across the wave.
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, 0,
		       start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5270
5271
/* GFX v8 callbacks plugged into the shared amdgpu_gfx layer. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5279
/*
 * amd_ip_funcs .early_init callback: fix the ring counts and install
 * the GFX v8 function tables (gfx/ring/irq/gds/rlc) before any other
 * init stage runs.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5294
/*
 * amd_ip_funcs .late_init callback: enable the GFX interrupt sources
 * and run the EDC GPR workaround (which needs the IB pool, hence "late").
 * Returns 0 on success or the first failing step's error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5329
/*
 * Toggle static per-CU power gating.  Polaris11/12 and VegaM also tell
 * the SMU via powerplay before flipping the RLC control bit.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	bool is_polaris = (adev->asic_type == CHIP_POLARIS11) ||
			  (adev->asic_type == CHIP_POLARIS12) ||
			  (adev->asic_type == CHIP_VEGAM);

	if (is_polaris && adev->powerplay.pp_funcs->set_powergating_by_smu)
		/* Send msg to SMU via Powerplay */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5342
/* Toggle dynamic per-CU power gating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5348
/* Toggle quick power gating (Polaris11 family) in RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5354
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5360
/*
 * Toggle GFX pipeline power gating (Carrizo/Stoney).  On disable, a
 * dummy register read nudges the GFX block awake.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	if (enable)
		return;

	/* Read any GFX register to wake up GFX. */
	RREG32(mmDB_RENDER_CONTROL);
}
5370
/*
 * Apply coarse-grain (and, when supported, pipeline) GFX power gating.
 * When gating is requested but unsupported, or when ungating, both
 * features are forced off; pipeline gating is only ever enabled on top
 * of coarse-grain gating.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
		return;
	}

	cz_enable_gfx_cg_power_gating(adev, false);
	cz_enable_gfx_pipeline_power_gating(adev, false);
}
5383
/*
 * amd_ip_funcs .set_powergating_state callback: program the per-ASIC
 * power-gating features for gate/ungate.  The whole sequence is
 * bracketed by RLC safe mode when any affected feature is supported.
 * No-op for SR-IOV guests.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down and CP gating track support flags, not the
		 * requested state — they are configured unconditionally. */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* Static/dynamic MG gating: on only when supported AND gating. */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* Other VI parts have no GFX power gating to program. */
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev);
	return 0;
}
5454
/*
 * amd_ip_funcs .get_clockgating_state callback: decode the hardware
 * clock-gating control registers into AMD_CG_SUPPORT_* flags OR'ed into
 * *flags.  For SR-IOV, *flags is cleared first (the guest cannot trust
 * cached state), but the registers are still decoded below.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5496
/*
 * Broadcast a BPM serdes command (cmd) to register reg_addr on every CU
 * of every SE/SH via RLC_SERDES_WR_CTRL.  Stoney lacks the BPM_DATA and
 * REG_ADDR fields, hence the smaller clear mask for it.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* Broadcast to all shader engines/arrays. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	/* Address every CU and non-CU serdes master. */
	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* Clear all command/select fields before composing the new command. */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* Compose: broadcast BPM address, command data, and target register. */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5537
/* RLC safe-mode handshake: message codes and GPR_REG2 field layout
 * (REQ in bit 0, 4-bit MESSAGE in bits 1-4). */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5544
gfx_v8_0_is_rlc_enabled(struct amdgpu_device * adev)5545 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5546 {
5547 uint32_t rlc_setting;
5548
5549 rlc_setting = RREG32(mmRLC_CNTL);
5550 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5551 return false;
5552
5553 return true;
5554 }
5555
/*
 * Request RLC safe mode (message 1) via RLC_SAFE_MODE, then wait for
 * the GFX clock/power status to report on and for the RLC to ack by
 * clearing the CMD bit.  Both waits time out silently after
 * adev->usec_timeout microseconds.
 */
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;
	/* NOTE(review): seeds the request from mmRLC_CNTL but writes
	 * mmRLC_SAFE_MODE — matches upstream; presumably intentional,
	 * but worth confirming against the register spec. */
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);	/* enter safe mode */
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	/* RLC clears CMD once the request has been consumed. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5582
gfx_v8_0_unset_safe_mode(struct amdgpu_device * adev)5583 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5584 {
5585 uint32_t data;
5586 unsigned i;
5587
5588 data = RREG32(mmRLC_CNTL);
5589 data |= RLC_SAFE_MODE__CMD_MASK;
5590 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5591 WREG32(mmRLC_SAFE_MODE, data);
5592
5593 for (i = 0; i < adev->usec_timeout; i++) {
5594 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5595 break;
5596 udelay(1);
5597 }
5598 }
5599
/* RLC callbacks plugged into the shared amdgpu_rlc layer for VI parts. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start
};
5613
/*
 * Enable or disable medium-grain clock gating (MGCG), plus the related
 * MGLS and CGTS features where supported.  The numbered comments mark
 * the hardware-mandated programming sequence; the whole thing runs
 * under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: drop the overrides so MGCG can
		 * engage; APUs keep the GRBM override set. */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
					RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
					RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
					RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
					RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
					RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
5717
/*
 * Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for the GFX block, as permitted by adev->cg_flags
 * (AMD_CG_SUPPORT_GFX_CGCG / AMD_CG_SUPPORT_GFX_CGLS).  The whole
 * sequence runs under RLC safe mode, and each register write is skipped
 * when the register already holds the desired value.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	amdgpu_gfx_rlc_enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override bit */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also clear the CGLS override bit */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (four reads, presumably
		 * to give the clocks time to come back up — TODO confirm) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
/*
 * Sequence medium- and coarse-grain clock gating updates in the order
 * the hardware requires: CGCG/CGLS may only be active while
 * MGCG/MGLS/TS(CG/LS) is on, so MG is enabled first and disabled last.
 * Always returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (!enable) {
		/* === CGCG + CGLS off before MGCG + MGLS + TS(CG/LS) === */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		return 0;
	}

	/* === MGCG + MGLS + TS(CG/LS) on before CGCG + CGLS === */
	gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	return 0;
}
5828
/*
 * Program GFX clock gating on Tonga through SMU messages instead of
 * direct RLC register writes.  For each feature group advertised in
 * adev->cg_flags, a PP_CG_MSG_ID is built from the supported states
 * (CG and/or LS) and the requested state, then sent via
 * amdgpu_dpm_set_clockgating_by_smu().  An UNGATE request forces the
 * target state to 0.  Always returns 0.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	/* Coarse grain clock gating (CGCG) and its light sleep (CGLS). */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* Medium grain clock gating (MGCG) and its light sleep (MGLS).
	 *
	 * NOTE(review): pp_support_state/pp_state are not reset here, so
	 * when only one of MGCG/MGLS is set the |= below ORs onto values
	 * left over from the CG section above — TODO confirm this
	 * carry-over is intentional (it matches the upstream driver).
	 */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
5879
/*
 * Program GFX clock gating on Polaris/VegaM through SMU messages, one
 * PP_CG_MSG_ID per feature group present in adev->cg_flags: CGCG/CGLS,
 * 3D CGCG/CGLS, MGCG/MGLS, RLC light sleep and CP light sleep.  An
 * UNGATE request forces each group's target state to 0.  Returns 0.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	/* Coarse grain clock gating (CGCG) and its light sleep (CGLS). */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* 3D pipe coarse grain gating.
	 *
	 * NOTE(review): pp_support_state/pp_state are not reset between
	 * the sections below, so the |= lines can OR onto values left by
	 * the previous section when only one flag of a pair is set —
	 * TODO confirm intentional (matches the upstream driver).
	 */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* Medium grain clock gating (MGCG) and its light sleep (MGLS). */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* RLC memory light sleep (LS only). */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	/* CP memory light sleep (LS only). */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			pp_support_state,
			pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
5982
gfx_v8_0_set_clockgating_state(void * handle,enum amd_clockgating_state state)5983 static int gfx_v8_0_set_clockgating_state(void *handle,
5984 enum amd_clockgating_state state)
5985 {
5986 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5987
5988 if (amdgpu_sriov_vf(adev))
5989 return 0;
5990
5991 switch (adev->asic_type) {
5992 case CHIP_FIJI:
5993 case CHIP_CARRIZO:
5994 case CHIP_STONEY:
5995 gfx_v8_0_update_gfx_clock_gating(adev,
5996 state == AMD_CG_STATE_GATE);
5997 break;
5998 case CHIP_TONGA:
5999 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6000 break;
6001 case CHIP_POLARIS10:
6002 case CHIP_POLARIS11:
6003 case CHIP_POLARIS12:
6004 case CHIP_VEGAM:
6005 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6006 break;
6007 default:
6008 break;
6009 }
6010 return 0;
6011 }
6012
gfx_v8_0_ring_get_rptr(struct amdgpu_ring * ring)6013 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6014 {
6015 return ring->adev->wb.wb[ring->rptr_offs];
6016 }
6017
gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring * ring)6018 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6019 {
6020 struct amdgpu_device *adev = ring->adev;
6021
6022 if (ring->use_doorbell)
6023 /* XXX check if swapping is necessary on BE */
6024 return ring->adev->wb.wb[ring->wptr_offs];
6025 else
6026 return RREG32(mmCP_RB0_WPTR);
6027 }
6028
gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring * ring)6029 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6030 {
6031 struct amdgpu_device *adev = ring->adev;
6032
6033 if (ring->use_doorbell) {
6034 /* XXX check if swapping is necessary on BE */
6035 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6036 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6037 } else {
6038 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6039 (void)RREG32(mmCP_RB0_WPTR);
6040 }
6041 }
6042
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and polls
 * GPU_HDP_FLUSH_DONE until the bit for this CP client equals the
 * request.  The client bit depends on which ME/pipe the ring runs on
 * (CP2/CP6 base for MEC1/MEC2 compute and KIQ rings, CP0 for gfx);
 * gfx rings wait on the PFP, compute/KIQ rings on the ME.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 are valid for compute/KIQ rings */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6075
/*
 * Emit EVENT_WRITE packets for VS_PARTIAL_FLUSH followed by VGT_FLUSH,
 * draining the geometry pipeline (used before a context switch, see
 * gfx_v8_ring_emit_cntxcntl()).
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6086
/*
 * Emit an INDIRECT_BUFFER packet launching @ib on the gfx ring.  CE IBs
 * use INDIRECT_BUFFER_CONST, DE IBs the plain INDIRECT_BUFFER.  The
 * control dword carries the IB length in dwords plus the VMID in bits
 * 31:24.  Under SR-IOV, preemptible DE IBs additionally get the
 * PRE_ENB bit and a DE metadata packet.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
					struct amdgpu_job *job,
					struct amdgpu_ib *ib,
					uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap dwords on BE */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB base must be dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6118
/*
 * Emit an INDIRECT_BUFFER packet launching @ib on a compute ring.  The
 * control dword carries VALID, the IB length in dwords and the VMID in
 * bits 31:24.  Optionally resets the GDS wave ID counters first (see
 * the workaround comment below).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap dwords on BE */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB base must be dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6152
/*
 * Emit an end-of-pipe fence on the gfx ring: flush TC/TCL1 caches and
 * write @seq to @addr, optionally raising an interrupt
 * (AMDGPU_FENCE_FLAG_INT) and writing 64 bits instead of 32
 * (AMDGPU_FENCE_FLAG_64BIT).  A dummy EOP with seq-1 is sent first as
 * a cache flush workaround.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));	/* 32-bit write, no irq */
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6189
/*
 * Emit a WAIT_REG_MEM packet that stalls the ring until the fence
 * memory at the ring's fence address equals sync_seq, i.e. until all
 * fences emitted so far have signalled.  Gfx rings wait on the PFP so
 * command prefetch stalls too; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask: all bits */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6206
/*
 * Emit a GPUVM TLB flush for @vmid with page directory @pd_addr, then
 * wait for VM_INVALIDATE_REQUEST to read back 0 (invalidate complete).
 * On gfx rings, finish with PFP_SYNC_ME so the PFP does not prefetch
 * stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6232
gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring * ring)6233 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6234 {
6235 return ring->adev->wb.wb[ring->wptr_offs];
6236 }
6237
gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring * ring)6238 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6239 {
6240 struct amdgpu_device *adev = ring->adev;
6241
6242 /* XXX check if swapping is necessary on BE */
6243 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6244 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6245 }
6246
/*
 * Set the SPI wave launch percentage for the pipe this ring runs on:
 * full value when @acquire, minimal (0x1) otherwise.  The register is
 * indexed per pipe starting at mmSPI_WCL_PIPE_PERCENT_GFX.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, val, reg;
	int percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	val = RREG32(reg);
	val = REG_SET_FIELD(val, SPI_WCL_PIPE_PERCENT_GFX, VALUE, percent);
	WREG32(reg, val);
}
6265
/*
 * Track which pipes currently hold a resource reservation in
 * adev->gfx.pipe_reserve_bitmap and reprogram the SPI pipe percent of
 * every gfx and compute ring accordingly: when no reservations remain,
 * everyone runs at full percent; otherwise only the pipes holding a
 * reservation do.  Serialized by adev->gfx.pipe_reserve_mutex.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
							   iring->me,
							   iring->pipe,
							   0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
							   iring->me,
							   iring->pipe,
							   0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6315
/*
 * Program the HQD pipe/queue priority registers for @ring: elevated
 * (0x2/0xf) when @acquire, back to 0 otherwise.  The HQD register bank
 * is selected via SRBM under srbm_mutex and restored afterwards.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_prio, queue_prio;

	if (acquire) {
		pipe_prio = 0x2;
		queue_prio = 0xf;
	} else {
		pipe_prio = 0x0;
		queue_prio = 0x0;
	}

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_prio);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_prio);

	/* restore the default SRBM selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring * ring,enum drm_sched_priority priority)6332 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6333 enum drm_sched_priority priority)
6334 {
6335 struct amdgpu_device *adev = ring->adev;
6336 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6337
6338 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6339 return;
6340
6341 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6342 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6343 }
6344
/*
 * Emit an end-of-pipe fence on a compute ring using RELEASE_MEM:
 * flush TC/TCL1 caches and write @seq to @addr, optionally raising an
 * interrupt (AMDGPU_FENCE_FLAG_INT) and writing 64 bits
 * (AMDGPU_FENCE_FLAG_64BIT).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6365
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to
 * @addr, followed (when AMDGPU_FENCE_FLAG_INT is set) by a write to
 * CPC_INT_STATUS to trigger the interrupt.  64-bit fences are not
 * supported on the KIQ.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6390
/* Emit a SWITCH_BUFFER packet (one zero payload dword) on the ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6396
/*
 * Emit a CONTEXT_CONTROL packet whose load bits depend on @flags:
 * on a context switch (AMDGPU_HAVE_CTX_SWITCH) the VGT is flushed and
 * the full set of state-load bits is requested; otherwise only CE RAM
 * is reloaded when a preamble IB is present for the first time.  Under
 * SR-IOV, CE metadata is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6429
/*
 * Emit a COND_EXEC packet that skips the following dwords when
 * *cond_exe_gpu_addr == 0.  The dword count is emitted as a 0x55aa55aa
 * placeholder; the returned ring offset of that placeholder is later
 * patched by gfx_v8_0_ring_emit_patch_cond_exec().
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6442
/*
 * Back-patch the dword count of a COND_EXEC packet previously emitted
 * by gfx_v8_0_ring_emit_init_cond_exec().  @offset is the ring slot of
 * the 0x55aa55aa placeholder; the count is the number of dwords
 * written since, accounting for write-pointer wraparound.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* index of the last dword written, in ring slots */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring buffer */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6456
/*
 * Emit a COPY_DATA packet that copies register @reg into the KIQ
 * reg_val_offs writeback slot, so the CPU can read the register value
 * back from host memory once the packet retires.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				kiq->reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				kiq->reg_val_offs * 4));
}
6473
/*
 * Emit a WRITE_DATA packet that writes @val to register @reg.  The
 * control dword varies by ring type: gfx rings write from the PFP with
 * write-confirm, the KIQ uses the no-increment-address bit, and
 * everything else just sets write-confirm.
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
	else if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		cmd = 1 << 16; /* no inc addr */
	else
		cmd = WR_CONFIRM;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6497
/*
 * Attempt soft recovery of a hung ring without a full GPU reset by
 * issuing an SQ_CMD targeting only waves belonging to @vmid
 * (CHECK_VMID=1).  CMD=0x03/MODE=0x01 — presumably a wave kill;
 * NOTE(review): confirm against the VI SQ_CMD register definition.
 */
static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
6509
/* Enable or disable the gfx ring's EOP (timestamp) interrupt via
 * CP_INT_CNTL_RING0.  Any state other than DISABLE enables it. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6516
/*
 * Enable or disable the EOP (timestamp) interrupt for one compute
 * pipe, selecting the per-pipe CP_ME1_PIPEn_INT_CNTL register by
 * read-modify-write of its TIME_STAMP_INT_ENABLE bit.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6567
/* Enable or disable the privileged-register-access fault interrupt on
 * the gfx ring.  Always returns 0. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6578
/* Enable or disable the privileged-instruction fault interrupt on the
 * gfx ring.  Always returns 0. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6589
/*
 * amdgpu_irq_src_funcs::set hook for EOP interrupts: route the request
 * to the gfx ring (CP_INT_CNTL_RING0) or the matching MEC/pipe control
 * register.  Unknown types are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6628
gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned int type,enum amdgpu_interrupt_state state)6629 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6630 struct amdgpu_irq_src *source,
6631 unsigned int type,
6632 enum amdgpu_interrupt_state state)
6633 {
6634 int enable_flag;
6635
6636 switch (state) {
6637 case AMDGPU_IRQ_STATE_DISABLE:
6638 enable_flag = 0;
6639 break;
6640
6641 case AMDGPU_IRQ_STATE_ENABLE:
6642 enable_flag = 1;
6643 break;
6644
6645 default:
6646 return -EINVAL;
6647 }
6648
6649 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6650 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6651 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6652 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6653 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6654 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6655 enable_flag);
6656 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6657 enable_flag);
6658 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6659 enable_flag);
6660 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6661 enable_flag);
6662 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6663 enable_flag);
6664 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6665 enable_flag);
6666 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6667 enable_flag);
6668 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6669 enable_flag);
6670
6671 return 0;
6672 }
6673
gfx_v8_0_set_sq_int_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned int type,enum amdgpu_interrupt_state state)6674 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6675 struct amdgpu_irq_src *source,
6676 unsigned int type,
6677 enum amdgpu_interrupt_state state)
6678 {
6679 int enable_flag;
6680
6681 switch (state) {
6682 case AMDGPU_IRQ_STATE_DISABLE:
6683 enable_flag = 1;
6684 break;
6685
6686 case AMDGPU_IRQ_STATE_ENABLE:
6687 enable_flag = 0;
6688 break;
6689
6690 default:
6691 return -EINVAL;
6692 }
6693
6694 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6695 enable_flag);
6696
6697 return 0;
6698 }
6699
gfx_v8_0_eop_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)6700 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6701 struct amdgpu_irq_src *source,
6702 struct amdgpu_iv_entry *entry)
6703 {
6704 int i;
6705 u8 me_id, pipe_id, queue_id;
6706 struct amdgpu_ring *ring;
6707
6708 DRM_DEBUG("IH: CP EOP\n");
6709 me_id = (entry->ring_id & 0x0c) >> 2;
6710 pipe_id = (entry->ring_id & 0x03) >> 0;
6711 queue_id = (entry->ring_id & 0x70) >> 4;
6712
6713 switch (me_id) {
6714 case 0:
6715 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6716 break;
6717 case 1:
6718 case 2:
6719 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6720 ring = &adev->gfx.compute_ring[i];
6721 /* Per-queue interrupt is supported for MEC starting from VI.
6722 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6723 */
6724 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6725 amdgpu_fence_process(ring);
6726 }
6727 break;
6728 }
6729 return 0;
6730 }
6731
gfx_v8_0_fault(struct amdgpu_device * adev,struct amdgpu_iv_entry * entry)6732 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6733 struct amdgpu_iv_entry *entry)
6734 {
6735 u8 me_id, pipe_id, queue_id;
6736 struct amdgpu_ring *ring;
6737 int i;
6738
6739 me_id = (entry->ring_id & 0x0c) >> 2;
6740 pipe_id = (entry->ring_id & 0x03) >> 0;
6741 queue_id = (entry->ring_id & 0x70) >> 4;
6742
6743 switch (me_id) {
6744 case 0:
6745 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6746 break;
6747 case 1:
6748 case 2:
6749 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6750 ring = &adev->gfx.compute_ring[i];
6751 if (ring->me == me_id && ring->pipe == pipe_id &&
6752 ring->queue == queue_id)
6753 drm_sched_fault(&ring->sched);
6754 }
6755 break;
6756 }
6757 }
6758
/* Process a privileged-register access fault: log it and fault the ring. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6767
/* Process a privileged-instruction fault: log it and fault the ring. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);

	return 0;
}
6776
/*
 * Process a CP EDC/ECC error interrupt.  Nothing is recovered here;
 * the event is only logged.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* DRM_ERROR does not append a newline; without one this message
	 * runs into the next log line. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6784
gfx_v8_0_parse_sq_irq(struct amdgpu_device * adev,unsigned ih_data)6785 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6786 {
6787 u32 enc, se_id, sh_id, cu_id;
6788 char type[20];
6789 int sq_edc_source = -1;
6790
6791 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6792 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6793
6794 switch (enc) {
6795 case 0:
6796 DRM_INFO("SQ general purpose intr detected:"
6797 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6798 "host_cmd_overflow %d, cmd_timestamp %d,"
6799 "reg_timestamp %d, thread_trace_buff_full %d,"
6800 "wlt %d, thread_trace %d.\n",
6801 se_id,
6802 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6803 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6804 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6805 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6806 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6807 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6808 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6809 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6810 );
6811 break;
6812 case 1:
6813 case 2:
6814
6815 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6816 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6817
6818 /*
6819 * This function can be called either directly from ISR
6820 * or from BH in which case we can access SQ_EDC_INFO
6821 * instance
6822 */
6823 if (in_task()) {
6824 mutex_lock(&adev->grbm_idx_mutex);
6825 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6826
6827 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6828
6829 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6830 mutex_unlock(&adev->grbm_idx_mutex);
6831 }
6832
6833 if (enc == 1)
6834 snprintf(type, sizeof type, "instruction intr");
6835 else
6836 snprintf(type, sizeof type, "EDC/ECC error");
6837
6838 DRM_INFO(
6839 "SQ %s detected: "
6840 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6841 "trap %s, sq_ed_info.source %s.\n",
6842 type, se_id, sh_id, cu_id,
6843 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6844 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6845 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6846 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6847 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6848 );
6849 break;
6850 default:
6851 DRM_ERROR("SQ invalid encoding type\n.");
6852 }
6853 }
6854
gfx_v8_0_sq_irq_work_func(struct work_struct * work)6855 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6856 {
6857
6858 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6859 struct sq_work *sq_work = container_of(work, struct sq_work, work);
6860
6861 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6862 }
6863
gfx_v8_0_sq_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)6864 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6865 struct amdgpu_irq_src *source,
6866 struct amdgpu_iv_entry *entry)
6867 {
6868 unsigned ih_data = entry->src_data[0];
6869
6870 /*
6871 * Try to submit work so SQ_EDC_INFO can be accessed from
6872 * BH. If previous work submission hasn't finished yet
6873 * just print whatever info is possible directly from the ISR.
6874 */
6875 if (work_pending(&adev->gfx.sq_work.work)) {
6876 gfx_v8_0_parse_sq_irq(adev, ih_data);
6877 } else {
6878 adev->gfx.sq_work.ih_data = ih_data;
6879 schedule_work(&adev->gfx.sq_work.work);
6880 }
6881
6882 return 0;
6883 }
6884
/* IP-block level callbacks (init/teardown, reset, clock/power gating)
 * for the GFX v8.0 engine. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6905
/* Ring callbacks for the GFX (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6950
/* Ring callbacks for the MEC compute rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6980
/* Ring callbacks for the KIQ (kernel interface queue) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7004
gfx_v8_0_set_ring_funcs(struct amdgpu_device * adev)7005 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7006 {
7007 int i;
7008
7009 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7010
7011 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7012 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7013
7014 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7015 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7016 }
7017
/* EOP (end-of-pipe, fence completion) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged-register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* CP EDC/ECC error interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

/* SQ (shader sequencer) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7042
/* Register the GFX interrupt sources.  The EOP source carries one state
 * per CP ring type; the remaining sources are single-type. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7060
/* Install the RLC callback table.  NOTE(review): the table is named for
 * Iceland — presumably shared across VI-family parts; confirm at its
 * definition site. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7065
/* Initialize ASIC GDS (global data share) sizing info. */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* GDS size and max wave id are read from the hardware registers;
	 * GWS/OA sizes are fixed constants here. */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
7074
/*
 * Write a user-requested inactive-CU bitmap for the currently selected
 * SE/SH.  A zero bitmap is a no-op (the register is left untouched).
 */
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 val;

	if (bitmap == 0)
		return;

	val = (bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT) &
	      GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, val);
}
7088
gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device * adev)7089 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7090 {
7091 u32 data, mask;
7092
7093 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7094 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7095
7096 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7097
7098 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7099 }
7100
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, the always-on CU mask, and fixed per-CU parameters.
 * Walks every SE/SH under the GRBM index mutex, applying any
 * user-supplied disable masks before reading the active bitmaps.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow all CUs per SH. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Parse user CU-disable masks for up to 4 SEs x 2 SHs. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Target this SE/SH; 0xffffffff broadcasts instance. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num of them
			 * become the always-on set for this SH. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* Pack per-SH ao bitmaps into the 32-bit ao_cu_mask
			 * (8 bits per SH, 16 per SE, first 2 SEs/SHs only). */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast selection before releasing the mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7156
/* IP block descriptor for GFX version 8.0. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

/* IP block descriptor for GFX version 8.1 (same callbacks as 8.0). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7174
gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring * ring)7175 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7176 {
7177 uint64_t ce_payload_addr;
7178 int cnt_ce;
7179 union {
7180 struct vi_ce_ib_state regular;
7181 struct vi_ce_ib_state_chained_ib chained;
7182 } ce_payload = {};
7183
7184 if (ring->adev->virt.chained_ib_support) {
7185 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7186 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7187 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7188 } else {
7189 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7190 offsetof(struct vi_gfx_meta_data, ce_payload);
7191 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7192 }
7193
7194 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7195 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7196 WRITE_DATA_DST_SEL(8) |
7197 WR_CONFIRM) |
7198 WRITE_DATA_CACHE_POLICY(0));
7199 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7200 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7201 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7202 }
7203
/*
 * Emit a WRITE_DATA packet that writes the DE payload into the CSA.
 * Besides selecting the chained-IB vs. regular layout, the payload's
 * GDS backup address is pointed at a region 4 KiB past the CSA base.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	/* GDS backup area lives one page past the CSA base. */
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
7236