/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,		/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,		/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,		/*
Set/Clear CGCG Override */ 90 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 91 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 92 BPM_REG_FGCG_MAX 93 }; 94 95 #define RLC_FormatDirectRegListLength 14 96 97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 103 104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 116 117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 119 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 122 123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 129 130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); 132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin"); 134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin"); 138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); 140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 141 142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); 144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); 146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); 148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); 150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); 152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 153 154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); 155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); 156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); 157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin"); 158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); 159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin"); 160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); 161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin"); 162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); 163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); 164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); 165 166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); 167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); 168 MODULE_FIRMWARE("amdgpu/vegam_me.bin"); 169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); 170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); 171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); 172 173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 174 { 175 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, 
mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 176 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 177 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 178 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 179 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 180 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 181 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 182 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 183 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 184 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 185 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 186 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 187 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 188 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 189 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 190 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 191 }; 192 193 static const u32 golden_settings_tonga_a11[] = 194 { 195 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 196 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 197 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 198 mmGB_GPU_ID, 0x0000000f, 0x00000000, 199 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 200 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 201 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 202 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 203 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 204 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 205 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 206 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 207 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 208 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 209 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 210 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 211 }; 212 213 static const u32 tonga_golden_common_all[] = 214 { 215 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 216 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 217 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 218 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 219 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 220 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 221 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 222 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 223 }; 224 225 static const u32 tonga_mgcg_cgcg_init[] = 226 { 227 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 228 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 229 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 230 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 231 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 232 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 233 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 234 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 235 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 236 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 237 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 238 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 239 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 240 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 241 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 242 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 243 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 244 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 245 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 246 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 247 
mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 248 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 249 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 250 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 251 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 252 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 253 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 254 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 255 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 256 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 257 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 258 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 259 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 260 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 261 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 262 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 263 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 264 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 265 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 266 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 267 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 268 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 269 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 270 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 271 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 272 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 273 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 274 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 275 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 276 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 277 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 278 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 279 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 280 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 281 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 282 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 283 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 284 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 285 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 286 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 287 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 288 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 289 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 290 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 291 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 292 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 293 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 294 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 295 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 296 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 297 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 298 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 299 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 300 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 301 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 302 }; 303 304 static const u32 golden_settings_vegam_a11[] = 305 { 306 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 307 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, 308 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 309 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 310 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 311 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 312 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, 313 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, 314 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 315 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 316 mmSQ_CONFIG, 0x07f80000, 0x01180000, 317 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 318 mmTCC_CTRL, 
0x00100000, 0xf31fff7f, 319 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 320 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 321 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, 322 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 323 }; 324 325 static const u32 vegam_golden_common_all[] = 326 { 327 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 333 }; 334 335 static const u32 golden_settings_polaris11_a11[] = 336 { 337 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 338 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 339 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 340 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 341 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 342 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 343 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 344 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 345 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 346 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 347 mmSQ_CONFIG, 0x07f80000, 0x01180000, 348 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 349 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 350 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 351 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 352 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 353 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 354 }; 355 356 static const u32 polaris11_golden_common_all[] = 357 { 358 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 359 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 360 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 361 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 362 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 363 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 364 }; 365 366 static const u32 golden_settings_polaris10_a11[] = 367 { 368 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 369 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 370 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 371 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 372 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 373 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 374 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 375 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 376 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 377 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 378 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 379 mmSQ_CONFIG, 0x07f80000, 0x07180000, 380 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 381 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 382 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 383 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 384 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 385 }; 386 387 static const u32 polaris10_golden_common_all[] = 388 { 389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 390 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 391 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 392 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 393 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 394 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 395 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 396 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 397 }; 398 399 static const u32 fiji_golden_common_all[] = 400 { 401 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 402 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 403 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 404 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 405 mmSPI_RESOURCE_RESERVE_CU_0, 
0xffffffff, 0x00000800, 406 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 407 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 408 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 409 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 410 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 411 }; 412 413 static const u32 golden_settings_fiji_a10[] = 414 { 415 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 416 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 417 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 418 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 419 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 420 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 421 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 422 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 423 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 424 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 425 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 426 }; 427 428 static const u32 fiji_mgcg_cgcg_init[] = 429 { 430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 461 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 462 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 463 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 464 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 465 }; 466 467 static const u32 golden_settings_iceland_a11[] = 468 { 469 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 470 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 471 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 472 mmGB_GPU_ID, 0x0000000f, 0x00000000, 473 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 474 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 475 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 476 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 477 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 478 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 479 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 480 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 481 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 482 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 483 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 484 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 485 }; 486 487 static const u32 iceland_golden_common_all[] = 488 { 489 mmGRBM_GFX_INDEX, 
0xffffffff, 0xe0000000, 490 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 491 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 492 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 493 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 494 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 495 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 496 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 497 }; 498 499 static const u32 iceland_mgcg_cgcg_init[] = 500 { 501 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 502 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 503 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 504 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 505 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 506 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 507 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 508 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 509 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 510 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 511 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 512 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 513 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 514 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 515 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 516 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 517 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 518 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 519 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 520 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 521 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 522 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 523 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 524 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 525 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 526 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 527 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 528 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 529 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 530 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 531 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 532 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 533 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 534 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 535 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 536 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 537 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 538 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 539 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 540 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 541 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 542 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 543 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 544 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 545 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 546 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 547 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 548 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 549 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 550 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 551 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 552 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 553 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 554 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 555 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 556 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 557 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 558 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 559 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 560 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 561 
mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 562 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 563 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 564 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 565 }; 566 567 static const u32 cz_golden_settings_a11[] = 568 { 569 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 570 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 571 mmGB_GPU_ID, 0x0000000f, 0x00000000, 572 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 573 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 574 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 575 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 576 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 577 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 578 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 579 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 580 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 581 }; 582 583 static const u32 cz_golden_common_all[] = 584 { 585 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 586 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 587 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 588 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 589 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 590 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 591 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 592 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 593 }; 594 595 static const u32 cz_mgcg_cgcg_init[] = 596 { 597 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 598 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 599 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 600 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 601 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 602 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 603 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 604 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 605 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 606 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 607 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 608 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 609 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 610 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 611 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 612 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 613 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 614 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 615 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 616 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 617 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 618 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 619 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 620 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 621 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 622 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 623 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 624 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 625 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 626 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 627 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 628 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 629 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 630 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 631 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 632 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 633 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 634 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 635 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 636 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 637 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 638 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 639 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 640 
mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 641 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 642 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 643 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 644 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 645 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 646 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 647 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 648 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 649 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 650 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 651 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 652 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 653 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 654 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 655 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 656 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 657 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 658 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 659 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 660 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 661 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 662 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 663 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 664 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 665 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 666 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 667 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 668 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 669 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 670 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 671 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 672 }; 673 674 static const u32 stoney_golden_settings_a11[] = 675 { 676 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 677 mmGB_GPU_ID, 0x0000000f, 0x00000000, 678 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 679 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 680 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 681 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 682 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 683 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 684 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 685 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 686 }; 687 688 static const u32 stoney_golden_common_all[] = 689 { 690 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 691 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 692 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 693 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 694 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 695 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 696 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 697 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 698 }; 699 700 static const u32 stoney_mgcg_cgcg_init[] = 701 { 702 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 703 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 704 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 705 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 706 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 707 }; 708 709 710 static const char * const sq_edc_source_names[] = { 711 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred", 712 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch", 713 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return", 714 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR", 715 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS", 716 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS", 717 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA", 718 }; 719 720 static void 
gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); 727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); 728 729 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) 730 { 731 switch (adev->asic_type) { 732 case CHIP_TOPAZ: 733 amdgpu_device_program_register_sequence(adev, 734 iceland_mgcg_cgcg_init, 735 ARRAY_SIZE(iceland_mgcg_cgcg_init)); 736 amdgpu_device_program_register_sequence(adev, 737 golden_settings_iceland_a11, 738 ARRAY_SIZE(golden_settings_iceland_a11)); 739 amdgpu_device_program_register_sequence(adev, 740 iceland_golden_common_all, 741 ARRAY_SIZE(iceland_golden_common_all)); 742 break; 743 case CHIP_FIJI: 744 amdgpu_device_program_register_sequence(adev, 745 fiji_mgcg_cgcg_init, 746 ARRAY_SIZE(fiji_mgcg_cgcg_init)); 747 amdgpu_device_program_register_sequence(adev, 748 golden_settings_fiji_a10, 749 ARRAY_SIZE(golden_settings_fiji_a10)); 750 amdgpu_device_program_register_sequence(adev, 751 fiji_golden_common_all, 752 ARRAY_SIZE(fiji_golden_common_all)); 753 break; 754 755 case CHIP_TONGA: 756 amdgpu_device_program_register_sequence(adev, 757 tonga_mgcg_cgcg_init, 758 ARRAY_SIZE(tonga_mgcg_cgcg_init)); 759 amdgpu_device_program_register_sequence(adev, 760 golden_settings_tonga_a11, 761 ARRAY_SIZE(golden_settings_tonga_a11)); 762 amdgpu_device_program_register_sequence(adev, 763 tonga_golden_common_all, 764 ARRAY_SIZE(tonga_golden_common_all)); 765 break; 766 case CHIP_VEGAM: 767 amdgpu_device_program_register_sequence(adev, 768 golden_settings_vegam_a11, 769 ARRAY_SIZE(golden_settings_vegam_a11)); 770 amdgpu_device_program_register_sequence(adev, 771 vegam_golden_common_all, 772 ARRAY_SIZE(vegam_golden_common_all)); 773 break; 774 case CHIP_POLARIS11: 775 case CHIP_POLARIS12: 776 amdgpu_device_program_register_sequence(adev, 777 golden_settings_polaris11_a11, 778 ARRAY_SIZE(golden_settings_polaris11_a11)); 779 amdgpu_device_program_register_sequence(adev, 780 polaris11_golden_common_all, 781 ARRAY_SIZE(polaris11_golden_common_all)); 782 break; 783 case CHIP_POLARIS10: 784 amdgpu_device_program_register_sequence(adev, 785 golden_settings_polaris10_a11, 786 ARRAY_SIZE(golden_settings_polaris10_a11)); 787 amdgpu_device_program_register_sequence(adev, 788 polaris10_golden_common_all, 789 ARRAY_SIZE(polaris10_golden_common_all)); 790 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); 791 if (adev->pdev->revision == 0xc7 && 792 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || 793 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || 794 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { 795 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); 796 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); 797 } 798 break; 799 case CHIP_CARRIZO: 800 amdgpu_device_program_register_sequence(adev, 801 cz_mgcg_cgcg_init, 802 ARRAY_SIZE(cz_mgcg_cgcg_init)); 803 amdgpu_device_program_register_sequence(adev, 804 cz_golden_settings_a11, 805 ARRAY_SIZE(cz_golden_settings_a11)); 806 amdgpu_device_program_register_sequence(adev, 807 
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

/*
 * Basic ring test: ask the CP to write a magic value into a scratch
 * register and poll until it shows up.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	kprintf("gfx_v8_0_ring_test_ring: ring=%p\n", ring);
	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

/*
 * IB test: submit an indirect buffer that writes a magic value to a
 * writeback slot, then wait on its fence and check the slot.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev,
&ib, NULL); 935 dma_fence_put(f); 936 err1: 937 amdgpu_device_wb_free(adev, index); 938 return r; 939 } 940 941 942 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) 943 { 944 release_firmware(adev->gfx.pfp_fw); 945 adev->gfx.pfp_fw = NULL; 946 release_firmware(adev->gfx.me_fw); 947 adev->gfx.me_fw = NULL; 948 release_firmware(adev->gfx.ce_fw); 949 adev->gfx.ce_fw = NULL; 950 release_firmware(adev->gfx.rlc_fw); 951 adev->gfx.rlc_fw = NULL; 952 release_firmware(adev->gfx.mec_fw); 953 adev->gfx.mec_fw = NULL; 954 if ((adev->asic_type != CHIP_STONEY) && 955 (adev->asic_type != CHIP_TOPAZ)) 956 release_firmware(adev->gfx.mec2_fw); 957 adev->gfx.mec2_fw = NULL; 958 959 kfree(adev->gfx.rlc.register_list_format); 960 } 961 962 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 963 { 964 const char *chip_name; 965 char fw_name[30]; 966 int err; 967 struct amdgpu_firmware_info *info = NULL; 968 const struct common_firmware_header *header = NULL; 969 const struct gfx_firmware_header_v1_0 *cp_hdr; 970 const struct rlc_firmware_header_v2_0 *rlc_hdr; 971 unsigned int *tmp = NULL, i; 972 973 DRM_DEBUG("\n"); 974 975 switch (adev->asic_type) { 976 case CHIP_TOPAZ: 977 chip_name = "topaz"; 978 break; 979 case CHIP_TONGA: 980 chip_name = "tonga"; 981 break; 982 case CHIP_CARRIZO: 983 chip_name = "carrizo"; 984 break; 985 case CHIP_FIJI: 986 chip_name = "fiji"; 987 break; 988 case CHIP_STONEY: 989 chip_name = "stoney"; 990 break; 991 case CHIP_POLARIS10: 992 chip_name = "polaris10"; 993 break; 994 case CHIP_POLARIS11: 995 chip_name = "polaris11"; 996 break; 997 case CHIP_POLARIS12: 998 chip_name = "polaris12"; 999 break; 1000 case CHIP_VEGAM: 1001 chip_name = "vegam"; 1002 break; 1003 default: 1004 BUG(); 1005 } 1006 1007 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1008 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name); 1009 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1010 if (err == -ENOENT) { 1011 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1012 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1013 } 1014 } else { 1015 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1016 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1017 } 1018 if (err) 1019 goto out; 1020 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1021 if (err) 1022 goto out; 1023 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1024 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1025 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1026 1027 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1028 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name); 1029 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1030 if (err == -ENOENT) { 1031 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1032 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1033 } 1034 } else { 1035 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1036 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1037 } 1038 if (err) 1039 goto out; 1040 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1041 if (err) 1042 goto out; 1043 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1044 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1045 1046 adev->gfx.me_feature_version = 
		le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs
	 * was formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, M_DRM, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
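
		/*
		 * adev->firmware.fw_size is a running byte count, rounded up
		 * to PAGE_SIZE per entry, of everything handed to the SMU
		 * (front-door) loader; the firmware buffer reserved later is
		 * sized from it.  The MEC jump table (jt_size is in dwords)
		 * and, under SR-IOV, an extra storage entry are accounted
		 * for separately below.
		 */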
		/* account for the MEC jump table (JT) as well */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

/*
 * Fill @buffer with the clear-state indirect buffer (CSB) packets that the
 * RLC replays to initialize context state.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

/* Copy the jump tables of each CP microengine into the RLC cp_table buffer. */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me <
max_me; me++) { 1319 if (me == 0) { 1320 const struct gfx_firmware_header_v1_0 *hdr = 1321 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1322 fw_data = (const __le32 *) 1323 (adev->gfx.ce_fw->data + 1324 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1325 table_offset = le32_to_cpu(hdr->jt_offset); 1326 table_size = le32_to_cpu(hdr->jt_size); 1327 } else if (me == 1) { 1328 const struct gfx_firmware_header_v1_0 *hdr = 1329 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1330 fw_data = (const __le32 *) 1331 (adev->gfx.pfp_fw->data + 1332 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1333 table_offset = le32_to_cpu(hdr->jt_offset); 1334 table_size = le32_to_cpu(hdr->jt_size); 1335 } else if (me == 2) { 1336 const struct gfx_firmware_header_v1_0 *hdr = 1337 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1338 fw_data = (const __le32 *) 1339 (adev->gfx.me_fw->data + 1340 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1341 table_offset = le32_to_cpu(hdr->jt_offset); 1342 table_size = le32_to_cpu(hdr->jt_size); 1343 } else if (me == 3) { 1344 const struct gfx_firmware_header_v1_0 *hdr = 1345 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1346 fw_data = (const __le32 *) 1347 (adev->gfx.mec_fw->data + 1348 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1349 table_offset = le32_to_cpu(hdr->jt_offset); 1350 table_size = le32_to_cpu(hdr->jt_size); 1351 } else if (me == 4) { 1352 const struct gfx_firmware_header_v1_0 *hdr = 1353 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 1354 fw_data = (const __le32 *) 1355 (adev->gfx.mec2_fw->data + 1356 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1357 table_offset = le32_to_cpu(hdr->jt_offset); 1358 table_size = le32_to_cpu(hdr->jt_size); 1359 } 1360 1361 for (i = 0; i < table_size; i ++) { 1362 dst_ptr[bo_offset + i] = 1363 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 1364 } 1365 1366 bo_offset += table_size; 1367 } 1368 } 1369 1370 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1371 { 1372 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); 1373 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); 1374 } 1375 1376 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1377 { 1378 volatile u32 *dst_ptr; 1379 u32 dws; 1380 const struct cs_section_def *cs_data; 1381 int r; 1382 1383 adev->gfx.rlc.cs_data = vi_cs_data; 1384 1385 cs_data = adev->gfx.rlc.cs_data; 1386 1387 if (cs_data) { 1388 /* clear state block */ 1389 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1390 1391 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 1392 AMDGPU_GEM_DOMAIN_VRAM, 1393 &adev->gfx.rlc.clear_state_obj, 1394 (u64 *)&adev->gfx.rlc.clear_state_gpu_addr, 1395 (void **)&adev->gfx.rlc.cs_ptr); 1396 if (r) { 1397 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1398 gfx_v8_0_rlc_fini(adev); 1399 return r; 1400 } 1401 1402 /* set up the cs buffer */ 1403 dst_ptr = adev->gfx.rlc.cs_ptr; 1404 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1405 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1406 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1407 } 1408 1409 if ((adev->asic_type == CHIP_CARRIZO) || 1410 (adev->asic_type == CHIP_STONEY)) { 1411 adev->gfx.rlc.cp_table_size = (96 * 5 * 4) + (64 * 1024); /* JT + GDS */ 1412 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 1413 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1414 &adev->gfx.rlc.cp_table_obj, 1415 (u64 
*)&adev->gfx.rlc.cp_table_gpu_addr, 1416 (void **)&adev->gfx.rlc.cp_table_ptr); 1417 if (r) { 1418 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); 1419 return r; 1420 } 1421 1422 cz_init_cp_jump_table(adev); 1423 1424 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 1425 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1426 } 1427 1428 return 0; 1429 } 1430 1431 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1432 { 1433 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1434 } 1435 1436 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1437 { 1438 int r; 1439 u32 *hpd; 1440 size_t mec_hpd_size; 1441 1442 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1443 1444 /* take ownership of the relevant compute queues */ 1445 amdgpu_gfx_compute_queue_acquire(adev); 1446 1447 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; 1448 1449 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1450 AMDGPU_GEM_DOMAIN_GTT, 1451 &adev->gfx.mec.hpd_eop_obj, 1452 &adev->gfx.mec.hpd_eop_gpu_addr, 1453 (void **)&hpd); 1454 if (r) { 1455 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1456 return r; 1457 } 1458 1459 memset(hpd, 0, mec_hpd_size); 1460 1461 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1462 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1463 1464 return 0; 1465 } 1466 1467 static const u32 vgpr_init_compute_shader[] = 1468 { 1469 0x7e000209, 0x7e020208, 1470 0x7e040207, 0x7e060206, 1471 0x7e080205, 0x7e0a0204, 1472 0x7e0c0203, 0x7e0e0202, 1473 0x7e100201, 0x7e120200, 1474 0x7e140209, 0x7e160208, 1475 0x7e180207, 0x7e1a0206, 1476 0x7e1c0205, 0x7e1e0204, 1477 0x7e200203, 0x7e220202, 1478 0x7e240201, 0x7e260200, 1479 0x7e280209, 0x7e2a0208, 1480 0x7e2c0207, 0x7e2e0206, 1481 0x7e300205, 0x7e320204, 1482 0x7e340203, 0x7e360202, 1483 0x7e380201, 0x7e3a0200, 1484 0x7e3c0209, 0x7e3e0208, 1485 0x7e400207, 0x7e420206, 1486 0x7e440205, 0x7e460204, 1487 0x7e480203, 0x7e4a0202, 1488 0x7e4c0201, 0x7e4e0200, 1489 0x7e500209, 0x7e520208, 1490 0x7e540207, 0x7e560206, 1491 0x7e580205, 0x7e5a0204, 1492 0x7e5c0203, 0x7e5e0202, 1493 0x7e600201, 0x7e620200, 1494 0x7e640209, 0x7e660208, 1495 0x7e680207, 0x7e6a0206, 1496 0x7e6c0205, 0x7e6e0204, 1497 0x7e700203, 0x7e720202, 1498 0x7e740201, 0x7e760200, 1499 0x7e780209, 0x7e7a0208, 1500 0x7e7c0207, 0x7e7e0206, 1501 0xbf8a0000, 0xbf810000, 1502 }; 1503 1504 static const u32 sgpr_init_compute_shader[] = 1505 { 1506 0xbe8a0100, 0xbe8c0102, 1507 0xbe8e0104, 0xbe900106, 1508 0xbe920108, 0xbe940100, 1509 0xbe960102, 0xbe980104, 1510 0xbe9a0106, 0xbe9c0108, 1511 0xbe9e0100, 0xbea00102, 1512 0xbea20104, 0xbea40106, 1513 0xbea60108, 0xbea80100, 1514 0xbeaa0102, 0xbeac0104, 1515 0xbeae0106, 0xbeb00108, 1516 0xbeb20100, 0xbeb40102, 1517 0xbeb60104, 0xbeb80106, 1518 0xbeba0108, 0xbebc0100, 1519 0xbebe0102, 0xbec00104, 1520 0xbec20106, 0xbec40108, 1521 0xbec60100, 0xbec80102, 1522 0xbee60004, 0xbee70005, 1523 0xbeea0006, 0xbeeb0007, 1524 0xbee80008, 0xbee90009, 1525 0xbefc0000, 0xbf8a0000, 1526 0xbf810000, 0x00000000, 1527 }; 1528 1529 static const u32 vgpr_init_regs[] = 1530 { 1531 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1532 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ 1533 mmCOMPUTE_NUM_THREAD_X, 256*4, 1534 mmCOMPUTE_NUM_THREAD_Y, 1, 1535 mmCOMPUTE_NUM_THREAD_Z, 1, 1536 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */ 1537 mmCOMPUTE_PGM_RSRC2, 20, 1538 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1539 mmCOMPUTE_USER_DATA_1, 
0xedcedc01, 1540 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1541 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1542 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1543 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1544 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1545 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1546 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1547 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1548 }; 1549 1550 static const u32 sgpr1_init_regs[] = 1551 { 1552 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1553 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ 1554 mmCOMPUTE_NUM_THREAD_X, 256*5, 1555 mmCOMPUTE_NUM_THREAD_Y, 1, 1556 mmCOMPUTE_NUM_THREAD_Z, 1, 1557 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1558 mmCOMPUTE_PGM_RSRC2, 20, 1559 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1560 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1561 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1562 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1563 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1564 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1565 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1566 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1567 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1568 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1569 }; 1570 1571 static const u32 sgpr2_init_regs[] = 1572 { 1573 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1574 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1575 mmCOMPUTE_NUM_THREAD_X, 256*5, 1576 mmCOMPUTE_NUM_THREAD_Y, 1, 1577 mmCOMPUTE_NUM_THREAD_Z, 1, 1578 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1579 mmCOMPUTE_PGM_RSRC2, 20, 1580 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1581 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1582 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1583 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1584 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1585 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1586 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1587 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1588 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1589 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1590 }; 1591 1592 static const u32 sec_ded_counter_registers[] = 1593 { 1594 mmCPC_EDC_ATC_CNT, 1595 mmCPC_EDC_SCRATCH_CNT, 1596 mmCPC_EDC_UCODE_CNT, 1597 mmCPF_EDC_ATC_CNT, 1598 mmCPF_EDC_ROQ_CNT, 1599 mmCPF_EDC_TAG_CNT, 1600 mmCPG_EDC_ATC_CNT, 1601 mmCPG_EDC_DMA_CNT, 1602 mmCPG_EDC_TAG_CNT, 1603 mmDC_EDC_CSINVOC_CNT, 1604 mmDC_EDC_RESTORE_CNT, 1605 mmDC_EDC_STATE_CNT, 1606 mmGDS_EDC_CNT, 1607 mmGDS_EDC_GRBM_CNT, 1608 mmGDS_EDC_OA_DED, 1609 mmSPI_EDC_CNT, 1610 mmSQC_ATC_EDC_GATCL1_CNT, 1611 mmSQC_EDC_CNT, 1612 mmSQ_EDC_DED_CNT, 1613 mmSQ_EDC_INFO, 1614 mmSQ_EDC_SEC_CNT, 1615 mmTCC_EDC_CNT, 1616 mmTCP_ATC_EDC_GATCL1_CNT, 1617 mmTCP_EDC_CNT, 1618 mmTD_EDC_CNT 1619 }; 1620 1621 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1622 { 1623 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1624 struct amdgpu_ib ib; 1625 struct dma_fence *f = NULL; 1626 int r, i; 1627 u32 tmp; 1628 unsigned total_size, vgpr_offset, sgpr_offset; 1629 u64 gpu_addr; 1630 1631 /* only supported on CZ */ 1632 if (adev->asic_type != CHIP_CARRIZO) 1633 return 0; 1634 1635 /* bail if the compute ring is not ready */ 1636 if (!ring->ready) 1637 return 0; 1638 1639 tmp = RREG32(mmGB_EDC_MODE); 1640 WREG32(mmGB_EDC_MODE, 0); 1641 1642 total_size = 1643 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1644 total_size += 1645 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1646 total_size += 1647 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1648 total_size = ALIGN(total_size, 256); 1649 vgpr_offset = total_size; 1650 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1651 sgpr_offset = total_size; 1652 total_size += sizeof(sgpr_init_compute_shader); 1653 1654 /* 
allocate an indirect buffer to put the commands in */ 1655 memset(&ib, 0, sizeof(ib)); 1656 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1657 if (r) { 1658 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1659 return r; 1660 } 1661 1662 /* load the compute shaders */ 1663 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1664 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1665 1666 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1667 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1668 1669 /* init the ib length to 0 */ 1670 ib.length_dw = 0; 1671 1672 /* VGPR */ 1673 /* write the register state for the compute dispatch */ 1674 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1675 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1676 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1677 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1678 } 1679 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1680 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1681 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1682 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1683 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1684 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1685 1686 /* write dispatch packet */ 1687 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1688 ib.ptr[ib.length_dw++] = 8; /* x */ 1689 ib.ptr[ib.length_dw++] = 1; /* y */ 1690 ib.ptr[ib.length_dw++] = 1; /* z */ 1691 ib.ptr[ib.length_dw++] = 1692 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1693 1694 /* write CS partial flush packet */ 1695 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1696 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1697 1698 /* SGPR1 */ 1699 /* write the register state for the compute dispatch */ 1700 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1701 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1702 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1703 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1704 } 1705 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1706 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1708 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1709 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1710 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1711 1712 /* write dispatch packet */ 1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1714 ib.ptr[ib.length_dw++] = 8; /* x */ 1715 ib.ptr[ib.length_dw++] = 1; /* y */ 1716 ib.ptr[ib.length_dw++] = 1; /* z */ 1717 ib.ptr[ib.length_dw++] = 1718 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1719 1720 /* write CS partial flush packet */ 1721 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1722 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1723 1724 /* SGPR2 */ 1725 /* write the register state for the compute dispatch */ 1726 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1728 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1729 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1730 } 1731 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1732 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1733 ib.ptr[ib.length_dw++] = 
PACKET3(PACKET3_SET_SH_REG, 2); 1734 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1735 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1736 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1737 1738 /* write dispatch packet */ 1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1740 ib.ptr[ib.length_dw++] = 8; /* x */ 1741 ib.ptr[ib.length_dw++] = 1; /* y */ 1742 ib.ptr[ib.length_dw++] = 1; /* z */ 1743 ib.ptr[ib.length_dw++] = 1744 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1745 1746 /* write CS partial flush packet */ 1747 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1748 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1749 1750 /* schedule the ib on the ring */ 1751 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1752 if (r) { 1753 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1754 goto fail; 1755 } 1756 1757 /* wait for the GPU to finish processing the IB */ 1758 r = dma_fence_wait(f, false); 1759 if (r) { 1760 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1761 goto fail; 1762 } 1763 1764 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1765 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1766 WREG32(mmGB_EDC_MODE, tmp); 1767 1768 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1769 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1770 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1771 1772 1773 /* read back registers to clear the counters */ 1774 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1775 RREG32(sec_ded_counter_registers[i]); 1776 1777 fail: 1778 amdgpu_ib_free(adev, &ib, NULL); 1779 dma_fence_put(f); 1780 1781 return r; 1782 } 1783 1784 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1785 { 1786 u32 gb_addr_config; 1787 u32 mc_shared_chmap, mc_arb_ramcfg; 1788 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1789 u32 tmp; 1790 int ret; 1791 1792 switch (adev->asic_type) { 1793 case CHIP_TOPAZ: 1794 adev->gfx.config.max_shader_engines = 1; 1795 adev->gfx.config.max_tile_pipes = 2; 1796 adev->gfx.config.max_cu_per_sh = 6; 1797 adev->gfx.config.max_sh_per_se = 1; 1798 adev->gfx.config.max_backends_per_se = 2; 1799 adev->gfx.config.max_texture_channel_caches = 2; 1800 adev->gfx.config.max_gprs = 256; 1801 adev->gfx.config.max_gs_threads = 32; 1802 adev->gfx.config.max_hw_contexts = 8; 1803 1804 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1805 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1806 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1807 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1808 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1809 break; 1810 case CHIP_FIJI: 1811 adev->gfx.config.max_shader_engines = 4; 1812 adev->gfx.config.max_tile_pipes = 16; 1813 adev->gfx.config.max_cu_per_sh = 16; 1814 adev->gfx.config.max_sh_per_se = 1; 1815 adev->gfx.config.max_backends_per_se = 4; 1816 adev->gfx.config.max_texture_channel_caches = 16; 1817 adev->gfx.config.max_gprs = 256; 1818 adev->gfx.config.max_gs_threads = 32; 1819 adev->gfx.config.max_hw_contexts = 8; 1820 1821 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1822 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1823 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1824 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1825 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1826 break; 1827 case CHIP_POLARIS11: 1828 case CHIP_POLARIS12: 1829 ret = amdgpu_atombios_get_gfx_info(adev); 1830 if (ret) 1831 return ret; 1832 adev->gfx.config.max_gprs = 256; 1833
adev->gfx.config.max_gs_threads = 32; 1834 adev->gfx.config.max_hw_contexts = 8; 1835 1836 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1837 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1838 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1839 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1840 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1841 break; 1842 case CHIP_POLARIS10: 1843 case CHIP_VEGAM: 1844 ret = amdgpu_atombios_get_gfx_info(adev); 1845 if (ret) 1846 return ret; 1847 adev->gfx.config.max_gprs = 256; 1848 adev->gfx.config.max_gs_threads = 32; 1849 adev->gfx.config.max_hw_contexts = 8; 1850 1851 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1852 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1853 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1854 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1855 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1856 break; 1857 case CHIP_TONGA: 1858 adev->gfx.config.max_shader_engines = 4; 1859 adev->gfx.config.max_tile_pipes = 8; 1860 adev->gfx.config.max_cu_per_sh = 8; 1861 adev->gfx.config.max_sh_per_se = 1; 1862 adev->gfx.config.max_backends_per_se = 2; 1863 adev->gfx.config.max_texture_channel_caches = 8; 1864 adev->gfx.config.max_gprs = 256; 1865 adev->gfx.config.max_gs_threads = 32; 1866 adev->gfx.config.max_hw_contexts = 8; 1867 1868 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1869 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1870 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1871 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1872 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1873 break; 1874 case CHIP_CARRIZO: 1875 adev->gfx.config.max_shader_engines = 1; 1876 adev->gfx.config.max_tile_pipes = 2; 1877 adev->gfx.config.max_sh_per_se = 1; 1878 adev->gfx.config.max_backends_per_se = 2; 1879 adev->gfx.config.max_cu_per_sh = 8; 1880 adev->gfx.config.max_texture_channel_caches = 2; 1881 adev->gfx.config.max_gprs = 256; 1882 adev->gfx.config.max_gs_threads = 32; 1883 adev->gfx.config.max_hw_contexts = 8; 1884 1885 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1886 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1887 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1888 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1889 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1890 break; 1891 case CHIP_STONEY: 1892 adev->gfx.config.max_shader_engines = 1; 1893 adev->gfx.config.max_tile_pipes = 2; 1894 adev->gfx.config.max_sh_per_se = 1; 1895 adev->gfx.config.max_backends_per_se = 1; 1896 adev->gfx.config.max_cu_per_sh = 3; 1897 adev->gfx.config.max_texture_channel_caches = 2; 1898 adev->gfx.config.max_gprs = 256; 1899 adev->gfx.config.max_gs_threads = 16; 1900 adev->gfx.config.max_hw_contexts = 8; 1901 1902 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1903 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1904 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1905 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1906 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1907 break; 1908 default: 1909 adev->gfx.config.max_shader_engines = 2; 1910 adev->gfx.config.max_tile_pipes = 4; 1911 adev->gfx.config.max_cu_per_sh = 2; 1912 adev->gfx.config.max_sh_per_se = 1; 1913 adev->gfx.config.max_backends_per_se = 2; 1914 adev->gfx.config.max_texture_channel_caches = 4; 1915 adev->gfx.config.max_gprs = 256; 1916 adev->gfx.config.max_gs_threads = 32; 1917 adev->gfx.config.max_hw_contexts = 8; 1918 1919 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1920 
adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1921 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1922 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1923 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1924 break; 1925 } 1926 1927 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1928 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1929 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1930 1931 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1932 adev->gfx.config.mem_max_burst_length_bytes = 256; 1933 if (adev->flags & AMD_IS_APU) { 1934 /* Get memory bank mapping mode. */ 1935 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1936 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1937 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1938 1939 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1940 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1941 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1942 1943 /* Validate settings in case only one DIMM installed. */ 1944 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1945 dimm00_addr_map = 0; 1946 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1947 dimm01_addr_map = 0; 1948 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1949 dimm10_addr_map = 0; 1950 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1951 dimm11_addr_map = 0; 1952 1953 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1954 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one.
*/ 1955 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1956 adev->gfx.config.mem_row_size_in_kb = 2; 1957 else 1958 adev->gfx.config.mem_row_size_in_kb = 1; 1959 } else { 1960 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1961 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1962 if (adev->gfx.config.mem_row_size_in_kb > 4) 1963 adev->gfx.config.mem_row_size_in_kb = 4; 1964 } 1965 1966 adev->gfx.config.shader_engine_tile_size = 32; 1967 adev->gfx.config.num_gpus = 1; 1968 adev->gfx.config.multi_gpu_tile_size = 64; 1969 1970 /* fix up row size */ 1971 switch (adev->gfx.config.mem_row_size_in_kb) { 1972 case 1: 1973 default: 1974 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1975 break; 1976 case 2: 1977 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1978 break; 1979 case 4: 1980 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1981 break; 1982 } 1983 adev->gfx.config.gb_addr_config = gb_addr_config; 1984 1985 return 0; 1986 } 1987 1988 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1989 int mec, int pipe, int queue) 1990 { 1991 int r; 1992 unsigned irq_type; 1993 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1994 1995 ring = &adev->gfx.compute_ring[ring_id]; 1996 1997 /* mec0 is me1 */ 1998 ring->me = mec + 1; 1999 ring->pipe = pipe; 2000 ring->queue = queue; 2001 2002 ring->ring_obj = NULL; 2003 ring->use_doorbell = true; 2004 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; 2005 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2006 + (ring_id * GFX8_MEC_HPD_SIZE); 2007 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2008 2009 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2010 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2011 + ring->pipe; 2012 2013 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2014 r = amdgpu_ring_init(adev, ring, 1024, 2015 &adev->gfx.eop_irq, irq_type); 2016 if (r) 2017 return r; 2018 2019 2020 return 0; 2021 } 2022 2023 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); 2024 2025 static int gfx_v8_0_sw_init(void *handle) 2026 { 2027 int i, j, k, r, ring_id; 2028 struct amdgpu_ring *ring; 2029 struct amdgpu_kiq *kiq; 2030 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2031 2032 switch (adev->asic_type) { 2033 case CHIP_TONGA: 2034 case CHIP_CARRIZO: 2035 case CHIP_FIJI: 2036 case CHIP_POLARIS10: 2037 case CHIP_POLARIS11: 2038 case CHIP_POLARIS12: 2039 case CHIP_VEGAM: 2040 adev->gfx.mec.num_mec = 2; 2041 break; 2042 case CHIP_TOPAZ: 2043 case CHIP_STONEY: 2044 default: 2045 adev->gfx.mec.num_mec = 1; 2046 break; 2047 } 2048 2049 adev->gfx.mec.num_pipe_per_mec = 4; 2050 adev->gfx.mec.num_queue_per_pipe = 8; 2051 2052 /* KIQ event */ 2053 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); 2054 if (r) 2055 return r; 2056 2057 /* EOP Event */ 2058 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); 2059 if (r) 2060 return r; 2061 2062 /* Privileged reg */ 2063 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, 2064 &adev->gfx.priv_reg_irq); 2065 if (r) 2066 return r; 2067 2068 /* Privileged inst */ 2069 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY,
VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, 2070 &adev->gfx.priv_inst_irq); 2071 if (r) 2072 return r; 2073 2074 /* Add CP EDC/ECC irq */ 2075 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, 2076 &adev->gfx.cp_ecc_error_irq); 2077 if (r) 2078 return r; 2079 2080 /* SQ interrupts. */ 2081 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, 2082 &adev->gfx.sq_irq); 2083 if (r) { 2084 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); 2085 return r; 2086 } 2087 2088 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); 2089 2090 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2091 2092 gfx_v8_0_scratch_init(adev); 2093 2094 r = gfx_v8_0_init_microcode(adev); 2095 if (r) { 2096 DRM_ERROR("Failed to load gfx firmware!\n"); 2097 return r; 2098 } 2099 2100 r = gfx_v8_0_rlc_init(adev); 2101 if (r) { 2102 DRM_ERROR("Failed to init rlc BOs!\n"); 2103 return r; 2104 } 2105 2106 r = gfx_v8_0_mec_init(adev); 2107 if (r) { 2108 DRM_ERROR("Failed to init MEC BOs!\n"); 2109 return r; 2110 } 2111 2112 /* set up the gfx ring */ 2113 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2114 ring = &adev->gfx.gfx_ring[i]; 2115 ring->ring_obj = NULL; 2116 sprintf(ring->name, "gfx"); 2117 /* no gfx doorbells on iceland */ 2118 if (adev->asic_type != CHIP_TOPAZ) { 2119 ring->use_doorbell = true; 2120 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2121 } 2122 2123 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2124 AMDGPU_CP_IRQ_GFX_EOP); 2125 if (r) 2126 return r; 2127 } 2128 2129 2130 /* set up the compute queues - allocate horizontally across pipes */ 2131 ring_id = 0; 2132 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2133 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2134 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2135 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2136 continue; 2137 2138 r = gfx_v8_0_compute_ring_init(adev, 2139 ring_id, 2140 i, k, j); 2141 if (r) 2142 return r; 2143 2144 ring_id++; 2145 } 2146 } 2147 } 2148 2149 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2150 if (r) { 2151 DRM_ERROR("Failed to init KIQ BOs!\n"); 2152 return r; 2153 } 2154 2155 kiq = &adev->gfx.kiq; 2156 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2157 if (r) 2158 return r; 2159 2160 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2161 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2162 if (r) 2163 return r; 2164 2165 /* reserve GDS, GWS and OA resource for gfx */ 2166 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2167 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2168 &adev->gds.gds_gfx_bo, NULL, NULL); 2169 if (r) 2170 return r; 2171 2172 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2173 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2174 &adev->gds.gws_gfx_bo, NULL, NULL); 2175 if (r) 2176 return r; 2177 2178 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2179 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2180 &adev->gds.oa_gfx_bo, NULL, NULL); 2181 if (r) 2182 return r; 2183 2184 adev->gfx.ce_ram_size = 0x8000; 2185 2186 r = gfx_v8_0_gpu_early_init(adev); 2187 if (r) 2188 return r; 2189 2190 return 0; 2191 } 2192 2193 static int gfx_v8_0_sw_fini(void *handle) 2194 { 2195 int i; 2196 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2197 2198 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2199 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2200
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2201 2202 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2203 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2204 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2205 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2206 2207 amdgpu_gfx_compute_mqd_sw_fini(adev); 2208 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2209 amdgpu_gfx_kiq_fini(adev); 2210 2211 gfx_v8_0_mec_fini(adev); 2212 gfx_v8_0_rlc_fini(adev); 2213 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2214 (u64 *)&adev->gfx.rlc.clear_state_gpu_addr, 2215 (void **)&adev->gfx.rlc.cs_ptr); 2216 if ((adev->asic_type == CHIP_CARRIZO) || 2217 (adev->asic_type == CHIP_STONEY)) { 2218 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2219 (u64 *)&adev->gfx.rlc.cp_table_gpu_addr, 2220 (void **)&adev->gfx.rlc.cp_table_ptr); 2221 } 2222 gfx_v8_0_free_microcode(adev); 2223 2224 return 0; 2225 } 2226 2227 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2228 { 2229 uint32_t *modearray, *mod2array; 2230 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2231 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2232 u32 reg_offset; 2233 2234 modearray = adev->gfx.config.tile_mode_array; 2235 mod2array = adev->gfx.config.macrotile_mode_array; 2236 2237 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2238 modearray[reg_offset] = 0; 2239 2240 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2241 mod2array[reg_offset] = 0; 2242 2243 switch (adev->asic_type) { 2244 case CHIP_TOPAZ: 2245 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2246 PIPE_CONFIG(ADDR_SURF_P2) | 2247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2249 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2250 PIPE_CONFIG(ADDR_SURF_P2) | 2251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2253 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2254 PIPE_CONFIG(ADDR_SURF_P2) | 2255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2257 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2258 PIPE_CONFIG(ADDR_SURF_P2) | 2259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2261 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2262 PIPE_CONFIG(ADDR_SURF_P2) | 2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2265 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2266 PIPE_CONFIG(ADDR_SURF_P2) | 2267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2269 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2270 PIPE_CONFIG(ADDR_SURF_P2) | 2271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2273 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2274 PIPE_CONFIG(ADDR_SURF_P2)); 2275 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2279 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2280 PIPE_CONFIG(ADDR_SURF_P2) | 2281 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2283 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2284 
PIPE_CONFIG(ADDR_SURF_P2) | 2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2287 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2288 PIPE_CONFIG(ADDR_SURF_P2) | 2289 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2291 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2292 PIPE_CONFIG(ADDR_SURF_P2) | 2293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2295 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2296 PIPE_CONFIG(ADDR_SURF_P2) | 2297 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2299 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2300 PIPE_CONFIG(ADDR_SURF_P2) | 2301 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2303 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2304 PIPE_CONFIG(ADDR_SURF_P2) | 2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2307 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2308 PIPE_CONFIG(ADDR_SURF_P2) | 2309 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2311 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2312 PIPE_CONFIG(ADDR_SURF_P2) | 2313 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2315 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2316 PIPE_CONFIG(ADDR_SURF_P2) | 2317 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2319 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2320 PIPE_CONFIG(ADDR_SURF_P2) | 2321 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2323 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2324 PIPE_CONFIG(ADDR_SURF_P2) | 2325 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2327 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2328 PIPE_CONFIG(ADDR_SURF_P2) | 2329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2331 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2332 PIPE_CONFIG(ADDR_SURF_P2) | 2333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2335 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2336 PIPE_CONFIG(ADDR_SURF_P2) | 2337 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2339 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2340 PIPE_CONFIG(ADDR_SURF_P2) | 2341 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2343 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2344 PIPE_CONFIG(ADDR_SURF_P2) | 2345 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2347 2348 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2351 NUM_BANKS(ADDR_SURF_8_BANK)); 2352 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2355 NUM_BANKS(ADDR_SURF_8_BANK)); 2356 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2358 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2359 NUM_BANKS(ADDR_SURF_8_BANK)); 2360 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2363 NUM_BANKS(ADDR_SURF_8_BANK)); 2364 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2367 NUM_BANKS(ADDR_SURF_8_BANK)); 2368 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2371 NUM_BANKS(ADDR_SURF_8_BANK)); 2372 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2375 NUM_BANKS(ADDR_SURF_8_BANK)); 2376 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2379 NUM_BANKS(ADDR_SURF_16_BANK)); 2380 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2383 NUM_BANKS(ADDR_SURF_16_BANK)); 2384 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2387 NUM_BANKS(ADDR_SURF_16_BANK)); 2388 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2391 NUM_BANKS(ADDR_SURF_16_BANK)); 2392 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2395 NUM_BANKS(ADDR_SURF_16_BANK)); 2396 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2399 NUM_BANKS(ADDR_SURF_16_BANK)); 2400 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2403 NUM_BANKS(ADDR_SURF_8_BANK)); 2404 2405 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2406 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2407 reg_offset != 23) 2408 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2409 2410 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2411 if (reg_offset != 7) 2412 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2413 2414 break; 2415 case CHIP_FIJI: 2416 case CHIP_VEGAM: 2417 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2421 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2425 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2426 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2427 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2428 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2429 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2430 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2431 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2432 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2433 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2434 
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2435 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2436 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2437 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2438 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2439 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2440 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2441 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2442 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2443 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2444 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2445 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2446 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2447 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2448 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2449 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2450 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2451 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2452 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2455 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2456 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2459 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2460 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2463 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2464 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2467 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2468 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2471 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2472 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2475 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2476 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2477 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2479 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2480 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2481 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2483 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2484 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2485 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2487 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2488 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2489 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2491 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2492 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2493 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2495 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2496 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2499 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2500 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2501 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2503 modearray[22] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2504 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2505 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2507 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2508 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2509 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2511 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2512 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2513 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2514 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2515 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2516 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2517 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2518 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2519 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2520 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2521 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2522 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2523 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2524 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2525 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2526 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2527 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2528 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2529 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2530 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2531 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2532 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2533 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2534 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2535 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2536 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2537 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2538 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2539 2540 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2543 NUM_BANKS(ADDR_SURF_8_BANK)); 2544 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2547 NUM_BANKS(ADDR_SURF_8_BANK)); 2548 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2551 NUM_BANKS(ADDR_SURF_8_BANK)); 2552 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2555 NUM_BANKS(ADDR_SURF_8_BANK)); 2556 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2559 NUM_BANKS(ADDR_SURF_8_BANK)); 2560 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2563 NUM_BANKS(ADDR_SURF_8_BANK)); 2564 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2567 NUM_BANKS(ADDR_SURF_8_BANK)); 2568 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2571 NUM_BANKS(ADDR_SURF_8_BANK)); 2572 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2573 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2574 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2575 NUM_BANKS(ADDR_SURF_8_BANK)); 2576 mod2array[10] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2577 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2578 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2579 NUM_BANKS(ADDR_SURF_8_BANK)); 2580 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2581 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2582 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2583 NUM_BANKS(ADDR_SURF_8_BANK)); 2584 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2585 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2586 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2587 NUM_BANKS(ADDR_SURF_8_BANK)); 2588 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2589 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2590 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2591 NUM_BANKS(ADDR_SURF_8_BANK)); 2592 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2593 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2594 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2595 NUM_BANKS(ADDR_SURF_4_BANK)); 2596 2597 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2598 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2599 2600 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2601 if (reg_offset != 7) 2602 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2603 2604 break; 2605 case CHIP_TONGA: 2606 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2607 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2610 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2612 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2613 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2614 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2615 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2616 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2617 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2618 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2619 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2620 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2621 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2622 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2623 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2624 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2625 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2626 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2627 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2628 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2629 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2630 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2631 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2632 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2633 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2634 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2635 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2636 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2637 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2638 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2639 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2640 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2641 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2644 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2645 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2648 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2649 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2650 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2652 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2656 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2657 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2658 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2659 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2660 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2661 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2662 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2663 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2664 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2665 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2666 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2667 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2668 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2669 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2670 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2672 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2673 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2674 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2676 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2677 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2678 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2680 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2681 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2682 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2684 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2685 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2686 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2688 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2689 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2690 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2692 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2693 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2696 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2700 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2701 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2702 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2704 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2705 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2706 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2707 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2708 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2709 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2710 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2712 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2713 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2714 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2716 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2717 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2718 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2719 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2720 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2721 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2722 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2724 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2725 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2726 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2727 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2728 2729 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2730 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2731 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2732 NUM_BANKS(ADDR_SURF_16_BANK)); 2733 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2736 NUM_BANKS(ADDR_SURF_16_BANK)); 2737 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2740 NUM_BANKS(ADDR_SURF_16_BANK)); 2741 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2744 NUM_BANKS(ADDR_SURF_16_BANK)); 2745 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2748 NUM_BANKS(ADDR_SURF_16_BANK)); 2749 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2752 NUM_BANKS(ADDR_SURF_16_BANK)); 2753 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2756 NUM_BANKS(ADDR_SURF_16_BANK)); 2757 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2760 NUM_BANKS(ADDR_SURF_16_BANK)); 2761 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2764 NUM_BANKS(ADDR_SURF_16_BANK)); 2765 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2768 NUM_BANKS(ADDR_SURF_16_BANK)); 2769 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2770 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2771 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2772 NUM_BANKS(ADDR_SURF_16_BANK)); 2773 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2776 NUM_BANKS(ADDR_SURF_8_BANK)); 2777 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2778 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2779 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2780 NUM_BANKS(ADDR_SURF_4_BANK)); 2781 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2784 NUM_BANKS(ADDR_SURF_4_BANK)); 2785 2786 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2787 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2788 2789 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2790 if (reg_offset != 7) 2791 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2792 2793 break; 2794 case CHIP_POLARIS11: 2795 case CHIP_POLARIS12: 2796 modearray[0] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2797 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2798 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2800 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2801 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2802 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2804 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2805 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2806 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2807 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2808 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2809 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2810 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2811 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2812 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2813 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2814 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2815 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2816 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2817 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2818 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2819 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2820 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2821 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2822 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2823 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2824 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2825 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2826 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2827 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2828 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2829 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2830 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2831 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2833 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2834 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2835 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2838 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2839 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2842 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2843 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2844 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2846 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2847 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2848 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2849 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2850 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2851 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2854 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2855 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2858 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2859 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2860 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2862 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2863 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2864 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2866 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2867 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2868 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2870 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2871 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2872 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2874 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2875 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2876 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2878 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2879 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2880 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2881 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2882 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2883 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2884 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2886 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2887 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2888 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2890 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2891 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2892 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2894 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2895 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2896 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2897 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2898 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2899 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2900 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2902 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2903 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2904 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2905 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2906 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2907 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2908 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2909 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2910 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2911 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2912 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2914 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2915 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2916 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2917 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2918 2919 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2922 NUM_BANKS(ADDR_SURF_16_BANK)); 2923 2924 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2925 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2926 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2927 NUM_BANKS(ADDR_SURF_16_BANK)); 2928 2929 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2930 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2931 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2932 NUM_BANKS(ADDR_SURF_16_BANK)); 2933 2934 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2937 NUM_BANKS(ADDR_SURF_16_BANK)); 2938 2939 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2942 NUM_BANKS(ADDR_SURF_16_BANK)); 2943 2944 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2945 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2947 NUM_BANKS(ADDR_SURF_16_BANK)); 2948 2949 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2952 NUM_BANKS(ADDR_SURF_16_BANK)); 2953 2954 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2957 NUM_BANKS(ADDR_SURF_16_BANK)); 2958 2959 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2962 NUM_BANKS(ADDR_SURF_16_BANK)); 2963 2964 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2967 NUM_BANKS(ADDR_SURF_16_BANK)); 2968 2969 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2972 NUM_BANKS(ADDR_SURF_16_BANK)); 2973 2974 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2977 NUM_BANKS(ADDR_SURF_16_BANK)); 2978 2979 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2982 NUM_BANKS(ADDR_SURF_8_BANK)); 2983 2984 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2987 NUM_BANKS(ADDR_SURF_4_BANK)); 2988 2989 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2990 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2991 2992 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2993 if (reg_offset != 7) 2994 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2995 2996 break; 2997 case CHIP_POLARIS10: 2998 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3002 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3004 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3005 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3006 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3007 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3009 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3010 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3013 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3014 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3015 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3016 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3017 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3018 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3020 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3021 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3022 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3023 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3024 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3025 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3026 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3027 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3029 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3030 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3031 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3032 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3036 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3037 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3040 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3044 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3045 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3048 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3049 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3052 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3056 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3057 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3058 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3060 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3061 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3062 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3064 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3065 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3066 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3068 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3069 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3070 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3072 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3073 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3074 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3076 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3078 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3080 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3082 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3084 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3086 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3088 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3089 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3090 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3092 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3093 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3094 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3096 modearray[25] = 
(ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3097 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3098 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3099 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3100 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3101 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3102 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3104 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3105 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3106 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3108 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3109 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3110 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3112 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3113 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3114 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3116 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3117 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3118 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3119 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3120 3121 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3122 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3123 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3124 NUM_BANKS(ADDR_SURF_16_BANK)); 3125 3126 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3127 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3128 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3129 NUM_BANKS(ADDR_SURF_16_BANK)); 3130 3131 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3134 NUM_BANKS(ADDR_SURF_16_BANK)); 3135 3136 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3139 NUM_BANKS(ADDR_SURF_16_BANK)); 3140 3141 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3144 NUM_BANKS(ADDR_SURF_16_BANK)); 3145 3146 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3149 NUM_BANKS(ADDR_SURF_16_BANK)); 3150 3151 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3154 NUM_BANKS(ADDR_SURF_16_BANK)); 3155 3156 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3159 NUM_BANKS(ADDR_SURF_16_BANK)); 3160 3161 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3164 NUM_BANKS(ADDR_SURF_16_BANK)); 3165 3166 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3169 NUM_BANKS(ADDR_SURF_16_BANK)); 3170 3171 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3174 NUM_BANKS(ADDR_SURF_16_BANK)); 3175 3176 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3179 NUM_BANKS(ADDR_SURF_8_BANK)); 3180 3181 
mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3184 NUM_BANKS(ADDR_SURF_4_BANK)); 3185 3186 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3189 NUM_BANKS(ADDR_SURF_4_BANK)); 3190 3191 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3192 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3193 3194 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3195 if (reg_offset != 7) 3196 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3197 3198 break; 3199 case CHIP_STONEY: 3200 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3203 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3204 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3205 PIPE_CONFIG(ADDR_SURF_P2) | 3206 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3207 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3208 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3209 PIPE_CONFIG(ADDR_SURF_P2) | 3210 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3212 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3213 PIPE_CONFIG(ADDR_SURF_P2) | 3214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3216 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3217 PIPE_CONFIG(ADDR_SURF_P2) | 3218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3220 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3221 PIPE_CONFIG(ADDR_SURF_P2) | 3222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3224 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3225 PIPE_CONFIG(ADDR_SURF_P2) | 3226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3228 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3229 PIPE_CONFIG(ADDR_SURF_P2)); 3230 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3231 PIPE_CONFIG(ADDR_SURF_P2) | 3232 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3234 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3235 PIPE_CONFIG(ADDR_SURF_P2) | 3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3238 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3239 PIPE_CONFIG(ADDR_SURF_P2) | 3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3242 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3243 PIPE_CONFIG(ADDR_SURF_P2) | 3244 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3246 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3247 PIPE_CONFIG(ADDR_SURF_P2) | 3248 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3250 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3251 PIPE_CONFIG(ADDR_SURF_P2) | 3252 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3254 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3255 PIPE_CONFIG(ADDR_SURF_P2) | 3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3258 modearray[18] = 
(ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3259 PIPE_CONFIG(ADDR_SURF_P2) | 3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3262 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3263 PIPE_CONFIG(ADDR_SURF_P2) | 3264 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3266 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3267 PIPE_CONFIG(ADDR_SURF_P2) | 3268 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3270 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3271 PIPE_CONFIG(ADDR_SURF_P2) | 3272 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3274 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3275 PIPE_CONFIG(ADDR_SURF_P2) | 3276 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3278 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3279 PIPE_CONFIG(ADDR_SURF_P2) | 3280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3282 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3283 PIPE_CONFIG(ADDR_SURF_P2) | 3284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3286 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3287 PIPE_CONFIG(ADDR_SURF_P2) | 3288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3290 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3291 PIPE_CONFIG(ADDR_SURF_P2) | 3292 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3294 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3295 PIPE_CONFIG(ADDR_SURF_P2) | 3296 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3298 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3299 PIPE_CONFIG(ADDR_SURF_P2) | 3300 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3302 3303 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3306 NUM_BANKS(ADDR_SURF_8_BANK)); 3307 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3310 NUM_BANKS(ADDR_SURF_8_BANK)); 3311 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3312 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3313 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3314 NUM_BANKS(ADDR_SURF_8_BANK)); 3315 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3318 NUM_BANKS(ADDR_SURF_8_BANK)); 3319 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3322 NUM_BANKS(ADDR_SURF_8_BANK)); 3323 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3326 NUM_BANKS(ADDR_SURF_8_BANK)); 3327 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3330 NUM_BANKS(ADDR_SURF_8_BANK)); 3331 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3333 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3334 NUM_BANKS(ADDR_SURF_16_BANK)); 3335 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3338 NUM_BANKS(ADDR_SURF_16_BANK)); 3339 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3342 NUM_BANKS(ADDR_SURF_16_BANK)); 3343 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3346 NUM_BANKS(ADDR_SURF_16_BANK)); 3347 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3350 NUM_BANKS(ADDR_SURF_16_BANK)); 3351 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3354 NUM_BANKS(ADDR_SURF_16_BANK)); 3355 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3358 NUM_BANKS(ADDR_SURF_8_BANK)); 3359 3360 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3361 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3362 reg_offset != 23) 3363 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3364 3365 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3366 if (reg_offset != 7) 3367 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3368 3369 break; 3370 default: 3371 dev_warn(adev->dev, 3372 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3373 adev->asic_type); 3374 3375 case CHIP_CARRIZO: 3376 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3377 PIPE_CONFIG(ADDR_SURF_P2) | 3378 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3379 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3380 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3381 PIPE_CONFIG(ADDR_SURF_P2) | 3382 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3383 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3384 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3385 PIPE_CONFIG(ADDR_SURF_P2) | 3386 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3387 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3388 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3389 PIPE_CONFIG(ADDR_SURF_P2) | 3390 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3391 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3392 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3393 PIPE_CONFIG(ADDR_SURF_P2) | 3394 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3395 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3396 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3397 PIPE_CONFIG(ADDR_SURF_P2) | 3398 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3399 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3400 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3401 PIPE_CONFIG(ADDR_SURF_P2) | 3402 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3403 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3404 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3405 PIPE_CONFIG(ADDR_SURF_P2)); 3406 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3407 PIPE_CONFIG(ADDR_SURF_P2) | 3408 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3410 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3411 
PIPE_CONFIG(ADDR_SURF_P2) | 3412 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3414 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3415 PIPE_CONFIG(ADDR_SURF_P2) | 3416 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3418 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3419 PIPE_CONFIG(ADDR_SURF_P2) | 3420 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3422 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3423 PIPE_CONFIG(ADDR_SURF_P2) | 3424 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3426 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3427 PIPE_CONFIG(ADDR_SURF_P2) | 3428 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3430 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3431 PIPE_CONFIG(ADDR_SURF_P2) | 3432 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3434 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3435 PIPE_CONFIG(ADDR_SURF_P2) | 3436 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3438 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3439 PIPE_CONFIG(ADDR_SURF_P2) | 3440 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3442 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3443 PIPE_CONFIG(ADDR_SURF_P2) | 3444 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3446 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3447 PIPE_CONFIG(ADDR_SURF_P2) | 3448 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3450 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3451 PIPE_CONFIG(ADDR_SURF_P2) | 3452 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3454 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3455 PIPE_CONFIG(ADDR_SURF_P2) | 3456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3458 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3459 PIPE_CONFIG(ADDR_SURF_P2) | 3460 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3462 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3463 PIPE_CONFIG(ADDR_SURF_P2) | 3464 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3466 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3467 PIPE_CONFIG(ADDR_SURF_P2) | 3468 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3470 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3471 PIPE_CONFIG(ADDR_SURF_P2) | 3472 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3474 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3475 PIPE_CONFIG(ADDR_SURF_P2) | 3476 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3478 3479 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3482 NUM_BANKS(ADDR_SURF_8_BANK)); 3483 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3485 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3486 NUM_BANKS(ADDR_SURF_8_BANK)); 3487 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3490 NUM_BANKS(ADDR_SURF_8_BANK)); 3491 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3494 NUM_BANKS(ADDR_SURF_8_BANK)); 3495 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3498 NUM_BANKS(ADDR_SURF_8_BANK)); 3499 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3502 NUM_BANKS(ADDR_SURF_8_BANK)); 3503 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3506 NUM_BANKS(ADDR_SURF_8_BANK)); 3507 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3510 NUM_BANKS(ADDR_SURF_16_BANK)); 3511 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3514 NUM_BANKS(ADDR_SURF_16_BANK)); 3515 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3518 NUM_BANKS(ADDR_SURF_16_BANK)); 3519 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3522 NUM_BANKS(ADDR_SURF_16_BANK)); 3523 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3526 NUM_BANKS(ADDR_SURF_16_BANK)); 3527 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3530 NUM_BANKS(ADDR_SURF_16_BANK)); 3531 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3534 NUM_BANKS(ADDR_SURF_8_BANK)); 3535 3536 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3537 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3538 reg_offset != 23) 3539 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3540 3541 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3542 if (reg_offset != 7) 3543 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3544 3545 break; 3546 } 3547 } 3548 3549 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3550 u32 se_num, u32 sh_num, u32 instance) 3551 { 3552 u32 data; 3553 3554 if (instance == 0xffffffff) 3555 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3556 else 3557 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3558 3559 if (se_num == 0xffffffff) 3560 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3561 else 3562 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3563 3564 if (sh_num == 0xffffffff) 3565 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3566 else 3567 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3568 3569 WREG32(mmGRBM_GFX_INDEX, data); 3570 
} 3571 3572 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3573 u32 me, u32 pipe, u32 q) 3574 { 3575 vi_srbm_select(adev, me, pipe, q, 0); 3576 } 3577 3578 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3579 { 3580 u32 data, mask; 3581 3582 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3583 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3584 3585 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3586 3587 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3588 adev->gfx.config.max_sh_per_se); 3589 3590 return (~data) & mask; 3591 } 3592 3593 static void 3594 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3595 { 3596 switch (adev->asic_type) { 3597 case CHIP_FIJI: 3598 case CHIP_VEGAM: 3599 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3600 RB_XSEL2(1) | PKR_MAP(2) | 3601 PKR_XSEL(1) | PKR_YSEL(1) | 3602 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3603 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3604 SE_PAIR_YSEL(2); 3605 break; 3606 case CHIP_TONGA: 3607 case CHIP_POLARIS10: 3608 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3609 SE_XSEL(1) | SE_YSEL(1); 3610 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3611 SE_PAIR_YSEL(2); 3612 break; 3613 case CHIP_TOPAZ: 3614 case CHIP_CARRIZO: 3615 *rconf |= RB_MAP_PKR0(2); 3616 *rconf1 |= 0x0; 3617 break; 3618 case CHIP_POLARIS11: 3619 case CHIP_POLARIS12: 3620 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3621 SE_XSEL(1) | SE_YSEL(1); 3622 *rconf1 |= 0x0; 3623 break; 3624 case CHIP_STONEY: 3625 *rconf |= 0x0; 3626 *rconf1 |= 0x0; 3627 break; 3628 default: 3629 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3630 break; 3631 } 3632 } 3633 3634 static void 3635 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3636 u32 raster_config, u32 raster_config_1, 3637 unsigned rb_mask, unsigned num_rb) 3638 { 3639 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3640 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3641 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3642 unsigned rb_per_se = num_rb / num_se; 3643 unsigned se_mask[4]; 3644 unsigned se; 3645 3646 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3647 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3648 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3649 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3650 3651 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3652 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3653 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3654 3655 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3656 (!se_mask[2] && !se_mask[3]))) { 3657 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3658 3659 if (!se_mask[0] && !se_mask[1]) { 3660 raster_config_1 |= 3661 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3662 } else { 3663 raster_config_1 |= 3664 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3665 } 3666 } 3667 3668 for (se = 0; se < num_se; se++) { 3669 unsigned raster_config_se = raster_config; 3670 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3671 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3672 int idx = (se / 2) * 2; 3673 3674 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3675 raster_config_se &= ~SE_MAP_MASK; 3676 3677 if (!se_mask[idx]) { 3678 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3679 } else { 3680 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3681 } 3682 } 3683 3684 pkr0_mask &= rb_mask; 3685 pkr1_mask &= rb_mask; 
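/* Added explanatory comment (not in the original source): per the purpose of gfx_v8_0_write_harvested_raster_configs(), the block below appears to handle the case where every RB behind one of this SE's two packers has been harvested (its pkr mask is empty); PKR_MAP is then overridden so raster work is steered to the packer that still has active RBs. */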
3686 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3687 raster_config_se &= ~PKR_MAP_MASK; 3688 3689 if (!pkr0_mask) { 3690 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3691 } else { 3692 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3693 } 3694 } 3695 3696 if (rb_per_se >= 2) { 3697 unsigned rb0_mask = 1 << (se * rb_per_se); 3698 unsigned rb1_mask = rb0_mask << 1; 3699 3700 rb0_mask &= rb_mask; 3701 rb1_mask &= rb_mask; 3702 if (!rb0_mask || !rb1_mask) { 3703 raster_config_se &= ~RB_MAP_PKR0_MASK; 3704 3705 if (!rb0_mask) { 3706 raster_config_se |= 3707 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3708 } else { 3709 raster_config_se |= 3710 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3711 } 3712 } 3713 3714 if (rb_per_se > 2) { 3715 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3716 rb1_mask = rb0_mask << 1; 3717 rb0_mask &= rb_mask; 3718 rb1_mask &= rb_mask; 3719 if (!rb0_mask || !rb1_mask) { 3720 raster_config_se &= ~RB_MAP_PKR1_MASK; 3721 3722 if (!rb0_mask) { 3723 raster_config_se |= 3724 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3725 } else { 3726 raster_config_se |= 3727 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3728 } 3729 } 3730 } 3731 } 3732 3733 /* GRBM_GFX_INDEX has a different offset on VI */ 3734 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3735 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3736 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3737 } 3738 3739 /* GRBM_GFX_INDEX has a different offset on VI */ 3740 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3741 } 3742 3743 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3744 { 3745 int i, j; 3746 u32 data; 3747 u32 raster_config = 0, raster_config_1 = 0; 3748 u32 active_rbs = 0; 3749 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3750 adev->gfx.config.max_sh_per_se; 3751 unsigned num_rb_pipes; 3752 3753 mutex_lock(&adev->grbm_idx_mutex); 3754 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3755 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3756 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3757 data = gfx_v8_0_get_rb_active_bitmap(adev); 3758 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3759 rb_bitmap_width_per_sh); 3760 } 3761 } 3762 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3763 3764 adev->gfx.config.backend_enable_mask = active_rbs; 3765 adev->gfx.config.num_rbs = hweight32(active_rbs); 3766 3767 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3768 adev->gfx.config.max_shader_engines, 16); 3769 3770 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3771 3772 if (!adev->gfx.config.backend_enable_mask || 3773 adev->gfx.config.num_rbs >= num_rb_pipes) { 3774 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3775 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3776 } else { 3777 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3778 adev->gfx.config.backend_enable_mask, 3779 num_rb_pipes); 3780 } 3781 3782 /* cache the values for userspace */ 3783 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3784 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3785 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3786 adev->gfx.config.rb_config[i][j].rb_backend_disable = 3787 RREG32(mmCC_RB_BACKEND_DISABLE); 3788 adev->gfx.config.rb_config[i][j].user_rb_backend_disable = 3789 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3790 adev->gfx.config.rb_config[i][j].raster_config = 3791 RREG32(mmPA_SC_RASTER_CONFIG); 3792 
adev->gfx.config.rb_config[i][j].raster_config_1 = 3793 RREG32(mmPA_SC_RASTER_CONFIG_1); 3794 } 3795 } 3796 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3797 mutex_unlock(&adev->grbm_idx_mutex); 3798 } 3799 3800 /** 3801 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs 3802 * 3803 * @adev: amdgpu_device pointer 3804 * 3805 * Initialize compute vmid sh_mem registers 3806 * 3807 */ 3808 #define DEFAULT_SH_MEM_BASES (0x6000) 3809 #define FIRST_COMPUTE_VMID (8) 3810 #define LAST_COMPUTE_VMID (16) 3811 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3812 { 3813 int i; 3814 uint32_t sh_mem_config; 3815 uint32_t sh_mem_bases; 3816 3817 /* 3818 * Configure apertures: 3819 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3820 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3821 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3822 */ 3823 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3824 3825 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3826 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3827 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3828 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3829 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3830 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3831 3832 mutex_lock(&adev->srbm_mutex); 3833 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3834 vi_srbm_select(adev, 0, 0, 0, i); 3835 /* CP and shaders */ 3836 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3837 WREG32(mmSH_MEM_APE1_BASE, 1); 3838 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3839 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3840 } 3841 vi_srbm_select(adev, 0, 0, 0, 0); 3842 mutex_unlock(&adev->srbm_mutex); 3843 } 3844 3845 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3846 { 3847 switch (adev->asic_type) { 3848 default: 3849 adev->gfx.config.double_offchip_lds_buf = 1; 3850 break; 3851 case CHIP_CARRIZO: 3852 case CHIP_STONEY: 3853 adev->gfx.config.double_offchip_lds_buf = 0; 3854 break; 3855 } 3856 } 3857 3858 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3859 { 3860 u32 tmp, sh_static_mem_cfg; 3861 int i; 3862 3863 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3864 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3865 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3866 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3867 3868 gfx_v8_0_tiling_mode_table_init(adev); 3869 gfx_v8_0_setup_rb(adev); 3870 gfx_v8_0_get_cu_info(adev); 3871 gfx_v8_0_config_init(adev); 3872 3873 /* XXX SH_MEM regs */ 3874 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3875 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3876 SWIZZLE_ENABLE, 1); 3877 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3878 ELEMENT_SIZE, 1); 3879 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3880 INDEX_STRIDE, 3); 3881 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3882 3883 mutex_lock(&adev->srbm_mutex); 3884 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3885 vi_srbm_select(adev, 0, 0, 0, i); 3886 /* CP and shaders */ 3887 if (i == 0) { 3888 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3889 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3890 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3891 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3892 WREG32(mmSH_MEM_CONFIG, tmp); 3893 WREG32(mmSH_MEM_BASES, 0); 3894 } else { 3895 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3896 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG,
APE1_MTYPE, MTYPE_UC); 3897 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3898 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3899 WREG32(mmSH_MEM_CONFIG, tmp); 3900 tmp = adev->gmc.shared_aperture_start >> 48; 3901 WREG32(mmSH_MEM_BASES, tmp); 3902 } 3903 3904 WREG32(mmSH_MEM_APE1_BASE, 1); 3905 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3906 } 3907 vi_srbm_select(adev, 0, 0, 0, 0); 3908 mutex_unlock(&adev->srbm_mutex); 3909 3910 gfx_v8_0_init_compute_vmid(adev); 3911 3912 mutex_lock(&adev->grbm_idx_mutex); 3913 /* 3914 * making sure that the following register writes will be broadcasted 3915 * to all the shaders 3916 */ 3917 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3918 3919 WREG32(mmPA_SC_FIFO_SIZE, 3920 (adev->gfx.config.sc_prim_fifo_size_frontend << 3921 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3922 (adev->gfx.config.sc_prim_fifo_size_backend << 3923 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3924 (adev->gfx.config.sc_hiz_tile_fifo_size << 3925 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3926 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3927 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3928 3929 tmp = RREG32(mmSPI_ARB_PRIORITY); 3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3931 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3932 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3933 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3934 WREG32(mmSPI_ARB_PRIORITY, tmp); 3935 3936 mutex_unlock(&adev->grbm_idx_mutex); 3937 3938 } 3939 3940 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3941 { 3942 u32 i, j, k; 3943 u32 mask; 3944 3945 mutex_lock(&adev->grbm_idx_mutex); 3946 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3947 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3948 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3949 for (k = 0; k < adev->usec_timeout; k++) { 3950 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3951 break; 3952 udelay(1); 3953 } 3954 if (k == adev->usec_timeout) { 3955 gfx_v8_0_select_se_sh(adev, 0xffffffff, 3956 0xffffffff, 0xffffffff); 3957 mutex_unlock(&adev->grbm_idx_mutex); 3958 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 3959 i, j); 3960 return; 3961 } 3962 } 3963 } 3964 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3965 mutex_unlock(&adev->grbm_idx_mutex); 3966 3967 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3968 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3969 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3970 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3971 for (k = 0; k < adev->usec_timeout; k++) { 3972 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3973 break; 3974 udelay(1); 3975 } 3976 } 3977 3978 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3979 bool enable) 3980 { 3981 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3982 3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3986 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); 3987 3988 WREG32(mmCP_INT_CNTL_RING0, tmp); 3989 } 3990 3991 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3992 { 3993 /* csib */ 3994 WREG32(mmRLC_CSIB_ADDR_HI, 3995 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3996 WREG32(mmRLC_CSIB_ADDR_LO, 3997 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 3998 WREG32(mmRLC_CSIB_LENGTH, 3999 adev->gfx.rlc.clear_state_size); 4000 } 4001 4002 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 4003 int ind_offset, 4004 int list_size, 4005 int *unique_indices, 4006 int *indices_count, 4007 int max_indices, 4008 int *ind_start_offsets, 4009 int *offset_count, 4010 int max_offset) 4011 { 4012 int indices; 4013 bool new_entry = true; 4014 4015 for (; ind_offset < list_size; ind_offset++) { 4016 4017 if (new_entry) { 4018 new_entry = false; 4019 ind_start_offsets[*offset_count] = ind_offset; 4020 *offset_count = *offset_count + 1; 4021 BUG_ON(*offset_count >= max_offset); 4022 } 4023 4024 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 4025 new_entry = true; 4026 continue; 4027 } 4028 4029 ind_offset += 2; 4030 4031 /* look for the matching indice */ 4032 for (indices = 0; 4033 indices < *indices_count; 4034 indices++) { 4035 if (unique_indices[indices] == 4036 register_list_format[ind_offset]) 4037 break; 4038 } 4039 4040 if (indices >= *indices_count) { 4041 unique_indices[*indices_count] = 4042 register_list_format[ind_offset]; 4043 indices = *indices_count; 4044 *indices_count = *indices_count + 1; 4045 BUG_ON(*indices_count >= max_indices); 4046 } 4047 4048 register_list_format[ind_offset] = indices; 4049 } 4050 } 4051 4052 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 4053 { 4054 int i, temp, data; 4055 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 4056 int indices_count = 0; 4057 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 4058 int offset_count = 0; 4059 4060 int list_size; 4061 unsigned int *register_list_format = 4062 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, M_DRM, GFP_KERNEL); 4063 if (!register_list_format) 4064 return -ENOMEM; 4065 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 4066 adev->gfx.rlc.reg_list_format_size_bytes); 4067 4068 gfx_v8_0_parse_ind_reg_list(register_list_format, 4069 RLC_FormatDirectRegListLength, 4070 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 4071 unique_indices, 4072 &indices_count, 4073 ARRAY_SIZE(unique_indices), 4074 indirect_start_offsets, 4075 &offset_count, 4076 ARRAY_SIZE(indirect_start_offsets)); 4077 4078 /* save and restore list */ 4079 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); 4080 4081 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 4082 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 4083 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 4084 4085 /* indirect list */ 4086 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 4087 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 4088 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 4089 4090 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 4091 list_size = list_size >> 1; 4092 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 4093 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 4094 4095 /* starting offsets starts */ 4096 WREG32(mmRLC_GPM_SCRATCH_ADDR, 4097 adev->gfx.rlc.starting_offsets_start); 4098 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 4099 WREG32(mmRLC_GPM_SCRATCH_DATA, 4100 indirect_start_offsets[i]); 4101 4102 /* unique indices */ 4103 temp = 
mmRLC_SRM_INDEX_CNTL_ADDR_0; 4104 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 4105 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { 4106 if (unique_indices[i] != 0) { 4107 WREG32(temp + i, unique_indices[i] & 0x3FFFF); 4108 WREG32(data + i, unique_indices[i] >> 20); 4109 } 4110 } 4111 kfree(register_list_format); 4112 4113 return 0; 4114 } 4115 4116 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 4117 { 4118 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 4119 } 4120 4121 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 4122 { 4123 uint32_t data; 4124 4125 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 4126 4127 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 4128 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 4129 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 4130 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 4131 WREG32(mmRLC_PG_DELAY, data); 4132 4133 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 4134 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 4135 4136 } 4137 4138 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 4139 bool enable) 4140 { 4141 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 4142 } 4143 4144 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 4145 bool enable) 4146 { 4147 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 4148 } 4149 4150 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 4151 { 4152 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1); 4153 } 4154 4155 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 4156 { 4157 if ((adev->asic_type == CHIP_CARRIZO) || 4158 (adev->asic_type == CHIP_STONEY)) { 4159 gfx_v8_0_init_csb(adev); 4160 gfx_v8_0_init_save_restore_list(adev); 4161 gfx_v8_0_enable_save_restore_machine(adev); 4162 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); 4163 gfx_v8_0_init_power_gating(adev); 4164 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4165 } else if ((adev->asic_type == CHIP_POLARIS11) || 4166 (adev->asic_type == CHIP_POLARIS12) || 4167 (adev->asic_type == CHIP_VEGAM)) { 4168 gfx_v8_0_init_csb(adev); 4169 gfx_v8_0_init_save_restore_list(adev); 4170 gfx_v8_0_enable_save_restore_machine(adev); 4171 gfx_v8_0_init_power_gating(adev); 4172 } 4173 4174 } 4175 4176 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 4177 { 4178 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); 4179 4180 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4181 gfx_v8_0_wait_for_rlc_serdes(adev); 4182 } 4183 4184 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 4185 { 4186 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4187 udelay(50); 4188 4189 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 4190 udelay(50); 4191 } 4192 4193 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 4194 { 4195 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1); 4196 4197 /* carrizo do enable cp interrupt after cp inited */ 4198 if (!(adev->flags & AMD_IS_APU)) 4199 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4200 4201 udelay(50); 4202 } 4203 4204 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 4205 { 4206 const struct rlc_firmware_header_v2_0 *hdr; 4207 const __le32 *fw_data; 4208 unsigned i, fw_size; 4209 4210 if (!adev->gfx.rlc_fw) 4211 return -EINVAL; 4212 4213 hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data; 4214 amdgpu_ucode_print_rlc_hdr(&hdr->header); 4215 4216 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4217 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4218 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4219 4220 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4221 for (i = 0; i < fw_size; i++) 4222 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4223 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4224 4225 return 0; 4226 } 4227 4228 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4229 { 4230 int r; 4231 u32 tmp; 4232 4233 gfx_v8_0_rlc_stop(adev); 4234 4235 /* disable CG */ 4236 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4237 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4238 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4239 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4240 if (adev->asic_type == CHIP_POLARIS11 || 4241 adev->asic_type == CHIP_POLARIS10 || 4242 adev->asic_type == CHIP_POLARIS12 || 4243 adev->asic_type == CHIP_VEGAM) { 4244 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4245 tmp &= ~0x3; 4246 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4247 } 4248 4249 /* disable PG */ 4250 WREG32(mmRLC_PG_CNTL, 0); 4251 4252 gfx_v8_0_rlc_reset(adev); 4253 gfx_v8_0_init_pg(adev); 4254 4255 4256 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4257 /* legacy rlc firmware loading */ 4258 r = gfx_v8_0_rlc_load_microcode(adev); 4259 if (r) 4260 return r; 4261 } 4262 4263 gfx_v8_0_rlc_start(adev); 4264 4265 return 0; 4266 } 4267 4268 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4269 { 4270 int i; 4271 u32 tmp = RREG32(mmCP_ME_CNTL); 4272 4273 if (enable) { 4274 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4276 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4277 } else { 4278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4280 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4281 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4282 adev->gfx.gfx_ring[i].ready = false; 4283 } 4284 WREG32(mmCP_ME_CNTL, tmp); 4285 udelay(50); 4286 } 4287 4288 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4289 { 4290 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4291 const struct gfx_firmware_header_v1_0 *ce_hdr; 4292 const struct gfx_firmware_header_v1_0 *me_hdr; 4293 const __le32 *fw_data; 4294 unsigned i, fw_size; 4295 4296 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4297 return -EINVAL; 4298 4299 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4300 adev->gfx.pfp_fw->data; 4301 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4302 adev->gfx.ce_fw->data; 4303 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4304 adev->gfx.me_fw->data; 4305 4306 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4307 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4308 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4309 4310 gfx_v8_0_cp_gfx_enable(adev, false); 4311 4312 /* PFP */ 4313 fw_data = (const __le32 *) 4314 (adev->gfx.pfp_fw->data + 4315 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4316 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4317 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4318 for (i = 0; i < fw_size; i++) 4319 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4320 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4321 4322 /* CE */ 4323 fw_data = (const __le32 *) 4324 (adev->gfx.ce_fw->data + 4325 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4326 fw_size = 
le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4327 WREG32(mmCP_CE_UCODE_ADDR, 0); 4328 for (i = 0; i < fw_size; i++) 4329 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4330 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4331 4332 /* ME */ 4333 fw_data = (const __le32 *) 4334 (adev->gfx.me_fw->data + 4335 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4336 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4337 WREG32(mmCP_ME_RAM_WADDR, 0); 4338 for (i = 0; i < fw_size; i++) 4339 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4340 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4341 4342 return 0; 4343 } 4344 4345 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4346 { 4347 u32 count = 0; 4348 const struct cs_section_def *sect = NULL; 4349 const struct cs_extent_def *ext = NULL; 4350 4351 /* begin clear state */ 4352 count += 2; 4353 /* context control state */ 4354 count += 3; 4355 4356 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4357 for (ext = sect->section; ext->extent != NULL; ++ext) { 4358 if (sect->id == SECT_CONTEXT) 4359 count += 2 + ext->reg_count; 4360 else 4361 return 0; 4362 } 4363 } 4364 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4365 count += 4; 4366 /* end clear state */ 4367 count += 2; 4368 /* clear state */ 4369 count += 2; 4370 4371 return count; 4372 } 4373 4374 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4375 { 4376 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4377 const struct cs_section_def *sect = NULL; 4378 const struct cs_extent_def *ext = NULL; 4379 int r, i; 4380 4381 /* init the CP */ 4382 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4383 WREG32(mmCP_ENDIAN_SWAP, 0); 4384 WREG32(mmCP_DEVICE_ID, 1); 4385 4386 gfx_v8_0_cp_gfx_enable(adev, true); 4387 4388 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4389 if (r) { 4390 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4391 return r; 4392 } 4393 4394 /* clear state buffer */ 4395 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4396 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4397 4398 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4399 amdgpu_ring_write(ring, 0x80000000); 4400 amdgpu_ring_write(ring, 0x80000000); 4401 4402 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4403 for (ext = sect->section; ext->extent != NULL; ++ext) { 4404 if (sect->id == SECT_CONTEXT) { 4405 amdgpu_ring_write(ring, 4406 PACKET3(PACKET3_SET_CONTEXT_REG, 4407 ext->reg_count)); 4408 amdgpu_ring_write(ring, 4409 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4410 for (i = 0; i < ext->reg_count; i++) 4411 amdgpu_ring_write(ring, ext->extent[i]); 4412 } 4413 } 4414 } 4415 4416 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4417 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4418 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); 4419 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); 4420 4421 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4422 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4423 4424 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4425 amdgpu_ring_write(ring, 0); 4426 4427 /* init the CE partitions */ 4428 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4429 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4430 amdgpu_ring_write(ring, 0x8000); 4431 amdgpu_ring_write(ring, 0x8000); 4432 
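/* Added explanatory comment (not in the original source): all of the clear-state and init packets queued above are submitted to the CP here; amdgpu_ring_commit() pads the ring and advances the write pointer so the CP begins fetching them. */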
4433 amdgpu_ring_commit(ring); 4434 4435 return 0; 4436 } 4437 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) 4438 { 4439 u32 tmp; 4440 /* no gfx doorbells on iceland */ 4441 if (adev->asic_type == CHIP_TOPAZ) 4442 return; 4443 4444 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4445 4446 if (ring->use_doorbell) { 4447 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4448 DOORBELL_OFFSET, ring->doorbell_index); 4449 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4450 DOORBELL_HIT, 0); 4451 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4452 DOORBELL_EN, 1); 4453 } else { 4454 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 4455 } 4456 4457 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4458 4459 if (adev->flags & AMD_IS_APU) 4460 return; 4461 4462 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4463 DOORBELL_RANGE_LOWER, 4464 AMDGPU_DOORBELL_GFX_RING0); 4465 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4466 4467 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4468 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4469 } 4470 4471 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4472 { 4473 struct amdgpu_ring *ring; 4474 u32 tmp; 4475 u32 rb_bufsz; 4476 u64 rb_addr, rptr_addr, wptr_gpu_addr; 4477 int r; 4478 4479 /* Set the write pointer delay */ 4480 WREG32(mmCP_RB_WPTR_DELAY, 0); 4481 4482 /* set the RB to use vmid 0 */ 4483 WREG32(mmCP_RB_VMID, 0); 4484 4485 /* Set ring buffer size */ 4486 ring = &adev->gfx.gfx_ring[0]; 4487 rb_bufsz = order_base_2(ring->ring_size / 8); 4488 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4489 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4490 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4491 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4492 #ifdef __BIG_ENDIAN 4493 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4494 #endif 4495 WREG32(mmCP_RB0_CNTL, tmp); 4496 4497 /* Initialize the ring buffer's read and write pointers */ 4498 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4499 ring->wptr = 0; 4500 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4501 4502 /* set the wb address wether it's enabled or not */ 4503 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4504 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4505 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4506 4507 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4508 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 4509 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 4510 mdelay(1); 4511 WREG32(mmCP_RB0_CNTL, tmp); 4512 4513 rb_addr = ring->gpu_addr >> 8; 4514 WREG32(mmCP_RB0_BASE, rb_addr); 4515 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4516 4517 gfx_v8_0_set_cpg_door_bell(adev, ring); 4518 /* start the ring */ 4519 amdgpu_ring_clear_ring(ring); 4520 gfx_v8_0_cp_gfx_start(adev); 4521 ring->ready = true; 4522 r = amdgpu_ring_test_ring(ring); 4523 if (r) 4524 ring->ready = false; 4525 4526 return r; 4527 } 4528 4529 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4530 { 4531 int i; 4532 4533 if (enable) { 4534 WREG32(mmCP_MEC_CNTL, 0); 4535 } else { 4536 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4537 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4538 adev->gfx.compute_ring[i].ready = false; 4539 adev->gfx.kiq.ring.ready = false; 4540 } 4541 udelay(50); 4542 } 4543 4544 static int 
gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4545 { 4546 const struct gfx_firmware_header_v1_0 *mec_hdr; 4547 const __le32 *fw_data; 4548 unsigned i, fw_size; 4549 4550 if (!adev->gfx.mec_fw) 4551 return -EINVAL; 4552 4553 gfx_v8_0_cp_compute_enable(adev, false); 4554 4555 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4556 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4557 4558 fw_data = (const __le32 *) 4559 (adev->gfx.mec_fw->data + 4560 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4561 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4562 4563 /* MEC1 */ 4564 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4565 for (i = 0; i < fw_size; i++) 4566 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4567 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4568 4569 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4570 if (adev->gfx.mec2_fw) { 4571 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4572 4573 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4574 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4575 4576 fw_data = (const __le32 *) 4577 (adev->gfx.mec2_fw->data + 4578 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4579 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4580 4581 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4582 for (i = 0; i < fw_size; i++) 4583 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4584 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4585 } 4586 4587 return 0; 4588 } 4589 4590 /* KIQ functions */ 4591 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4592 { 4593 uint32_t tmp; 4594 struct amdgpu_device *adev = ring->adev; 4595 4596 /* tell RLC which is KIQ queue */ 4597 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4598 tmp &= 0xffffff00; 4599 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4600 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4601 tmp |= 0x80; 4602 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4603 } 4604 4605 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4606 { 4607 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4608 uint32_t scratch, tmp = 0; 4609 uint64_t queue_mask = 0; 4610 int r, i; 4611 4612 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4613 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4614 continue; 4615 4616 /* This situation may be hit in the future if a new HW 4617 * generation exposes more than 64 queues. 
If so, the 4618 * definition of queue_mask needs updating */ 4619 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4620 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4621 break; 4622 } 4623 4624 queue_mask |= (1ull << i); 4625 } 4626 4627 r = amdgpu_gfx_scratch_get(adev, &scratch); 4628 if (r) { 4629 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4630 return r; 4631 } 4632 WREG32(scratch, 0xCAFEDEAD); 4633 4634 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4635 if (r) { 4636 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4637 amdgpu_gfx_scratch_free(adev, scratch); 4638 return r; 4639 } 4640 /* set resources */ 4641 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4642 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4643 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4644 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4645 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4646 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4647 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4648 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4649 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4650 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4651 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4652 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4653 4654 /* map queues */ 4655 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4656 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4657 amdgpu_ring_write(kiq_ring, 4658 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4659 amdgpu_ring_write(kiq_ring, 4660 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4661 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4662 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4663 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4664 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4665 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4666 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4667 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4668 } 4669 /* write to scratch for completion */ 4670 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4671 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4672 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4673 amdgpu_ring_commit(kiq_ring); 4674 4675 for (i = 0; i < adev->usec_timeout; i++) { 4676 tmp = RREG32(scratch); 4677 if (tmp == 0xDEADBEEF) 4678 break; 4679 DRM_UDELAY(1); 4680 } 4681 if (i >= adev->usec_timeout) { 4682 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4683 scratch, tmp); 4684 r = -EINVAL; 4685 } 4686 amdgpu_gfx_scratch_free(adev, scratch); 4687 4688 return r; 4689 } 4690 4691 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4692 { 4693 int i, r = 0; 4694 4695 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4696 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4697 for (i = 0; i < adev->usec_timeout; i++) { 4698 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4699 break; 4700 udelay(1); 4701 } 4702 if (i == adev->usec_timeout) 4703 r = -ETIMEDOUT; 4704 } 4705 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4706 WREG32(mmCP_HQD_PQ_RPTR, 0); 4707 WREG32(mmCP_HQD_PQ_WPTR, 0); 4708 4709 return r; 4710 } 4711 4712 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4713 { 4714 struct amdgpu_device *adev = ring->adev; 4715 struct vi_mqd *mqd = ring->mqd_ptr; 4716 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4717 uint32_t tmp; 4718 4719 mqd->header = 0xC0310800; 4720 mqd->compute_pipelinestat_enable = 0x00000001; 4721 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4722 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4723 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4724 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4725 mqd->compute_misc_reserved = 0x00000003; 4726 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4727 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4728 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4729 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4730 eop_base_addr = ring->eop_gpu_addr >> 8; 4731 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4732 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4733 4734 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4735 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4736 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4737 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4738 4739 mqd->cp_hqd_eop_control = tmp; 4740 4741 /* enable doorbell? */ 4742 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4743 CP_HQD_PQ_DOORBELL_CONTROL, 4744 DOORBELL_EN, 4745 ring->use_doorbell ? 
1 : 0); 4746 4747 mqd->cp_hqd_pq_doorbell_control = tmp; 4748 4749 /* set the pointer to the MQD */ 4750 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4751 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4752 4753 /* set MQD vmid to 0 */ 4754 tmp = RREG32(mmCP_MQD_CONTROL); 4755 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4756 mqd->cp_mqd_control = tmp; 4757 4758 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4759 hqd_gpu_addr = ring->gpu_addr >> 8; 4760 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4761 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4762 4763 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4764 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4765 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4766 (order_base_2(ring->ring_size / 4) - 1)); 4767 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4768 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4769 #ifdef __BIG_ENDIAN 4770 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4771 #endif 4772 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4774 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4775 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4776 mqd->cp_hqd_pq_control = tmp; 4777 4778 /* set the wb address whether it's enabled or not */ 4779 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4780 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4781 mqd->cp_hqd_pq_rptr_report_addr_hi = 4782 upper_32_bits(wb_gpu_addr) & 0xffff; 4783 4784 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4785 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4786 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4787 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4788 4789 tmp = 0; 4790 /* enable the doorbell if requested */ 4791 if (ring->use_doorbell) { 4792 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4793 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4794 DOORBELL_OFFSET, ring->doorbell_index); 4795 4796 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4797 DOORBELL_EN, 1); 4798 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4799 DOORBELL_SOURCE, 0); 4800 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4801 DOORBELL_HIT, 0); 4802 } 4803 4804 mqd->cp_hqd_pq_doorbell_control = tmp; 4805 4806 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4807 ring->wptr = 0; 4808 mqd->cp_hqd_pq_wptr = ring->wptr; 4809 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4810 4811 /* set the vmid for the queue */ 4812 mqd->cp_hqd_vmid = 0; 4813 4814 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4815 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4816 mqd->cp_hqd_persistent_state = tmp; 4817 4818 /* set MTYPE */ 4819 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4820 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4821 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4822 mqd->cp_hqd_ib_control = tmp; 4823 4824 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4825 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4826 mqd->cp_hqd_iq_timer = tmp; 4827 4828 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4829 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4830 mqd->cp_hqd_ctx_save_control = tmp; 4831 4832 /* defaults */ 4833 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4834 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); 
4835 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4836 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4837 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4838 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4839 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4840 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4841 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4842 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4843 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4844 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4845 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4846 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4847 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4848 4849 /* activate the queue */ 4850 mqd->cp_hqd_active = 1; 4851 4852 return 0; 4853 } 4854 4855 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4856 struct vi_mqd *mqd); 4857 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4858 struct vi_mqd *mqd) 4859 { 4860 uint32_t mqd_reg; 4861 uint32_t *mqd_data; 4862 4863 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4864 mqd_data = &mqd->cp_mqd_base_addr_lo; 4865 4866 /* disable wptr polling */ 4867 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4868 4869 /* program all HQD registers */ 4870 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4871 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4872 4873 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4874 * This is safe since EOP RPTR==WPTR for any inactive HQD 4875 * on ASICs that do not support context-save. 4876 * EOP writes/reads can start anywhere in the ring. 
4877 */ 4878 if (adev->asic_type != CHIP_TONGA) { 4879 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4880 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4881 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4882 } 4883 4884 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4885 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4886 4887 /* activate the HQD */ 4888 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4889 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4890 4891 return 0; 4892 } 4893 4894 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4895 { 4896 struct amdgpu_device *adev = ring->adev; 4897 struct vi_mqd *mqd = ring->mqd_ptr; 4898 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4899 4900 gfx_v8_0_kiq_setting(ring); 4901 4902 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4903 /* reset MQD to a clean status */ 4904 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4905 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4906 4907 /* reset ring buffer */ 4908 ring->wptr = 0; 4909 amdgpu_ring_clear_ring(ring); 4910 mutex_lock(&adev->srbm_mutex); 4911 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4912 gfx_v8_0_mqd_commit(adev, mqd); 4913 vi_srbm_select(adev, 0, 0, 0, 0); 4914 mutex_unlock(&adev->srbm_mutex); 4915 } else { 4916 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4917 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4918 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4919 mutex_lock(&adev->srbm_mutex); 4920 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4921 gfx_v8_0_mqd_init(ring); 4922 gfx_v8_0_mqd_commit(adev, mqd); 4923 vi_srbm_select(adev, 0, 0, 0, 0); 4924 mutex_unlock(&adev->srbm_mutex); 4925 4926 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4927 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4928 } 4929 4930 return 0; 4931 } 4932 4933 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4934 { 4935 struct amdgpu_device *adev = ring->adev; 4936 struct vi_mqd *mqd = ring->mqd_ptr; 4937 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4938 4939 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4940 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4941 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4942 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4943 mutex_lock(&adev->srbm_mutex); 4944 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4945 gfx_v8_0_mqd_init(ring); 4946 vi_srbm_select(adev, 0, 0, 0, 0); 4947 mutex_unlock(&adev->srbm_mutex); 4948 4949 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4950 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4951 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4952 /* reset MQD to a clean status */ 4953 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4954 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4955 /* reset ring buffer */ 4956 ring->wptr = 0; 4957 amdgpu_ring_clear_ring(ring); 4958 } else { 4959 amdgpu_ring_clear_ring(ring); 4960 } 4961 return 0; 4962 } 4963 4964 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4965 { 4966 if (adev->asic_type > CHIP_TONGA) { 4967 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4968 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4969 } 4970 /* enable doorbells */ 4971 
WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4972 } 4973 4974 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4975 { 4976 struct amdgpu_ring *ring = NULL; 4977 int r = 0, i; 4978 4979 gfx_v8_0_cp_compute_enable(adev, true); 4980 4981 ring = &adev->gfx.kiq.ring; 4982 4983 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4984 if (unlikely(r != 0)) 4985 goto done; 4986 4987 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4988 if (!r) { 4989 r = gfx_v8_0_kiq_init_queue(ring); 4990 amdgpu_bo_kunmap(ring->mqd_obj); 4991 ring->mqd_ptr = NULL; 4992 } 4993 amdgpu_bo_unreserve(ring->mqd_obj); 4994 if (r) 4995 goto done; 4996 4997 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4998 ring = &adev->gfx.compute_ring[i]; 4999 5000 r = amdgpu_bo_reserve(ring->mqd_obj, false); 5001 if (unlikely(r != 0)) 5002 goto done; 5003 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 5004 if (!r) { 5005 r = gfx_v8_0_kcq_init_queue(ring); 5006 amdgpu_bo_kunmap(ring->mqd_obj); 5007 ring->mqd_ptr = NULL; 5008 } 5009 amdgpu_bo_unreserve(ring->mqd_obj); 5010 if (r) 5011 goto done; 5012 } 5013 5014 gfx_v8_0_set_mec_doorbell_range(adev); 5015 5016 r = gfx_v8_0_kiq_kcq_enable(adev); 5017 if (r) 5018 goto done; 5019 5020 /* Test KIQ */ 5021 ring = &adev->gfx.kiq.ring; 5022 ring->ready = true; 5023 r = amdgpu_ring_test_ring(ring); 5024 if (r) { 5025 ring->ready = false; 5026 goto done; 5027 } 5028 5029 /* Test KCQs */ 5030 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5031 ring = &adev->gfx.compute_ring[i]; 5032 ring->ready = true; 5033 r = amdgpu_ring_test_ring(ring); 5034 if (r) 5035 ring->ready = false; 5036 } 5037 5038 done: 5039 return r; 5040 } 5041 5042 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5043 { 5044 int r; 5045 5046 if (!(adev->flags & AMD_IS_APU)) 5047 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5048 5049 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 5050 /* legacy firmware loading */ 5051 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5052 if (r) 5053 return r; 5054 5055 r = gfx_v8_0_cp_compute_load_microcode(adev); 5056 if (r) 5057 return r; 5058 } 5059 5060 r = gfx_v8_0_cp_gfx_resume(adev); 5061 if (r) 5062 return r; 5063 5064 r = gfx_v8_0_kiq_resume(adev); 5065 if (r) 5066 return r; 5067 5068 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5069 5070 return 0; 5071 } 5072 5073 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5074 { 5075 gfx_v8_0_cp_gfx_enable(adev, enable); 5076 gfx_v8_0_cp_compute_enable(adev, enable); 5077 } 5078 5079 static int gfx_v8_0_hw_init(void *handle) 5080 { 5081 int r; 5082 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5083 5084 gfx_v8_0_init_golden_registers(adev); 5085 gfx_v8_0_gpu_init(adev); 5086 5087 r = gfx_v8_0_rlc_resume(adev); 5088 if (r) 5089 return r; 5090 5091 r = gfx_v8_0_cp_resume(adev); 5092 5093 return r; 5094 } 5095 5096 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) 5097 { 5098 struct amdgpu_device *adev = kiq_ring->adev; 5099 uint32_t scratch, tmp = 0; 5100 int r, i; 5101 5102 r = amdgpu_gfx_scratch_get(adev, &scratch); 5103 if (r) { 5104 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5105 return r; 5106 } 5107 WREG32(scratch, 0xCAFEDEAD); 5108 5109 r = amdgpu_ring_alloc(kiq_ring, 10); 5110 if (r) { 5111 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5112 amdgpu_gfx_scratch_free(adev, scratch); 5113 return r; 5114 } 5115 5116 /* unmap queues */ 5117 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5118 
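	/* The five payload dwords below are the control word (action, queue/engine
	 * select, number of queues) followed by four doorbell-offset slots; only
	 * DOORBELL_OFFSET0 is used since a single KCQ is reset per call, so the
	 * remaining three are written as zero.
	 */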
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5119 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5120 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5121 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5122 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5123 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5124 amdgpu_ring_write(kiq_ring, 0); 5125 amdgpu_ring_write(kiq_ring, 0); 5126 amdgpu_ring_write(kiq_ring, 0); 5127 /* write to scratch for completion */ 5128 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5129 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5130 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5131 amdgpu_ring_commit(kiq_ring); 5132 5133 for (i = 0; i < adev->usec_timeout; i++) { 5134 tmp = RREG32(scratch); 5135 if (tmp == 0xDEADBEEF) 5136 break; 5137 DRM_UDELAY(1); 5138 } 5139 if (i >= adev->usec_timeout) { 5140 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5141 r = -EINVAL; 5142 } 5143 amdgpu_gfx_scratch_free(adev, scratch); 5144 return r; 5145 } 5146 5147 static int gfx_v8_0_hw_fini(void *handle) 5148 { 5149 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5150 int i; 5151 5152 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5153 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5154 5155 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 5156 5157 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); 5158 5159 /* disable KCQ to avoid CPC touch memory not valid anymore */ 5160 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5161 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5162 5163 if (amdgpu_sriov_vf(adev)) { 5164 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5165 return 0; 5166 } 5167 gfx_v8_0_cp_enable(adev, false); 5168 gfx_v8_0_rlc_stop(adev); 5169 5170 amdgpu_device_ip_set_powergating_state(adev, 5171 AMD_IP_BLOCK_TYPE_GFX, 5172 AMD_PG_STATE_UNGATE); 5173 5174 return 0; 5175 } 5176 5177 static int gfx_v8_0_suspend(void *handle) 5178 { 5179 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5180 adev->gfx.in_suspend = true; 5181 return gfx_v8_0_hw_fini(adev); 5182 } 5183 5184 static int gfx_v8_0_resume(void *handle) 5185 { 5186 int r; 5187 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5188 5189 r = gfx_v8_0_hw_init(adev); 5190 adev->gfx.in_suspend = false; 5191 return r; 5192 } 5193 5194 static bool gfx_v8_0_is_idle(void *handle) 5195 { 5196 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5197 5198 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5199 return false; 5200 else 5201 return true; 5202 } 5203 5204 static int gfx_v8_0_wait_for_idle(void *handle) 5205 { 5206 unsigned i; 5207 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5208 5209 for (i = 0; i < adev->usec_timeout; i++) { 5210 if (gfx_v8_0_is_idle(handle)) 5211 return 0; 5212 5213 udelay(1); 5214 } 5215 return -ETIMEDOUT; 5216 } 5217 5218 static bool gfx_v8_0_check_soft_reset(void *handle) 5219 { 5220 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5221 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5222 u32 tmp; 5223 5224 /* GRBM_STATUS */ 5225 tmp = RREG32(mmGRBM_STATUS); 5226 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5227 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5228 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5229 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5230 GRBM_STATUS__GDS_BUSY_MASK | 
GRBM_STATUS__SPI_BUSY_MASK | 5231 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5232 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5233 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5234 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5235 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5236 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5237 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5238 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5239 } 5240 5241 /* GRBM_STATUS2 */ 5242 tmp = RREG32(mmGRBM_STATUS2); 5243 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5244 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5245 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5246 5247 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5248 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5249 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5250 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5251 SOFT_RESET_CPF, 1); 5252 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5253 SOFT_RESET_CPC, 1); 5254 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5255 SOFT_RESET_CPG, 1); 5256 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5257 SOFT_RESET_GRBM, 1); 5258 } 5259 5260 /* SRBM_STATUS */ 5261 tmp = RREG32(mmSRBM_STATUS); 5262 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5263 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5264 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5265 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5266 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5267 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5268 5269 if (grbm_soft_reset || srbm_soft_reset) { 5270 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5271 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5272 return true; 5273 } else { 5274 adev->gfx.grbm_soft_reset = 0; 5275 adev->gfx.srbm_soft_reset = 0; 5276 return false; 5277 } 5278 } 5279 5280 static int gfx_v8_0_pre_soft_reset(void *handle) 5281 { 5282 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5283 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5284 5285 if ((!adev->gfx.grbm_soft_reset) && 5286 (!adev->gfx.srbm_soft_reset)) 5287 return 0; 5288 5289 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5290 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5291 5292 /* stop the rlc */ 5293 gfx_v8_0_rlc_stop(adev); 5294 5295 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5296 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5297 /* Disable GFX parsing/prefetching */ 5298 gfx_v8_0_cp_gfx_enable(adev, false); 5299 5300 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5301 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5302 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5303 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5304 int i; 5305 5306 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5307 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5308 5309 mutex_lock(&adev->srbm_mutex); 5310 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5311 gfx_v8_0_deactivate_hqd(adev, 2); 5312 vi_srbm_select(adev, 0, 0, 0, 0); 5313 mutex_unlock(&adev->srbm_mutex); 5314 } 5315 /* Disable MEC parsing/prefetching */ 5316 gfx_v8_0_cp_compute_enable(adev, false); 5317 } 5318 5319 return 0; 5320 } 5321 5322 static int gfx_v8_0_soft_reset(void *handle) 5323 { 5324 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5325 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5326 u32 tmp; 5327 5328 if 
((!adev->gfx.grbm_soft_reset) && 5329 (!adev->gfx.srbm_soft_reset)) 5330 return 0; 5331 5332 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5333 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5334 5335 if (grbm_soft_reset || srbm_soft_reset) { 5336 tmp = RREG32(mmGMCON_DEBUG); 5337 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5338 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5339 WREG32(mmGMCON_DEBUG, tmp); 5340 udelay(50); 5341 } 5342 5343 if (grbm_soft_reset) { 5344 tmp = RREG32(mmGRBM_SOFT_RESET); 5345 tmp |= grbm_soft_reset; 5346 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5347 WREG32(mmGRBM_SOFT_RESET, tmp); 5348 tmp = RREG32(mmGRBM_SOFT_RESET); 5349 5350 udelay(50); 5351 5352 tmp &= ~grbm_soft_reset; 5353 WREG32(mmGRBM_SOFT_RESET, tmp); 5354 tmp = RREG32(mmGRBM_SOFT_RESET); 5355 } 5356 5357 if (srbm_soft_reset) { 5358 tmp = RREG32(mmSRBM_SOFT_RESET); 5359 tmp |= srbm_soft_reset; 5360 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5361 WREG32(mmSRBM_SOFT_RESET, tmp); 5362 tmp = RREG32(mmSRBM_SOFT_RESET); 5363 5364 udelay(50); 5365 5366 tmp &= ~srbm_soft_reset; 5367 WREG32(mmSRBM_SOFT_RESET, tmp); 5368 tmp = RREG32(mmSRBM_SOFT_RESET); 5369 } 5370 5371 if (grbm_soft_reset || srbm_soft_reset) { 5372 tmp = RREG32(mmGMCON_DEBUG); 5373 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5374 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5375 WREG32(mmGMCON_DEBUG, tmp); 5376 } 5377 5378 /* Wait a little for things to settle down */ 5379 udelay(50); 5380 5381 return 0; 5382 } 5383 5384 static int gfx_v8_0_post_soft_reset(void *handle) 5385 { 5386 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5387 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5388 5389 if ((!adev->gfx.grbm_soft_reset) && 5390 (!adev->gfx.srbm_soft_reset)) 5391 return 0; 5392 5393 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5394 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5395 5396 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5397 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5398 gfx_v8_0_cp_gfx_resume(adev); 5399 5400 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5401 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5402 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5403 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5404 int i; 5405 5406 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5407 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5408 5409 mutex_lock(&adev->srbm_mutex); 5410 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5411 gfx_v8_0_deactivate_hqd(adev, 2); 5412 vi_srbm_select(adev, 0, 0, 0, 0); 5413 mutex_unlock(&adev->srbm_mutex); 5414 } 5415 gfx_v8_0_kiq_resume(adev); 5416 } 5417 gfx_v8_0_rlc_start(adev); 5418 5419 return 0; 5420 } 5421 5422 /** 5423 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5424 * 5425 * @adev: amdgpu_device pointer 5426 * 5427 * Fetches a GPU clock counter snapshot. 5428 * Returns the 64 bit clock counter snapshot. 
5429 */ 5430 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5431 { 5432 uint64_t clock; 5433 5434 mutex_lock(&adev->gfx.gpu_clock_mutex); 5435 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5436 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5437 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5438 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5439 return clock; 5440 } 5441 5442 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5443 uint32_t vmid, 5444 uint32_t gds_base, uint32_t gds_size, 5445 uint32_t gws_base, uint32_t gws_size, 5446 uint32_t oa_base, uint32_t oa_size) 5447 { 5448 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5449 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5450 5451 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5452 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5453 5454 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5455 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5456 5457 /* GDS Base */ 5458 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5459 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5460 WRITE_DATA_DST_SEL(0))); 5461 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5462 amdgpu_ring_write(ring, 0); 5463 amdgpu_ring_write(ring, gds_base); 5464 5465 /* GDS Size */ 5466 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5467 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5468 WRITE_DATA_DST_SEL(0))); 5469 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5470 amdgpu_ring_write(ring, 0); 5471 amdgpu_ring_write(ring, gds_size); 5472 5473 /* GWS */ 5474 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5475 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5476 WRITE_DATA_DST_SEL(0))); 5477 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5478 amdgpu_ring_write(ring, 0); 5479 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5480 5481 /* OA */ 5482 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5483 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5484 WRITE_DATA_DST_SEL(0))); 5485 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5486 amdgpu_ring_write(ring, 0); 5487 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5488 } 5489 5490 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5491 { 5492 WREG32(mmSQ_IND_INDEX, 5493 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5494 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5495 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5496 (SQ_IND_INDEX__FORCE_READ_MASK)); 5497 return RREG32(mmSQ_IND_DATA); 5498 } 5499 5500 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5501 uint32_t wave, uint32_t thread, 5502 uint32_t regno, uint32_t num, uint32_t *out) 5503 { 5504 WREG32(mmSQ_IND_INDEX, 5505 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5506 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5507 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5508 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5509 (SQ_IND_INDEX__FORCE_READ_MASK) | 5510 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5511 while (num--) 5512 *(out++) = RREG32(mmSQ_IND_DATA); 5513 } 5514 5515 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5516 { 5517 /* type 0 wave data */ 5518 dst[(*no_fields)++] = 0; 5519 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5520 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5521 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, 
ixSQ_WAVE_PC_HI); 5522 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5523 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5524 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5525 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5526 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5527 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5528 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5529 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5530 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5531 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5532 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5533 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5534 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5535 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5536 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5537 } 5538 5539 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5540 uint32_t wave, uint32_t start, 5541 uint32_t size, uint32_t *dst) 5542 { 5543 wave_read_regs( 5544 adev, simd, wave, 0, 5545 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5546 } 5547 5548 5549 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5550 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5551 .select_se_sh = &gfx_v8_0_select_se_sh, 5552 .read_wave_data = &gfx_v8_0_read_wave_data, 5553 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5554 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5555 }; 5556 5557 static int gfx_v8_0_early_init(void *handle) 5558 { 5559 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5560 5561 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5562 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5563 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5564 gfx_v8_0_set_ring_funcs(adev); 5565 gfx_v8_0_set_irq_funcs(adev); 5566 gfx_v8_0_set_gds_init(adev); 5567 gfx_v8_0_set_rlc_funcs(adev); 5568 5569 return 0; 5570 } 5571 5572 static int gfx_v8_0_late_init(void *handle) 5573 { 5574 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5575 int r; 5576 5577 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5578 if (r) 5579 return r; 5580 5581 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5582 if (r) 5583 return r; 5584 5585 /* requires IBs so do in late init after IB pool is initialized */ 5586 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5587 if (r) 5588 return r; 5589 5590 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5591 if (r) { 5592 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5593 return r; 5594 } 5595 5596 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5597 if (r) { 5598 DRM_ERROR( 5599 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5600 r); 5601 return r; 5602 } 5603 5604 return 0; 5605 } 5606 5607 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5608 bool enable) 5609 { 5610 if (((adev->asic_type == CHIP_POLARIS11) || 5611 (adev->asic_type == CHIP_POLARIS12) || 5612 (adev->asic_type == CHIP_VEGAM)) && 5613 adev->powerplay.pp_funcs->set_powergating_by_smu) 5614 /* Send msg to SMU via Powerplay */ 5615 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
enable); 5616 5617 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5618 } 5619 5620 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5621 bool enable) 5622 { 5623 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5624 } 5625 5626 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5627 bool enable) 5628 { 5629 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5630 } 5631 5632 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5633 bool enable) 5634 { 5635 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5636 } 5637 5638 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5639 bool enable) 5640 { 5641 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5642 5643 /* Read any GFX register to wake up GFX. */ 5644 if (!enable) 5645 RREG32(mmDB_RENDER_CONTROL); 5646 } 5647 5648 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5649 bool enable) 5650 { 5651 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5652 cz_enable_gfx_cg_power_gating(adev, true); 5653 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5654 cz_enable_gfx_pipeline_power_gating(adev, true); 5655 } else { 5656 cz_enable_gfx_cg_power_gating(adev, false); 5657 cz_enable_gfx_pipeline_power_gating(adev, false); 5658 } 5659 } 5660 5661 static int gfx_v8_0_set_powergating_state(void *handle, 5662 enum amd_powergating_state state) 5663 { 5664 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5665 bool enable = (state == AMD_PG_STATE_GATE); 5666 5667 if (amdgpu_sriov_vf(adev)) 5668 return 0; 5669 5670 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5671 AMD_PG_SUPPORT_RLC_SMU_HS | 5672 AMD_PG_SUPPORT_CP | 5673 AMD_PG_SUPPORT_GFX_DMG)) 5674 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5675 switch (adev->asic_type) { 5676 case CHIP_CARRIZO: 5677 case CHIP_STONEY: 5678 5679 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5680 cz_enable_sck_slow_down_on_power_up(adev, true); 5681 cz_enable_sck_slow_down_on_power_down(adev, true); 5682 } else { 5683 cz_enable_sck_slow_down_on_power_up(adev, false); 5684 cz_enable_sck_slow_down_on_power_down(adev, false); 5685 } 5686 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5687 cz_enable_cp_power_gating(adev, true); 5688 else 5689 cz_enable_cp_power_gating(adev, false); 5690 5691 cz_update_gfx_cg_power_gating(adev, enable); 5692 5693 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5694 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5695 else 5696 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5697 5698 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5699 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5700 else 5701 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5702 break; 5703 case CHIP_POLARIS11: 5704 case CHIP_POLARIS12: 5705 case CHIP_VEGAM: 5706 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5707 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5708 else 5709 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5710 5711 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5712 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5713 else 5714 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5715 5716 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5717 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5718 else 5719 polaris11_enable_gfx_quick_mg_power_gating(adev, 
false); 5720 break; 5721 default: 5722 break; 5723 } 5724 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5725 AMD_PG_SUPPORT_RLC_SMU_HS | 5726 AMD_PG_SUPPORT_CP | 5727 AMD_PG_SUPPORT_GFX_DMG)) 5728 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5729 return 0; 5730 } 5731 5732 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5733 { 5734 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5735 int data; 5736 5737 if (amdgpu_sriov_vf(adev)) 5738 *flags = 0; 5739 5740 /* AMD_CG_SUPPORT_GFX_MGCG */ 5741 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5742 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5743 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5744 5745 /* AMD_CG_SUPPORT_GFX_CGCG */ 5746 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5747 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5748 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5749 5750 /* AMD_CG_SUPPORT_GFX_CGLS */ 5751 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5752 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5753 5754 /* AMD_CG_SUPPORT_GFX_CGTS */ 5755 data = RREG32(mmCGTS_SM_CTRL_REG); 5756 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5757 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5758 5759 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5760 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5761 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5762 5763 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5764 data = RREG32(mmRLC_MEM_SLP_CNTL); 5765 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5766 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5767 5768 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5769 data = RREG32(mmCP_MEM_SLP_CNTL); 5770 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5771 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5772 } 5773 5774 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5775 uint32_t reg_addr, uint32_t cmd) 5776 { 5777 uint32_t data; 5778 5779 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5780 5781 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5782 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5783 5784 data = RREG32(mmRLC_SERDES_WR_CTRL); 5785 if (adev->asic_type == CHIP_STONEY) 5786 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5787 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5788 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5789 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5790 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5791 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5792 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5793 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5794 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5795 else 5796 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5797 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5798 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5799 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5800 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5801 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5802 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5803 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5804 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5805 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5806 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5807 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5808 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5809 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5810 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5811 5812 WREG32(mmRLC_SERDES_WR_CTRL, data); 5813 } 5814 5815 #define MSG_ENTER_RLC_SAFE_MODE 1 5816 #define MSG_EXIT_RLC_SAFE_MODE 0 5817 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5818 #define RLC_GPR_REG2__REQ__SHIFT 0 5819 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5820 #define RLC_GPR_REG2__MESSAGE_MASK
0x0000001e 5821 5822 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5823 { 5824 u32 data; 5825 unsigned i; 5826 5827 data = RREG32(mmRLC_CNTL); 5828 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5829 return; 5830 5831 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5832 data |= RLC_SAFE_MODE__CMD_MASK; 5833 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5834 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5835 WREG32(mmRLC_SAFE_MODE, data); 5836 5837 for (i = 0; i < adev->usec_timeout; i++) { 5838 if ((RREG32(mmRLC_GPM_STAT) & 5839 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5840 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5841 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5842 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5843 break; 5844 udelay(1); 5845 } 5846 5847 for (i = 0; i < adev->usec_timeout; i++) { 5848 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5849 break; 5850 udelay(1); 5851 } 5852 adev->gfx.rlc.in_safe_mode = true; 5853 } 5854 } 5855 5856 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5857 { 5858 u32 data = 0; 5859 unsigned i; 5860 5861 data = RREG32(mmRLC_CNTL); 5862 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5863 return; 5864 5865 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5866 if (adev->gfx.rlc.in_safe_mode) { 5867 data |= RLC_SAFE_MODE__CMD_MASK; 5868 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5869 WREG32(mmRLC_SAFE_MODE, data); 5870 adev->gfx.rlc.in_safe_mode = false; 5871 } 5872 } 5873 5874 for (i = 0; i < adev->usec_timeout; i++) { 5875 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5876 break; 5877 udelay(1); 5878 } 5879 } 5880 5881 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5882 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5883 .exit_safe_mode = iceland_exit_rlc_safe_mode 5884 }; 5885 5886 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5887 bool enable) 5888 { 5889 uint32_t temp, data; 5890 5891 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5892 5893 /* It is disabled by HW by default */ 5894 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5895 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5896 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5897 /* 1 - RLC memory Light sleep */ 5898 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5899 5900 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5901 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5902 } 5903 5904 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5905 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5906 if (adev->flags & AMD_IS_APU) 5907 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5908 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5909 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5910 else 5911 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5912 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5913 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5914 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5915 5916 if (temp != data) 5917 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5918 5919 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5920 gfx_v8_0_wait_for_rlc_serdes(adev); 5921 5922 /* 5 - clear mgcg override */ 5923 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5924 5925 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5926 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5927 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5928 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5929 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5930 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 
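	/* Clearing OVERRIDE here (and LS_OVERRIDE below when CGTS_LS is supported)
	 * is the state gfx_v8_0_get_clockgating_state() later reads back to report
	 * AMD_CG_SUPPORT_GFX_CGTS / AMD_CG_SUPPORT_GFX_CGTS_LS as active.
	 */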
5931 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5932 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5933 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5934 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5935 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5936 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5937 if (temp != data) 5938 WREG32(mmCGTS_SM_CTRL_REG, data); 5939 } 5940 udelay(50); 5941 5942 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5943 gfx_v8_0_wait_for_rlc_serdes(adev); 5944 } else { 5945 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5946 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5947 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5948 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5949 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5950 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5951 if (temp != data) 5952 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5953 5954 /* 2 - disable MGLS in RLC */ 5955 data = RREG32(mmRLC_MEM_SLP_CNTL); 5956 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5957 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5958 WREG32(mmRLC_MEM_SLP_CNTL, data); 5959 } 5960 5961 /* 3 - disable MGLS in CP */ 5962 data = RREG32(mmCP_MEM_SLP_CNTL); 5963 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5964 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5965 WREG32(mmCP_MEM_SLP_CNTL, data); 5966 } 5967 5968 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5969 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5970 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5971 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5972 if (temp != data) 5973 WREG32(mmCGTS_SM_CTRL_REG, data); 5974 5975 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5976 gfx_v8_0_wait_for_rlc_serdes(adev); 5977 5978 /* 6 - set mgcg override */ 5979 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5980 5981 udelay(50); 5982 5983 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5984 gfx_v8_0_wait_for_rlc_serdes(adev); 5985 } 5986 5987 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5988 } 5989 5990 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5991 bool enable) 5992 { 5993 uint32_t temp, temp1, data, data1; 5994 5995 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5996 5997 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5998 5999 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 6000 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6001 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 6002 if (temp1 != data1) 6003 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6004 6005 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6006 gfx_v8_0_wait_for_rlc_serdes(adev); 6007 6008 /* 2 - clear cgcg override */ 6009 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6010 6011 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6012 gfx_v8_0_wait_for_rlc_serdes(adev); 6013 6014 /* 3 - write cmd to set CGLS */ 6015 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 6016 6017 /* 4 - enable cgcg */ 6018 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 6019 6020 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6021 /* enable cgls*/ 6022 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6023 6024 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6025 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 6026 6027 if (temp1 != data1) 6028 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6029 } else { 6030 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6031 } 6032 6033 if (temp != data) 6034 
WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6035 6036 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 6037 * Cmp_busy/GFX_Idle interrupts 6038 */ 6039 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6040 } else { 6041 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 6042 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 6043 6044 /* TEST CGCG */ 6045 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6046 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 6047 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 6048 if (temp1 != data1) 6049 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6050 6051 /* read gfx register to wake up cgcg */ 6052 RREG32(mmCB_CGTT_SCLK_CTRL); 6053 RREG32(mmCB_CGTT_SCLK_CTRL); 6054 RREG32(mmCB_CGTT_SCLK_CTRL); 6055 RREG32(mmCB_CGTT_SCLK_CTRL); 6056 6057 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6058 gfx_v8_0_wait_for_rlc_serdes(adev); 6059 6060 /* write cmd to Set CGCG Override */ 6061 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6062 6063 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6064 gfx_v8_0_wait_for_rlc_serdes(adev); 6065 6066 /* write cmd to Clear CGLS */ 6067 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6068 6069 /* disable cgcg, cgls should be disabled too. */ 6070 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6071 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6072 if (temp != data) 6073 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6074 /* enable interrupts again for PG */ 6075 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6076 } 6077 6078 gfx_v8_0_wait_for_rlc_serdes(adev); 6079 6080 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6081 } 6082 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6083 bool enable) 6084 { 6085 if (enable) { 6086 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6087 * === MGCG + MGLS + TS(CG/LS) === 6088 */ 6089 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6090 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6091 } else { 6092 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6093 * === CGCG + CGLS === 6094 */ 6095 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6096 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6097 } 6098 return 0; 6099 } 6100 6101 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6102 enum amd_clockgating_state state) 6103 { 6104 uint32_t msg_id, pp_state = 0; 6105 uint32_t pp_support_state = 0; 6106 6107 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6108 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6109 pp_support_state = PP_STATE_SUPPORT_LS; 6110 pp_state = PP_STATE_LS; 6111 } 6112 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6113 pp_support_state |= PP_STATE_SUPPORT_CG; 6114 pp_state |= PP_STATE_CG; 6115 } 6116 if (state == AMD_CG_STATE_UNGATE) 6117 pp_state = 0; 6118 6119 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6120 PP_BLOCK_GFX_CG, 6121 pp_support_state, 6122 pp_state); 6123 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6124 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6125 } 6126 6127 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6128 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6129 pp_support_state = PP_STATE_SUPPORT_LS; 6130 pp_state = PP_STATE_LS; 6131 } 6132 6133 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6134 pp_support_state |= PP_STATE_SUPPORT_CG; 6135 pp_state |= PP_STATE_CG; 6136 } 6137 6138 if (state == AMD_CG_STATE_UNGATE) 6139 pp_state = 0; 6140
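	/* As with the CG block above: pp_support_state advertises which of LS/CG
	 * the ASIC supports for the MG block, pp_state carries the requested state
	 * (forced to 0 on ungate), and PP_CG_MSG_ID() presumably packs group,
	 * block and both fields into the single message word handed to the SMU.
	 */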
6141 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6142 PP_BLOCK_GFX_MG, 6143 pp_support_state, 6144 pp_state); 6145 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6146 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6147 } 6148 6149 return 0; 6150 } 6151 6152 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6153 enum amd_clockgating_state state) 6154 { 6155 6156 uint32_t msg_id, pp_state = 0; 6157 uint32_t pp_support_state = 0; 6158 6159 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6160 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6161 pp_support_state = PP_STATE_SUPPORT_LS; 6162 pp_state = PP_STATE_LS; 6163 } 6164 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6165 pp_support_state |= PP_STATE_SUPPORT_CG; 6166 pp_state |= PP_STATE_CG; 6167 } 6168 if (state == AMD_CG_STATE_UNGATE) 6169 pp_state = 0; 6170 6171 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6172 PP_BLOCK_GFX_CG, 6173 pp_support_state, 6174 pp_state); 6175 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6176 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6177 } 6178 6179 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6180 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6181 pp_support_state = PP_STATE_SUPPORT_LS; 6182 pp_state = PP_STATE_LS; 6183 } 6184 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6185 pp_support_state |= PP_STATE_SUPPORT_CG; 6186 pp_state |= PP_STATE_CG; 6187 } 6188 if (state == AMD_CG_STATE_UNGATE) 6189 pp_state = 0; 6190 6191 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6192 PP_BLOCK_GFX_3D, 6193 pp_support_state, 6194 pp_state); 6195 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6196 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6197 } 6198 6199 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6200 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6201 pp_support_state = PP_STATE_SUPPORT_LS; 6202 pp_state = PP_STATE_LS; 6203 } 6204 6205 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6206 pp_support_state |= PP_STATE_SUPPORT_CG; 6207 pp_state |= PP_STATE_CG; 6208 } 6209 6210 if (state == AMD_CG_STATE_UNGATE) 6211 pp_state = 0; 6212 6213 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6214 PP_BLOCK_GFX_MG, 6215 pp_support_state, 6216 pp_state); 6217 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6218 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6219 } 6220 6221 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6222 pp_support_state = PP_STATE_SUPPORT_LS; 6223 6224 if (state == AMD_CG_STATE_UNGATE) 6225 pp_state = 0; 6226 else 6227 pp_state = PP_STATE_LS; 6228 6229 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6230 PP_BLOCK_GFX_RLC, 6231 pp_support_state, 6232 pp_state); 6233 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6234 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6235 } 6236 6237 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6238 pp_support_state = PP_STATE_SUPPORT_LS; 6239 6240 if (state == AMD_CG_STATE_UNGATE) 6241 pp_state = 0; 6242 else 6243 pp_state = PP_STATE_LS; 6244 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6245 PP_BLOCK_GFX_CP, 6246 pp_support_state, 6247 pp_state); 6248 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6249 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6250 } 6251 6252 return 0; 6253 } 6254 6255 static int gfx_v8_0_set_clockgating_state(void *handle, 6256 enum amd_clockgating_state state) 6257 { 6258 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6259 6260 if (amdgpu_sriov_vf(adev)) 6261 return 0; 6262 6263 switch 
(adev->asic_type) { 6264 case CHIP_FIJI: 6265 case CHIP_CARRIZO: 6266 case CHIP_STONEY: 6267 gfx_v8_0_update_gfx_clock_gating(adev, 6268 state == AMD_CG_STATE_GATE); 6269 break; 6270 case CHIP_TONGA: 6271 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6272 break; 6273 case CHIP_POLARIS10: 6274 case CHIP_POLARIS11: 6275 case CHIP_POLARIS12: 6276 case CHIP_VEGAM: 6277 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6278 break; 6279 default: 6280 break; 6281 } 6282 return 0; 6283 } 6284 6285 static uint64_t gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6286 { 6287 return ring->adev->wb.wb[ring->rptr_offs]; 6288 } 6289 6290 static uint64_t gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6291 { 6292 struct amdgpu_device *adev = ring->adev; 6293 6294 if (ring->use_doorbell) 6295 /* XXX check if swapping is necessary on BE */ 6296 return ring->adev->wb.wb[ring->wptr_offs]; 6297 else 6298 return RREG32(mmCP_RB0_WPTR); 6299 } 6300 6301 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6302 { 6303 struct amdgpu_device *adev = ring->adev; 6304 6305 if (ring->use_doorbell) { 6306 /* XXX check if swapping is necessary on BE */ 6307 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6308 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6309 } else { 6310 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6311 (void)RREG32(mmCP_RB0_WPTR); 6312 } 6313 } 6314 6315 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6316 { 6317 u32 ref_and_mask, reg_mem_engine; 6318 6319 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6320 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6321 switch (ring->me) { 6322 case 1: 6323 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6324 break; 6325 case 2: 6326 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6327 break; 6328 default: 6329 return; 6330 } 6331 reg_mem_engine = 0; 6332 } else { 6333 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6334 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6335 } 6336 6337 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6338 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6339 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6340 reg_mem_engine)); 6341 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6342 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6343 amdgpu_ring_write(ring, ref_and_mask); 6344 amdgpu_ring_write(ring, ref_and_mask); 6345 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6346 } 6347 6348 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6349 { 6350 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6351 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6352 EVENT_INDEX(4)); 6353 6354 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6355 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6356 EVENT_INDEX(0)); 6357 } 6358 6359 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6360 struct amdgpu_ib *ib, 6361 unsigned vmid, bool ctx_switch) 6362 { 6363 u32 header, control = 0; 6364 6365 if (ib->flags & AMDGPU_IB_FLAG_CE) 6366 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6367 else 6368 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6369 6370 control |= ib->length_dw | (vmid << 24); 6371 6372 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6373 control |= INDIRECT_BUFFER_PRE_ENB(1); 6374 6375 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6376 gfx_v8_0_ring_emit_de_meta(ring); 6377 } 6378 6379 amdgpu_ring_write(ring, header); 6380 
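/* The INDIRECT_BUFFER packet body emitted below is three dwords: the IB GPU
 * address low bits (the low two bits carry the swap select on big-endian
 * builds), the address high bits, and the control word that carries
 * length_dw and the VMID assembled above.
 */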
amdgpu_ring_write(ring, 6381 #ifdef __BIG_ENDIAN 6382 (2 << 0) | 6383 #endif 6384 (ib->gpu_addr & 0xFFFFFFFC)); 6385 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6386 amdgpu_ring_write(ring, control); 6387 } 6388 6389 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6390 struct amdgpu_ib *ib, 6391 unsigned vmid, bool ctx_switch) 6392 { 6393 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6394 6395 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6396 amdgpu_ring_write(ring, 6397 #ifdef __BIG_ENDIAN 6398 (2 << 0) | 6399 #endif 6400 (ib->gpu_addr & 0xFFFFFFFC)); 6401 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6402 amdgpu_ring_write(ring, control); 6403 } 6404 6405 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, uint64_t addr, 6406 uint64_t seq, unsigned flags) 6407 { 6408 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6409 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6410 6411 /* Workaround for cache flush problems. First send a dummy EOP 6412 * event down the pipe with seq one below. 6413 */ 6414 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6415 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6416 EOP_TC_ACTION_EN | 6417 EOP_TC_WB_ACTION_EN | 6418 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6419 EVENT_INDEX(5))); 6420 amdgpu_ring_write(ring, addr & 0xfffffffc); 6421 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6422 DATA_SEL(1) | INT_SEL(0)); 6423 amdgpu_ring_write(ring, lower_32_bits(seq - 1)); 6424 amdgpu_ring_write(ring, upper_32_bits(seq - 1)); 6425 6426 /* Then send the real EOP event down the pipe: 6427 * EVENT_WRITE_EOP - flush caches, send int */ 6428 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6429 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6430 EOP_TC_ACTION_EN | 6431 EOP_TC_WB_ACTION_EN | 6432 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6433 EVENT_INDEX(5))); 6434 amdgpu_ring_write(ring, addr & 0xfffffffc); 6435 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6436 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 6437 amdgpu_ring_write(ring, lower_32_bits(seq)); 6438 amdgpu_ring_write(ring, upper_32_bits(seq)); 6439 6440 } 6441 6442 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6443 { 6444 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6445 uint32_t seq = ring->fence_drv.sync_seq; 6446 uint64_t addr = ring->fence_drv.gpu_addr; 6447 6448 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6449 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6450 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6451 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6452 amdgpu_ring_write(ring, addr & 0xfffffffc); 6453 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6454 amdgpu_ring_write(ring, seq); 6455 amdgpu_ring_write(ring, 0xffffffff); 6456 amdgpu_ring_write(ring, 4); /* poll interval */ 6457 } 6458 6459 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6460 unsigned vmid, uint64_t pd_addr) 6461 { 6462 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6463 6464 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6465 6466 /* wait for the invalidate to complete */ 6467 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6468 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6469 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6470 WAIT_REG_MEM_ENGINE(0))); /* me */ 6471 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6472 amdgpu_ring_write(ring, 0); 6473 amdgpu_ring_write(ring, 0); /* ref */ 6474 amdgpu_ring_write(ring, 0); /* mask */ 6475 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6476 6477 /* compute doesn't have PFP */ 6478 if (usepfp) { 6479 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6480 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6481 amdgpu_ring_write(ring, 0x0); 6482 } 6483 } 6484 6485 static uint64_t gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6486 { 6487 return ring->adev->wb.wb[ring->wptr_offs]; 6488 } 6489 6490 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6491 { 6492 struct amdgpu_device *adev = ring->adev; 6493 6494 /* XXX check if swapping is necessary on BE */ 6495 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6496 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6497 } 6498 6499 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 6500 bool acquire) 6501 { 6502 struct amdgpu_device *adev = ring->adev; 6503 int pipe_num, tmp, reg; 6504 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 6505 6506 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 6507 6508 /* first me only has 2 entries, GFX and HP3D */ 6509 if (ring->me > 0) 6510 pipe_num -= 2; 6511 6512 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; 6513 tmp = RREG32(reg); 6514 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 6515 WREG32(reg, tmp); 6516 } 6517 6518 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, 6519 struct amdgpu_ring *ring, 6520 bool acquire) 6521 { 6522 int i, pipe; 6523 bool reserve; 6524 struct amdgpu_ring *iring; 6525 6526 mutex_lock(&adev->gfx.pipe_reserve_mutex); 6527 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); 6528 if (acquire) 6529 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6530 else 6531 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6532 6533 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 6534 /* Clear all reservations - everyone reacquires all resources */ 6535 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 6536 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 6537 true); 6538 6539 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 6540 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 6541 true); 6542 } else { 6543 /* Lower all pipes without a current reservation */ 6544 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 6545 iring = &adev->gfx.gfx_ring[i]; 6546 pipe = amdgpu_gfx_queue_to_bit(adev, 6547 iring->me, 6548 iring->pipe, 6549 0); 6550 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6551 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6552 } 6553 6554 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 6555 iring = &adev->gfx.compute_ring[i]; 6556 pipe = amdgpu_gfx_queue_to_bit(adev, 6557 iring->me, 6558 iring->pipe, 6559 0); 6560 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6561 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6562 } 6563 } 6564 6565 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 6566 } 6567 6568 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, 6569 struct amdgpu_ring *ring, 6570 bool acquire) 6571 { 6572 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 6573 uint32_t queue_priority = acquire ? 
0xf : 0x0; 6574 6575 mutex_lock(&adev->srbm_mutex); 6576 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6577 6578 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); 6579 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); 6580 6581 vi_srbm_select(adev, 0, 0, 0, 0); 6582 mutex_unlock(&adev->srbm_mutex); 6583 } 6584 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, 6585 enum drm_sched_priority priority) 6586 { 6587 struct amdgpu_device *adev = ring->adev; 6588 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 6589 6590 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 6591 return; 6592 6593 gfx_v8_0_hqd_set_priority(adev, ring, acquire); 6594 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); 6595 } 6596 6597 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6598 uint64_t addr, uint64_t seq, 6599 unsigned flags) 6600 { 6601 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6602 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6603 6604 /* RELEASE_MEM - flush caches, send int */ 6605 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6606 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6607 EOP_TC_ACTION_EN | 6608 EOP_TC_WB_ACTION_EN | 6609 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6610 EVENT_INDEX(5))); 6611 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6612 amdgpu_ring_write(ring, addr & 0xfffffffc); 6613 amdgpu_ring_write(ring, upper_32_bits(addr)); 6614 amdgpu_ring_write(ring, lower_32_bits(seq)); 6615 amdgpu_ring_write(ring, upper_32_bits(seq)); 6616 } 6617 6618 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, uint64_t addr, 6619 uint64_t seq, unsigned int flags) 6620 { 6621 /* we only allocate 32bit for each seq wb address */ 6622 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6623 6624 /* write fence seq to the "addr" */ 6625 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6626 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6627 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6628 amdgpu_ring_write(ring, lower_32_bits(addr)); 6629 amdgpu_ring_write(ring, upper_32_bits(addr)); 6630 amdgpu_ring_write(ring, lower_32_bits(seq)); 6631 6632 if (flags & AMDGPU_FENCE_FLAG_INT) { 6633 /* set register to trigger INT */ 6634 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6635 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6636 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6637 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6638 amdgpu_ring_write(ring, 0); 6639 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6640 } 6641 } 6642 6643 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6644 { 6645 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6646 amdgpu_ring_write(ring, 0); 6647 } 6648 6649 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6650 { 6651 uint32_t dw2 = 0; 6652 6653 if (amdgpu_sriov_vf(ring->adev)) 6654 gfx_v8_0_ring_emit_ce_meta(ring); 6655 6656 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6657 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6658 gfx_v8_0_ring_emit_vgt_flush(ring); 6659 /* set load_global_config & load_global_uconfig */ 6660 dw2 |= 0x8001; 6661 /* set load_cs_sh_regs */ 6662 dw2 |= 0x01000000; 6663 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6664 dw2 |= 0x10002; 6665 6666 /* set load_ce_ram if preamble presented */ 6667 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6668 dw2 |= 0x10000000; 6669 } else { 6670 /* still load_ce_ram if this is the first time preamble 
is presented, 6671 * even though no context switch happens. 6672 */ 6673 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6674 dw2 |= 0x10000000; 6675 } 6676 6677 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6678 amdgpu_ring_write(ring, dw2); 6679 amdgpu_ring_write(ring, 0); 6680 } 6681 6682 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 6683 { 6684 unsigned ret; 6685 6686 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6687 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 6688 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 6689 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 6690 ret = ring->wptr & ring->buf_mask; 6691 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 6692 return ret; 6693 } 6694 6695 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 6696 { 6697 unsigned cur; 6698 6699 BUG_ON(offset > ring->buf_mask); 6700 BUG_ON(ring->ring[offset] != 0x55aa55aa); 6701 6702 cur = (ring->wptr & ring->buf_mask) - 1; 6703 if (likely(cur > offset)) 6704 ring->ring[offset] = cur - offset; 6705 else 6706 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; 6707 } 6708 6709 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 6710 { 6711 struct amdgpu_device *adev = ring->adev; 6712 6713 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6714 amdgpu_ring_write(ring, 0 | /* src: register */ 6715 (5 << 8) | /* dst: memory */ 6716 (1 << 20)); /* write confirm */ 6717 amdgpu_ring_write(ring, reg); 6718 amdgpu_ring_write(ring, 0); 6719 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6720 adev->virt.reg_val_offs * 4)); 6721 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6722 adev->virt.reg_val_offs * 4)); 6723 } 6724 6725 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6726 uint32_t val) 6727 { 6728 uint32_t cmd; 6729 6730 switch (ring->funcs->type) { 6731 case AMDGPU_RING_TYPE_GFX: 6732 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6733 break; 6734 case AMDGPU_RING_TYPE_KIQ: 6735 cmd = 1 << 16; /* no inc addr */ 6736 break; 6737 default: 6738 cmd = WR_CONFIRM; 6739 break; 6740 } 6741 6742 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6743 amdgpu_ring_write(ring, cmd); 6744 amdgpu_ring_write(ring, reg); 6745 amdgpu_ring_write(ring, 0); 6746 amdgpu_ring_write(ring, val); 6747 } 6748 6749 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6750 enum amdgpu_interrupt_state state) 6751 { 6752 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 6753 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6754 } 6755 6756 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6757 int me, int pipe, 6758 enum amdgpu_interrupt_state state) 6759 { 6760 u32 mec_int_cntl, mec_int_cntl_reg; 6761 6762 /* 6763 * amdgpu controls only the first MEC. That's why this function only 6764 * handles the setting of interrupts for this specific MEC. All other 6765 * pipes' interrupts are set by amdkfd.
6766 */ 6767 6768 if (me == 1) { 6769 switch (pipe) { 6770 case 0: 6771 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 6772 break; 6773 case 1: 6774 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; 6775 break; 6776 case 2: 6777 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; 6778 break; 6779 case 3: 6780 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; 6781 break; 6782 default: 6783 DRM_DEBUG("invalid pipe %d\n", pipe); 6784 return; 6785 } 6786 } else { 6787 DRM_DEBUG("invalid me %d\n", me); 6788 return; 6789 } 6790 6791 switch (state) { 6792 case AMDGPU_IRQ_STATE_DISABLE: 6793 mec_int_cntl = RREG32(mec_int_cntl_reg); 6794 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6795 WREG32(mec_int_cntl_reg, mec_int_cntl); 6796 break; 6797 case AMDGPU_IRQ_STATE_ENABLE: 6798 mec_int_cntl = RREG32(mec_int_cntl_reg); 6799 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6800 WREG32(mec_int_cntl_reg, mec_int_cntl); 6801 break; 6802 default: 6803 break; 6804 } 6805 } 6806 6807 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6808 struct amdgpu_irq_src *source, 6809 unsigned type, 6810 enum amdgpu_interrupt_state state) 6811 { 6812 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6813 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6814 6815 return 0; 6816 } 6817 6818 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6819 struct amdgpu_irq_src *source, 6820 unsigned type, 6821 enum amdgpu_interrupt_state state) 6822 { 6823 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6824 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6825 6826 return 0; 6827 } 6828 6829 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6830 struct amdgpu_irq_src *src, 6831 unsigned type, 6832 enum amdgpu_interrupt_state state) 6833 { 6834 switch (type) { 6835 case AMDGPU_CP_IRQ_GFX_EOP: 6836 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6837 break; 6838 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6839 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6840 break; 6841 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6842 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6843 break; 6844 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6845 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6846 break; 6847 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6848 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6849 break; 6850 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6851 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6852 break; 6853 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6854 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6855 break; 6856 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6857 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6858 break; 6859 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6860 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6861 break; 6862 default: 6863 break; 6864 } 6865 return 0; 6866 } 6867 6868 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, 6869 struct amdgpu_irq_src *source, 6870 unsigned int type, 6871 enum amdgpu_interrupt_state state) 6872 { 6873 int enable_flag; 6874 6875 switch (state) { 6876 case AMDGPU_IRQ_STATE_DISABLE: 6877 enable_flag = 0; 6878 break; 6879 6880 case AMDGPU_IRQ_STATE_ENABLE: 6881 enable_flag = 1; 6882 break; 6883 6884 default: 6885 return -EINVAL; 6886 } 6887 6888 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6889 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, 
enable_flag); 6890 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6891 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6892 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6893 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6894 enable_flag); 6895 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6896 enable_flag); 6897 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6898 enable_flag); 6899 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6900 enable_flag); 6901 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6902 enable_flag); 6903 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6904 enable_flag); 6905 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6906 enable_flag); 6907 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6908 enable_flag); 6909 6910 return 0; 6911 } 6912 6913 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, 6914 struct amdgpu_irq_src *source, 6915 unsigned int type, 6916 enum amdgpu_interrupt_state state) 6917 { 6918 int enable_flag; 6919 6920 switch (state) { 6921 case AMDGPU_IRQ_STATE_DISABLE: 6922 enable_flag = 1; 6923 break; 6924 6925 case AMDGPU_IRQ_STATE_ENABLE: 6926 enable_flag = 0; 6927 break; 6928 6929 default: 6930 return -EINVAL; 6931 } 6932 6933 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, 6934 enable_flag); 6935 6936 return 0; 6937 } 6938 6939 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6940 struct amdgpu_irq_src *source, 6941 struct amdgpu_iv_entry *entry) 6942 { 6943 int i; 6944 u8 me_id, pipe_id, queue_id; 6945 struct amdgpu_ring *ring; 6946 6947 DRM_DEBUG("IH: CP EOP\n"); 6948 me_id = (entry->ring_id & 0x0c) >> 2; 6949 pipe_id = (entry->ring_id & 0x03) >> 0; 6950 queue_id = (entry->ring_id & 0x70) >> 4; 6951 6952 switch (me_id) { 6953 case 0: 6954 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6955 break; 6956 case 1: 6957 case 2: 6958 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6959 ring = &adev->gfx.compute_ring[i]; 6960 /* Per-queue interrupt is supported for MEC starting from VI. 6961 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
6962 */ 6963 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6964 amdgpu_fence_process(ring); 6965 } 6966 break; 6967 } 6968 return 0; 6969 } 6970 6971 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6972 struct amdgpu_irq_src *source, 6973 struct amdgpu_iv_entry *entry) 6974 { 6975 DRM_ERROR("Illegal register access in command stream\n"); 6976 schedule_work(&adev->reset_work); 6977 return 0; 6978 } 6979 6980 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6981 struct amdgpu_irq_src *source, 6982 struct amdgpu_iv_entry *entry) 6983 { 6984 DRM_ERROR("Illegal instruction in command stream\n"); 6985 schedule_work(&adev->reset_work); 6986 return 0; 6987 } 6988 6989 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6990 struct amdgpu_irq_src *source, 6991 struct amdgpu_iv_entry *entry) 6992 { 6993 DRM_ERROR("CP EDC/ECC error detected."); 6994 return 0; 6995 } 6996 6997 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) 6998 { 6999 u32 enc, se_id, sh_id, cu_id; 7000 char type[20]; 7001 int sq_edc_source = -1; 7002 7003 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING); 7004 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID); 7005 7006 switch (enc) { 7007 case 0: 7008 DRM_INFO("SQ general purpose intr detected:" 7009 "se_id %d, immed_overflow %d, host_reg_overflow %d," 7010 "host_cmd_overflow %d, cmd_timestamp %d," 7011 "reg_timestamp %d, thread_trace_buff_full %d," 7012 "wlt %d, thread_trace %d.\n", 7013 se_id, 7014 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW), 7015 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW), 7016 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW), 7017 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP), 7018 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP), 7019 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL), 7020 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT), 7021 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE) 7022 ); 7023 break; 7024 case 1: 7025 case 2: 7026 7027 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID); 7028 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID); 7029 7030 /* 7031 * This function can be called either directly from ISR 7032 * or from BH in which case we can access SQ_EDC_INFO 7033 * instance 7034 */ 7035 if (in_task()) { 7036 mutex_lock(&adev->grbm_idx_mutex); 7037 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); 7038 7039 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); 7040 7041 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7042 mutex_unlock(&adev->grbm_idx_mutex); 7043 } 7044 7045 if (enc == 1) 7046 sprintf(type, "instruction intr"); 7047 else 7048 sprintf(type, "EDC/ECC error"); 7049 7050 DRM_INFO( 7051 "SQ %s detected: " 7052 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " 7053 "trap %s, sq_ed_info.source %s.\n", 7054 type, se_id, sh_id, cu_id, 7055 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID), 7056 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID), 7057 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID), 7058 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false", 7059 (sq_edc_source != -1) ? 
sq_edc_source_names[sq_edc_source] : "unavailable" 7060 ); 7061 break; 7062 default: 7063 DRM_ERROR("SQ invalid encoding type.\n"); 7064 } 7065 } 7066 7067 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) 7068 { 7069 7070 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); 7071 struct sq_work *sq_work = container_of(work, struct sq_work, work); 7072 7073 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); 7074 } 7075 7076 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, 7077 struct amdgpu_irq_src *source, 7078 struct amdgpu_iv_entry *entry) 7079 { 7080 unsigned ih_data = entry->src_data[0]; 7081 7082 /* 7083 * Try to submit work so SQ_EDC_INFO can be accessed from 7084 * BH. If previous work submission hasn't finished yet 7085 * just print whatever info is possible directly from the ISR. 7086 */ 7087 if (work_pending(&adev->gfx.sq_work.work)) { 7088 gfx_v8_0_parse_sq_irq(adev, ih_data); 7089 } else { 7090 adev->gfx.sq_work.ih_data = ih_data; 7091 schedule_work(&adev->gfx.sq_work.work); 7092 } 7093 7094 return 0; 7095 } 7096 7097 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 7098 struct amdgpu_irq_src *src, 7099 unsigned int type, 7100 enum amdgpu_interrupt_state state) 7101 { 7102 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7103 7104 switch (type) { 7105 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 7106 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, 7107 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7108 if (ring->me == 1) 7109 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, 7110 ring->pipe, 7111 GENERIC2_INT_ENABLE, 7112 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7113 else 7114 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, 7115 ring->pipe, 7116 GENERIC2_INT_ENABLE, 7117 state == AMDGPU_IRQ_STATE_DISABLE ?
0 : 1); 7118 break; 7119 default: 7120 BUG(); /* kiq only support GENERIC2_INT now */ 7121 break; 7122 } 7123 return 0; 7124 } 7125 7126 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, 7127 struct amdgpu_irq_src *source, 7128 struct amdgpu_iv_entry *entry) 7129 { 7130 u8 me_id, pipe_id, queue_id; 7131 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7132 7133 me_id = (entry->ring_id & 0x0c) >> 2; 7134 pipe_id = (entry->ring_id & 0x03) >> 0; 7135 queue_id = (entry->ring_id & 0x70) >> 4; 7136 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 7137 me_id, pipe_id, queue_id); 7138 7139 amdgpu_fence_process(ring); 7140 return 0; 7141 } 7142 7143 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 7144 .name = "gfx_v8_0", 7145 .early_init = gfx_v8_0_early_init, 7146 .late_init = gfx_v8_0_late_init, 7147 .sw_init = gfx_v8_0_sw_init, 7148 .sw_fini = gfx_v8_0_sw_fini, 7149 .hw_init = gfx_v8_0_hw_init, 7150 .hw_fini = gfx_v8_0_hw_fini, 7151 .suspend = gfx_v8_0_suspend, 7152 .resume = gfx_v8_0_resume, 7153 .is_idle = gfx_v8_0_is_idle, 7154 .wait_for_idle = gfx_v8_0_wait_for_idle, 7155 .check_soft_reset = gfx_v8_0_check_soft_reset, 7156 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 7157 .soft_reset = gfx_v8_0_soft_reset, 7158 .post_soft_reset = gfx_v8_0_post_soft_reset, 7159 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 7160 .set_powergating_state = gfx_v8_0_set_powergating_state, 7161 .get_clockgating_state = gfx_v8_0_get_clockgating_state, 7162 }; 7163 7164 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7165 .type = AMDGPU_RING_TYPE_GFX, 7166 .align_mask = 0xff, 7167 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7168 .support_64bit_ptrs = false, 7169 .get_rptr = gfx_v8_0_ring_get_rptr, 7170 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 7171 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 7172 .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 7173 5 + /* COND_EXEC */ 7174 7 + /* PIPELINE_SYNC */ 7175 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ 7176 12 + /* FENCE for VM_FLUSH */ 7177 20 + /* GDS switch */ 7178 4 + /* double SWITCH_BUFFER, 7179 the first COND_EXEC jump to the place just 7180 prior to this double SWITCH_BUFFER */ 7181 5 + /* COND_EXEC */ 7182 7 + /* HDP_flush */ 7183 4 + /* VGT_flush */ 7184 14 + /* CE_META */ 7185 31 + /* DE_META */ 7186 3 + /* CNTX_CTRL */ 7187 5 + /* HDP_INVL */ 7188 12 + 12 + /* FENCE x2 */ 7189 2, /* SWITCH_BUFFER */ 7190 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ 7191 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 7192 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 7193 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7194 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7195 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7196 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7197 .test_ring = gfx_v8_0_ring_test_ring, 7198 .test_ib = gfx_v8_0_ring_test_ib, 7199 .insert_nop = amdgpu_ring_insert_nop, 7200 .pad_ib = amdgpu_ring_generic_pad_ib, 7201 .emit_switch_buffer = gfx_v8_ring_emit_sb, 7202 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 7203 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 7204 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 7205 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7206 }; 7207 7208 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7209 .type = AMDGPU_RING_TYPE_COMPUTE, 7210 .align_mask = 0xff, 7211 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7212 .support_64bit_ptrs = false, 7213 .get_rptr = gfx_v8_0_ring_get_rptr, 7214 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7215 .set_wptr = 
gfx_v8_0_ring_set_wptr_compute, 7216 .emit_frame_size = 7217 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7218 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7219 5 + /* hdp_invalidate */ 7220 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7221 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7222 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7223 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7224 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7225 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 7226 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7227 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7228 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7229 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7230 .test_ring = gfx_v8_0_ring_test_ring, 7231 .test_ib = gfx_v8_0_ring_test_ib, 7232 .insert_nop = amdgpu_ring_insert_nop, 7233 .pad_ib = amdgpu_ring_generic_pad_ib, 7234 .set_priority = gfx_v8_0_ring_set_priority_compute, 7235 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7236 }; 7237 7238 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7239 .type = AMDGPU_RING_TYPE_KIQ, 7240 .align_mask = 0xff, 7241 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7242 .support_64bit_ptrs = false, 7243 .get_rptr = gfx_v8_0_ring_get_rptr, 7244 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7245 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7246 .emit_frame_size = 7247 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7248 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7249 5 + /* hdp_invalidate */ 7250 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7251 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7252 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7253 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7254 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7255 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 7256 .test_ring = gfx_v8_0_ring_test_ring, 7257 .test_ib = gfx_v8_0_ring_test_ib, 7258 .insert_nop = amdgpu_ring_insert_nop, 7259 .pad_ib = amdgpu_ring_generic_pad_ib, 7260 .emit_rreg = gfx_v8_0_ring_emit_rreg, 7261 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7262 }; 7263 7264 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7265 { 7266 int i; 7267 7268 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 7269 7270 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7271 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7272 7273 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7274 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7275 } 7276 7277 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7278 .set = gfx_v8_0_set_eop_interrupt_state, 7279 .process = gfx_v8_0_eop_irq, 7280 }; 7281 7282 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7283 .set = gfx_v8_0_set_priv_reg_fault_state, 7284 .process = gfx_v8_0_priv_reg_irq, 7285 }; 7286 7287 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7288 .set = gfx_v8_0_set_priv_inst_fault_state, 7289 .process = gfx_v8_0_priv_inst_irq, 7290 }; 7291 7292 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7293 .set = gfx_v8_0_kiq_set_interrupt_state, 7294 .process = gfx_v8_0_kiq_irq, 7295 }; 7296 7297 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { 7298 .set = gfx_v8_0_set_cp_ecc_int_state, 7299 .process = gfx_v8_0_cp_ecc_error_irq, 7300 }; 7301 7302 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { 7303 .set = gfx_v8_0_set_sq_int_state, 7304 .process = gfx_v8_0_sq_irq, 7305 
}; 7306 7307 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7308 { 7309 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7310 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7311 7312 adev->gfx.priv_reg_irq.num_types = 1; 7313 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7314 7315 adev->gfx.priv_inst_irq.num_types = 1; 7316 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7317 7318 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7319 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7320 7321 adev->gfx.cp_ecc_error_irq.num_types = 1; 7322 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; 7323 7324 adev->gfx.sq_irq.num_types = 1; 7325 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs; 7326 } 7327 7328 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7329 { 7330 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7331 } 7332 7333 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7334 { 7335 /* init asic gds info */ 7336 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7337 adev->gds.gws.total_size = 64; 7338 adev->gds.oa.total_size = 16; 7339 7340 if (adev->gds.mem.total_size == 64 * 1024) { 7341 adev->gds.mem.gfx_partition_size = 4096; 7342 adev->gds.mem.cs_partition_size = 4096; 7343 7344 adev->gds.gws.gfx_partition_size = 4; 7345 adev->gds.gws.cs_partition_size = 4; 7346 7347 adev->gds.oa.gfx_partition_size = 4; 7348 adev->gds.oa.cs_partition_size = 1; 7349 } else { 7350 adev->gds.mem.gfx_partition_size = 1024; 7351 adev->gds.mem.cs_partition_size = 1024; 7352 7353 adev->gds.gws.gfx_partition_size = 16; 7354 adev->gds.gws.cs_partition_size = 16; 7355 7356 adev->gds.oa.gfx_partition_size = 4; 7357 adev->gds.oa.cs_partition_size = 4; 7358 } 7359 } 7360 7361 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7362 u32 bitmap) 7363 { 7364 u32 data; 7365 7366 if (!bitmap) 7367 return; 7368 7369 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7370 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7371 7372 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7373 } 7374 7375 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7376 { 7377 u32 data, mask; 7378 7379 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7380 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7381 7382 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7383 7384 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7385 } 7386 7387 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7388 { 7389 int i, j, k, counter, active_cu_number = 0; 7390 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7391 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7392 unsigned disable_masks[4 * 2]; 7393 u32 ao_cu_num; 7394 7395 memset(cu_info, 0, sizeof(*cu_info)); 7396 7397 if (adev->flags & AMD_IS_APU) 7398 ao_cu_num = 2; 7399 else 7400 ao_cu_num = adev->gfx.config.max_cu_per_sh; 7401 7402 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7403 7404 mutex_lock(&adev->grbm_idx_mutex); 7405 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7406 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7407 mask = 1; 7408 ao_bitmap = 0; 7409 counter = 0; 7410 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7411 if (i < 4 && j < 2) 7412 gfx_v8_0_set_user_cu_inactive_bitmap( 7413 adev, disable_masks[i * 2 + j]); 7414 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7415 cu_info->bitmap[i][j] = bitmap; 7416 7417 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k
++) { 7418 if (bitmap & mask) { 7419 if (counter < ao_cu_num) 7420 ao_bitmap |= mask; 7421 counter ++; 7422 } 7423 mask <<= 1; 7424 } 7425 active_cu_number += counter; 7426 if (i < 2 && j < 2) 7427 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7428 cu_info->ao_cu_bitmap[i][j] = ao_bitmap; 7429 } 7430 } 7431 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7432 mutex_unlock(&adev->grbm_idx_mutex); 7433 7434 cu_info->number = active_cu_number; 7435 cu_info->ao_cu_mask = ao_cu_mask; 7436 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7437 cu_info->max_waves_per_simd = 10; 7438 cu_info->max_scratch_slots_per_cu = 32; 7439 cu_info->wave_front_size = 64; 7440 cu_info->lds_size = 64; 7441 } 7442 7443 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7444 { 7445 .type = AMD_IP_BLOCK_TYPE_GFX, 7446 .major = 8, 7447 .minor = 0, 7448 .rev = 0, 7449 .funcs = &gfx_v8_0_ip_funcs, 7450 }; 7451 7452 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7453 { 7454 .type = AMD_IP_BLOCK_TYPE_GFX, 7455 .major = 8, 7456 .minor = 1, 7457 .rev = 0, 7458 .funcs = &gfx_v8_0_ip_funcs, 7459 }; 7460 7461 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 7462 { 7463 uint64_t ce_payload_addr; 7464 int cnt_ce; 7465 union { 7466 struct vi_ce_ib_state regular; 7467 struct vi_ce_ib_state_chained_ib chained; 7468 } ce_payload = {}; 7469 7470 if (ring->adev->virt.chained_ib_support) { 7471 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7472 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7473 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7474 } else { 7475 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7476 offsetof(struct vi_gfx_meta_data, ce_payload); 7477 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7478 } 7479 7480 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7481 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7482 WRITE_DATA_DST_SEL(8) | 7483 WR_CONFIRM) | 7484 WRITE_DATA_CACHE_POLICY(0)); 7485 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7486 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7487 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7488 } 7489 7490 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) 7491 { 7492 uint64_t de_payload_addr, gds_addr, csa_addr; 7493 int cnt_de; 7494 union { 7495 struct vi_de_ib_state regular; 7496 struct vi_de_ib_state_chained_ib chained; 7497 } de_payload = {}; 7498 7499 csa_addr = amdgpu_csa_vaddr(ring->adev); 7500 gds_addr = csa_addr + 4096; 7501 if (ring->adev->virt.chained_ib_support) { 7502 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7503 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7504 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7505 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7506 } else { 7507 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7508 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7509 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7510 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7511 } 7512 7513 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7514 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 7515 WRITE_DATA_DST_SEL(8) | 7516 WR_CONFIRM) | 7517 WRITE_DATA_CACHE_POLICY(0)); 7518 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7519 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7520 amdgpu_ring_write_multiple(ring, (void 
*)&de_payload, cnt_de - 2); 7521 } 7522
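/* Usage sketch (assumption based on the common VI SoC setup code, not part of
 * this file): the two ip_block definitions above are expected to be
 * registered by the SoC code, e.g.
 *   amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 * so that the amd_ip_funcs callbacks (hw_init, set_clockgating_state, ...)
 * are driven through the common IP-block framework.
 */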