/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
/* NOTE(review): gfx_8_0_enum.h is already included two lines up; this second include looks redundant. */
#include "gca/gfx_8_0_enum.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

/*
 * Presumably the number of gfx rings and the per-MEC HPD size; neither
 * constant is used in this part of the file -- TODO confirm at the use sites.
 */
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

/*
 * Per-ASIC GB_ADDR_CONFIG values. The same numbers are written to
 * mmGB_ADDR_CONFIG by the iceland, cz, polaris11 and tonga
 * *_golden_common_all tables further down in this file.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field helpers: shift a value to the bit position given by the matching
 * GB_TILE_MODE0__*__SHIFT / GB_MACROTILE_MODE0__*__SHIFT constant. The value
 * is only shifted, never masked, so the caller must pass one that fits.
 */
#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Single-bit masks named after the fields of RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 80 81 /* BPM SERDES CMD */ 82 #define SET_BPM_SERDES_CMD 1 83 #define CLE_BPM_SERDES_CMD 0 84 85 /* BPM Register Address*/ 86 enum { 87 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 88 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 89 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 90 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 91 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 92 BPM_REG_FGCG_MAX 93 }; 94 95 #define RLC_FormatDirectRegListLength 14 96 97 MODULE_FIRMWARE("amdgpufw_carrizo_ce"); 98 MODULE_FIRMWARE("amdgpufw_carrizo_pfp"); 99 MODULE_FIRMWARE("amdgpufw_carrizo_me"); 100 MODULE_FIRMWARE("amdgpufw_carrizo_mec"); 101 MODULE_FIRMWARE("amdgpufw_carrizo_mec2"); 102 MODULE_FIRMWARE("amdgpufw_carrizo_rlc"); 103 104 MODULE_FIRMWARE("amdgpufw_stoney_ce"); 105 MODULE_FIRMWARE("amdgpufw_stoney_pfp"); 106 MODULE_FIRMWARE("amdgpufw_stoney_me"); 107 MODULE_FIRMWARE("amdgpufw_stoney_mec"); 108 MODULE_FIRMWARE("amdgpufw_stoney_rlc"); 109 110 MODULE_FIRMWARE("amdgpufw_tonga_ce"); 111 MODULE_FIRMWARE("amdgpufw_tonga_pfp"); 112 MODULE_FIRMWARE("amdgpufw_tonga_me"); 113 MODULE_FIRMWARE("amdgpufw_tonga_mec"); 114 MODULE_FIRMWARE("amdgpufw_tonga_mec2"); 115 MODULE_FIRMWARE("amdgpufw_tonga_rlc"); 116 117 MODULE_FIRMWARE("amdgpufw_topaz_ce"); 118 MODULE_FIRMWARE("amdgpufw_topaz_pfp"); 119 MODULE_FIRMWARE("amdgpufw_topaz_me"); 120 MODULE_FIRMWARE("amdgpufw_topaz_mec"); 121 MODULE_FIRMWARE("amdgpufw_topaz_rlc"); 122 123 MODULE_FIRMWARE("amdgpufw_fiji_ce"); 124 MODULE_FIRMWARE("amdgpufw_fiji_pfp"); 125 MODULE_FIRMWARE("amdgpufw_fiji_me"); 126 MODULE_FIRMWARE("amdgpufw_fiji_mec"); 127 MODULE_FIRMWARE("amdgpufw_fiji_mec2"); 128 MODULE_FIRMWARE("amdgpufw_fiji_rlc"); 129 130 MODULE_FIRMWARE("amdgpufw_polaris10_ce"); 131 MODULE_FIRMWARE("amdgpufw_polaris10_ce_2"); 132 MODULE_FIRMWARE("amdgpufw_polaris10_pfp"); 133 
MODULE_FIRMWARE("amdgpufw_polaris10_pfp_2"); 134 MODULE_FIRMWARE("amdgpufw_polaris10_me"); 135 MODULE_FIRMWARE("amdgpufw_polaris10_me_2"); 136 MODULE_FIRMWARE("amdgpufw_polaris10_mec"); 137 MODULE_FIRMWARE("amdgpufw_polaris10_mec_2"); 138 MODULE_FIRMWARE("amdgpufw_polaris10_mec2"); 139 MODULE_FIRMWARE("amdgpufw_polaris10_mec2_2"); 140 MODULE_FIRMWARE("amdgpufw_polaris10_rlc"); 141 142 MODULE_FIRMWARE("amdgpufw_polaris11_ce"); 143 MODULE_FIRMWARE("amdgpufw_polaris11_ce_2"); 144 MODULE_FIRMWARE("amdgpufw_polaris11_pfp"); 145 MODULE_FIRMWARE("amdgpufw_polaris11_pfp_2"); 146 MODULE_FIRMWARE("amdgpufw_polaris11_me"); 147 MODULE_FIRMWARE("amdgpufw_polaris11_me_2"); 148 MODULE_FIRMWARE("amdgpufw_polaris11_mec"); 149 MODULE_FIRMWARE("amdgpufw_polaris11_mec_2"); 150 MODULE_FIRMWARE("amdgpufw_polaris11_mec2"); 151 MODULE_FIRMWARE("amdgpufw_polaris11_mec2_2"); 152 MODULE_FIRMWARE("amdgpufw_polaris11_rlc"); 153 154 MODULE_FIRMWARE("amdgpufw_polaris12_ce"); 155 MODULE_FIRMWARE("amdgpufw_polaris12_ce_2"); 156 MODULE_FIRMWARE("amdgpufw_polaris12_pfp"); 157 MODULE_FIRMWARE("amdgpufw_polaris12_pfp_2"); 158 MODULE_FIRMWARE("amdgpufw_polaris12_me"); 159 MODULE_FIRMWARE("amdgpufw_polaris12_me_2"); 160 MODULE_FIRMWARE("amdgpufw_polaris12_mec"); 161 MODULE_FIRMWARE("amdgpufw_polaris12_mec_2"); 162 MODULE_FIRMWARE("amdgpufw_polaris12_mec2"); 163 MODULE_FIRMWARE("amdgpufw_polaris12_mec2_2"); 164 MODULE_FIRMWARE("amdgpufw_polaris12_rlc"); 165 166 MODULE_FIRMWARE("amdgpufw_vegam_ce"); 167 MODULE_FIRMWARE("amdgpufw_vegam_pfp"); 168 MODULE_FIRMWARE("amdgpufw_vegam_me"); 169 MODULE_FIRMWARE("amdgpufw_vegam_mec"); 170 MODULE_FIRMWARE("amdgpufw_vegam_mec2"); 171 MODULE_FIRMWARE("amdgpufw_vegam_rlc"); 172 173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 174 { 175 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 176 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 177 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, 
mmGDS_OA_VMID2}, 178 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 179 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 180 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 181 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 182 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 183 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 184 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 185 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 186 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 187 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 188 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 189 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 190 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 191 }; 192 193 static const u32 golden_settings_tonga_a11[] = 194 { 195 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 196 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 197 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 198 mmGB_GPU_ID, 0x0000000f, 0x00000000, 199 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 200 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 201 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 202 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 203 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 204 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 205 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 206 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 207 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 208 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 209 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 210 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 211 }; 212 213 static const u32 tonga_golden_common_all[] = 214 { 215 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 216 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 217 
mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 218 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 219 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 220 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 221 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 222 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 223 }; 224 225 static const u32 tonga_mgcg_cgcg_init[] = 226 { 227 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 228 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 229 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 230 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 231 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 232 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 233 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 234 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 235 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 236 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 237 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 238 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 239 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 240 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 241 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 242 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 243 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 244 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 245 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 246 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 247 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 248 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 249 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 250 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 251 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 252 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 253 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 254 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 255 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 256 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 257 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 258 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 259 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 
0x00030002, 260 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 261 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 262 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 263 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 264 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 265 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 266 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 267 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 268 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 269 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 270 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 271 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 272 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 273 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 274 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 275 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 276 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 277 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 278 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 279 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 280 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 281 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 282 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 283 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 284 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 285 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 286 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 287 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 288 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 289 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 290 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 291 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 292 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 293 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 294 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 295 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 296 mmCGTS_CU7_SP1_CTRL_REG, 
0xffffffff, 0x00060005, 297 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 298 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 299 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 300 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 301 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 302 }; 303 304 static const u32 golden_settings_vegam_a11[] = 305 { 306 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 307 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, 308 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 309 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 310 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 311 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 312 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, 313 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, 314 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 315 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 316 mmSQ_CONFIG, 0x07f80000, 0x01180000, 317 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 318 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 319 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 320 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 321 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, 322 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 323 }; 324 325 static const u32 vegam_golden_common_all[] = 326 { 327 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 333 }; 334 335 static const u32 golden_settings_polaris11_a11[] = 336 { 337 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 338 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 339 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 340 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 341 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 342 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 343 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 344 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 
345 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 346 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 347 mmSQ_CONFIG, 0x07f80000, 0x01180000, 348 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 349 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 350 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 351 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 352 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 353 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 354 }; 355 356 static const u32 polaris11_golden_common_all[] = 357 { 358 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 359 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 360 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 361 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 362 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 363 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 364 }; 365 366 static const u32 golden_settings_polaris10_a11[] = 367 { 368 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 369 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 370 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 371 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 372 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 373 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 374 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 375 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 376 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 377 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 378 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 379 mmSQ_CONFIG, 0x07f80000, 0x07180000, 380 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 381 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 382 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 383 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 384 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 385 }; 386 387 static const u32 polaris10_golden_common_all[] = 388 { 389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 390 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 391 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 392 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 393 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 
0x00000800, 394 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 395 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 396 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 397 }; 398 399 static const u32 fiji_golden_common_all[] = 400 { 401 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 402 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 403 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 404 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 405 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 406 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 407 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 408 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 409 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 410 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 411 }; 412 413 static const u32 golden_settings_fiji_a10[] = 414 { 415 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 416 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 417 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 418 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 419 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 420 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 421 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 422 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 423 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 424 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 425 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 426 }; 427 428 static const u32 fiji_mgcg_cgcg_init[] = 429 { 430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 
0x00000100, 442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 461 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 462 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 463 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 464 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 465 }; 466 467 static const u32 golden_settings_iceland_a11[] = 468 { 469 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 470 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 471 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 472 mmGB_GPU_ID, 0x0000000f, 0x00000000, 473 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 474 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 475 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 476 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 477 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 478 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 479 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 480 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 481 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 482 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 483 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 484 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 485 }; 486 487 static const u32 
iceland_golden_common_all[] = 488 { 489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 490 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 491 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 492 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 493 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 494 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 495 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 496 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 497 }; 498 499 static const u32 iceland_mgcg_cgcg_init[] = 500 { 501 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 502 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 503 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 504 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 505 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 506 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 507 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 508 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 509 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 510 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 511 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 512 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 513 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 514 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 515 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 516 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 517 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 518 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 519 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 520 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 521 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 522 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 523 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 524 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 525 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 526 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 527 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 528 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 529 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 530 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 531 
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 532 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 533 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 534 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 535 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 536 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 537 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 538 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 539 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 540 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 541 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 542 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 543 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 544 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 545 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 546 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 547 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 548 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 549 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 550 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 551 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 552 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 553 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 554 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 555 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 556 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 557 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 558 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 559 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 560 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 561 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 562 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 563 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 564 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 565 }; 566 567 static const u32 cz_golden_settings_a11[] = 568 { 569 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 570 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 571 mmGB_GPU_ID, 
0x0000000f, 0x00000000, 572 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 573 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 574 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 575 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 576 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 577 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 578 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 579 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 580 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 581 }; 582 583 static const u32 cz_golden_common_all[] = 584 { 585 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 586 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 587 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 588 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 589 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 590 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 591 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 592 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 593 }; 594 595 static const u32 cz_mgcg_cgcg_init[] = 596 { 597 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 598 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 599 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 600 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 601 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 602 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 603 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 604 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 605 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 606 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 607 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 608 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 609 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 610 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 611 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 612 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 613 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 614 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 615 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 616 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 617 
mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 618 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 619 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 620 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 621 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 622 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 623 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 624 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 625 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 626 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 627 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 628 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 629 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 630 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 631 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 632 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 633 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 634 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 635 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 636 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 637 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 638 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 639 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 640 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 641 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 642 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 643 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 644 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 645 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 646 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 647 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 648 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 649 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 650 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 651 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 652 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 653 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 654 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 655 
mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 656 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 657 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 658 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 659 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 660 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 661 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 662 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 663 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 664 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 665 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 666 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 667 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 668 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 669 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 670 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 671 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 672 }; 673 674 static const u32 stoney_golden_settings_a11[] = 675 { 676 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 677 mmGB_GPU_ID, 0x0000000f, 0x00000000, 678 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 679 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 680 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 681 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 682 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 683 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 684 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 685 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 686 }; 687 688 static const u32 stoney_golden_common_all[] = 689 { 690 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 691 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 692 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 693 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 694 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 695 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 696 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 697 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 698 }; 699 700 static const u32 stoney_mgcg_cgcg_init[] = 701 { 702 
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 703 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 704 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 705 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 706 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 707 }; 708 709 710 static const char * const sq_edc_source_names[] = { 711 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred", 712 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch", 713 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return", 714 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR", 715 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS", 716 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS", 717 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA", 718 }; 719 720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); 727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); 728 729 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) 730 { 731 switch (adev->asic_type) { 732 case CHIP_TOPAZ: 733 amdgpu_device_program_register_sequence(adev, 734 iceland_mgcg_cgcg_init, 735 ARRAY_SIZE(iceland_mgcg_cgcg_init)); 736 amdgpu_device_program_register_sequence(adev, 737 golden_settings_iceland_a11, 738 ARRAY_SIZE(golden_settings_iceland_a11)); 739 amdgpu_device_program_register_sequence(adev, 740 iceland_golden_common_all, 741 ARRAY_SIZE(iceland_golden_common_all)); 742 break; 743 case CHIP_FIJI: 744 amdgpu_device_program_register_sequence(adev, 745 fiji_mgcg_cgcg_init, 746 ARRAY_SIZE(fiji_mgcg_cgcg_init)); 747 amdgpu_device_program_register_sequence(adev, 748 golden_settings_fiji_a10, 749 
ARRAY_SIZE(golden_settings_fiji_a10)); 750 amdgpu_device_program_register_sequence(adev, 751 fiji_golden_common_all, 752 ARRAY_SIZE(fiji_golden_common_all)); 753 break; 754 755 case CHIP_TONGA: 756 amdgpu_device_program_register_sequence(adev, 757 tonga_mgcg_cgcg_init, 758 ARRAY_SIZE(tonga_mgcg_cgcg_init)); 759 amdgpu_device_program_register_sequence(adev, 760 golden_settings_tonga_a11, 761 ARRAY_SIZE(golden_settings_tonga_a11)); 762 amdgpu_device_program_register_sequence(adev, 763 tonga_golden_common_all, 764 ARRAY_SIZE(tonga_golden_common_all)); 765 break; 766 case CHIP_VEGAM: 767 amdgpu_device_program_register_sequence(adev, 768 golden_settings_vegam_a11, 769 ARRAY_SIZE(golden_settings_vegam_a11)); 770 amdgpu_device_program_register_sequence(adev, 771 vegam_golden_common_all, 772 ARRAY_SIZE(vegam_golden_common_all)); 773 break; 774 case CHIP_POLARIS11: 775 case CHIP_POLARIS12: 776 amdgpu_device_program_register_sequence(adev, 777 golden_settings_polaris11_a11, 778 ARRAY_SIZE(golden_settings_polaris11_a11)); 779 amdgpu_device_program_register_sequence(adev, 780 polaris11_golden_common_all, 781 ARRAY_SIZE(polaris11_golden_common_all)); 782 break; 783 case CHIP_POLARIS10: 784 amdgpu_device_program_register_sequence(adev, 785 golden_settings_polaris10_a11, 786 ARRAY_SIZE(golden_settings_polaris10_a11)); 787 amdgpu_device_program_register_sequence(adev, 788 polaris10_golden_common_all, 789 ARRAY_SIZE(polaris10_golden_common_all)); 790 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); 791 if (adev->pdev->revision == 0xc7 && 792 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || 793 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || 794 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { 795 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); 796 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); 797 } 798 break; 799 case CHIP_CARRIZO: 800 
amdgpu_device_program_register_sequence(adev, 801 cz_mgcg_cgcg_init, 802 ARRAY_SIZE(cz_mgcg_cgcg_init)); 803 amdgpu_device_program_register_sequence(adev, 804 cz_golden_settings_a11, 805 ARRAY_SIZE(cz_golden_settings_a11)); 806 amdgpu_device_program_register_sequence(adev, 807 cz_golden_common_all, 808 ARRAY_SIZE(cz_golden_common_all)); 809 break; 810 case CHIP_STONEY: 811 amdgpu_device_program_register_sequence(adev, 812 stoney_mgcg_cgcg_init, 813 ARRAY_SIZE(stoney_mgcg_cgcg_init)); 814 amdgpu_device_program_register_sequence(adev, 815 stoney_golden_settings_a11, 816 ARRAY_SIZE(stoney_golden_settings_a11)); 817 amdgpu_device_program_register_sequence(adev, 818 stoney_golden_common_all, 819 ARRAY_SIZE(stoney_golden_common_all)); 820 break; 821 default: 822 break; 823 } 824 } 825 826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) 827 { 828 adev->gfx.scratch.num_reg = 8; 829 adev->gfx.scratch.reg_base = mmSCRATCH_REG0; 830 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 831 } 832 833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) 834 { 835 struct amdgpu_device *adev = ring->adev; 836 uint32_t scratch; 837 uint32_t tmp = 0; 838 unsigned i; 839 int r; 840 841 r = amdgpu_gfx_scratch_get(adev, &scratch); 842 if (r) { 843 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 844 return r; 845 } 846 WREG32(scratch, 0xCAFEDEAD); 847 r = amdgpu_ring_alloc(ring, 3); 848 if (r) { 849 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 850 ring->idx, r); 851 amdgpu_gfx_scratch_free(adev, scratch); 852 return r; 853 } 854 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 855 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 856 amdgpu_ring_write(ring, 0xDEADBEEF); 857 amdgpu_ring_commit(ring); 858 859 for (i = 0; i < adev->usec_timeout; i++) { 860 tmp = RREG32(scratch); 861 if (tmp == 0xDEADBEEF) 862 break; 863 DRM_UDELAY(1); 864 } 865 if (i < adev->usec_timeout) { 866 DRM_DEBUG("ring test 
on %d succeeded in %d usecs\n", 867 ring->idx, i); 868 } else { 869 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 870 ring->idx, scratch, tmp); 871 r = -EINVAL; 872 } 873 amdgpu_gfx_scratch_free(adev, scratch); 874 return r; 875 } 876 877 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 878 { 879 struct amdgpu_device *adev = ring->adev; 880 struct amdgpu_ib ib; 881 struct dma_fence *f = NULL; 882 883 unsigned int index; 884 uint64_t gpu_addr; 885 uint32_t tmp; 886 long r; 887 888 r = amdgpu_device_wb_get(adev, &index); 889 if (r) { 890 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 891 return r; 892 } 893 894 gpu_addr = adev->wb.gpu_addr + (index * 4); 895 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 896 memset(&ib, 0, sizeof(ib)); 897 r = amdgpu_ib_get(adev, NULL, 16, &ib); 898 if (r) { 899 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 900 goto err1; 901 } 902 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 903 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 904 ib.ptr[2] = lower_32_bits(gpu_addr); 905 ib.ptr[3] = upper_32_bits(gpu_addr); 906 ib.ptr[4] = 0xDEADBEEF; 907 ib.length_dw = 5; 908 909 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 910 if (r) 911 goto err2; 912 913 r = dma_fence_wait_timeout(f, false, timeout); 914 if (r == 0) { 915 DRM_ERROR("amdgpu: IB test timed out.\n"); 916 r = -ETIMEDOUT; 917 goto err2; 918 } else if (r < 0) { 919 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 920 goto err2; 921 } 922 923 tmp = adev->wb.wb[index]; 924 if (tmp == 0xDEADBEEF) { 925 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 926 r = 0; 927 } else { 928 DRM_ERROR("ib test on ring %d failed\n", ring->idx); 929 r = -EINVAL; 930 } 931 932 err2: 933 amdgpu_ib_free(adev, &ib, NULL); 934 dma_fence_put(f); 935 err1: 936 amdgpu_device_wb_free(adev, index); 937 return r; 938 } 939 940 941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) 942 { 943 release_firmware(adev->gfx.pfp_fw); 
944 adev->gfx.pfp_fw = NULL; 945 release_firmware(adev->gfx.me_fw); 946 adev->gfx.me_fw = NULL; 947 release_firmware(adev->gfx.ce_fw); 948 adev->gfx.ce_fw = NULL; 949 release_firmware(adev->gfx.rlc_fw); 950 adev->gfx.rlc_fw = NULL; 951 release_firmware(adev->gfx.mec_fw); 952 adev->gfx.mec_fw = NULL; 953 if ((adev->asic_type != CHIP_STONEY) && 954 (adev->asic_type != CHIP_TOPAZ)) 955 release_firmware(adev->gfx.mec2_fw); 956 adev->gfx.mec2_fw = NULL; 957 958 kfree(adev->gfx.rlc.register_list_format); 959 } 960 961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 962 { 963 const char *chip_name; 964 char fw_name[30]; 965 int err; 966 struct amdgpu_firmware_info *info = NULL; 967 const struct common_firmware_header *header = NULL; 968 const struct gfx_firmware_header_v1_0 *cp_hdr; 969 const struct rlc_firmware_header_v2_0 *rlc_hdr; 970 unsigned int *tmp = NULL, i; 971 972 DRM_DEBUG("\n"); 973 974 switch (adev->asic_type) { 975 case CHIP_TOPAZ: 976 chip_name = "topaz"; 977 break; 978 case CHIP_TONGA: 979 chip_name = "tonga"; 980 break; 981 case CHIP_CARRIZO: 982 chip_name = "carrizo"; 983 break; 984 case CHIP_FIJI: 985 chip_name = "fiji"; 986 break; 987 case CHIP_STONEY: 988 chip_name = "stoney"; 989 break; 990 case CHIP_POLARIS10: 991 chip_name = "polaris10"; 992 break; 993 case CHIP_POLARIS11: 994 chip_name = "polaris11"; 995 break; 996 case CHIP_POLARIS12: 997 chip_name = "polaris12"; 998 break; 999 case CHIP_VEGAM: 1000 chip_name = "vegam"; 1001 break; 1002 default: 1003 BUG(); 1004 } 1005 1006 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1007 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_pfp_2", chip_name); 1008 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1009 if (err == -ENOENT) { 1010 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_pfp", chip_name); 1011 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1012 } 1013 } else { 1014 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_pfp", 
chip_name); 1015 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1016 } 1017 if (err) 1018 goto out; 1019 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1020 if (err) 1021 goto out; 1022 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1023 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1024 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1025 1026 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1027 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_me_2", chip_name); 1028 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1029 if (err == -ENOENT) { 1030 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_me", chip_name); 1031 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1032 } 1033 } else { 1034 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_me", chip_name); 1035 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1036 } 1037 if (err) 1038 goto out; 1039 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1040 if (err) 1041 goto out; 1042 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1043 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1044 1045 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1046 1047 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1048 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_ce_2", chip_name); 1049 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1050 if (err == -ENOENT) { 1051 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_ce", chip_name); 1052 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1053 } 1054 } else { 1055 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_ce", chip_name); 1056 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1057 } 1058 if (err) 1059 goto out; 1060 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1061 if (err) 1062 goto out; 
1063 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1064 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1065 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1066 1067 /* 1068 * Support for MCBP/Virtualization in combination with chained IBs is 1069 * formal released on feature version #46 1070 */ 1071 if (adev->gfx.ce_feature_version >= 46 && 1072 adev->gfx.pfp_feature_version >= 46) { 1073 adev->virt.chained_ib_support = true; 1074 DRM_INFO("Chained IB support enabled!\n"); 1075 } else 1076 adev->virt.chained_ib_support = false; 1077 1078 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_rlc", chip_name); 1079 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1080 if (err) 1081 goto out; 1082 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1083 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1084 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1085 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1086 1087 adev->gfx.rlc.save_and_restore_offset = 1088 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1089 adev->gfx.rlc.clear_state_descriptor_offset = 1090 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1091 adev->gfx.rlc.avail_scratch_ram_locations = 1092 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1093 adev->gfx.rlc.reg_restore_list_size = 1094 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1095 adev->gfx.rlc.reg_list_format_start = 1096 le32_to_cpu(rlc_hdr->reg_list_format_start); 1097 adev->gfx.rlc.reg_list_format_separate_start = 1098 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1099 adev->gfx.rlc.starting_offsets_start = 1100 le32_to_cpu(rlc_hdr->starting_offsets_start); 1101 adev->gfx.rlc.reg_list_format_size_bytes = 1102 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1103 adev->gfx.rlc.reg_list_size_bytes = 1104 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1105 1106 
adev->gfx.rlc.register_list_format = 1107 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1108 adev->gfx.rlc.reg_list_size_bytes, M_DRM, GFP_KERNEL); 1109 1110 if (!adev->gfx.rlc.register_list_format) { 1111 err = -ENOMEM; 1112 goto out; 1113 } 1114 1115 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1116 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1117 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 1118 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1119 1120 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1121 1122 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1123 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1124 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 1125 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1126 1127 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1128 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_mec_2", chip_name); 1129 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1130 if (err == -ENOENT) { 1131 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_mec", chip_name); 1132 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1133 } 1134 } else { 1135 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_mec", chip_name); 1136 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1137 } 1138 if (err) 1139 goto out; 1140 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1141 if (err) 1142 goto out; 1143 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1144 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1145 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1146 1147 if ((adev->asic_type != CHIP_STONEY) && 1148 (adev->asic_type != CHIP_TOPAZ)) { 1149 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1150 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_mec2_2", chip_name); 1151 err = 
request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1152 if (err == -ENOENT) { 1153 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_mec2", chip_name); 1154 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1155 } 1156 } else { 1157 snprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_mec2", chip_name); 1158 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1159 } 1160 if (!err) { 1161 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1162 if (err) 1163 goto out; 1164 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1165 adev->gfx.mec2_fw->data; 1166 adev->gfx.mec2_fw_version = 1167 le32_to_cpu(cp_hdr->header.ucode_version); 1168 adev->gfx.mec2_feature_version = 1169 le32_to_cpu(cp_hdr->ucode_feature_version); 1170 } else { 1171 err = 0; 1172 adev->gfx.mec2_fw = NULL; 1173 } 1174 } 1175 1176 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { 1177 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1178 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1179 info->fw = adev->gfx.pfp_fw; 1180 header = (const struct common_firmware_header *)info->fw->data; 1181 adev->firmware.fw_size += 1182 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1183 1184 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1185 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1186 info->fw = adev->gfx.me_fw; 1187 header = (const struct common_firmware_header *)info->fw->data; 1188 adev->firmware.fw_size += 1189 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1190 1191 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1192 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1193 info->fw = adev->gfx.ce_fw; 1194 header = (const struct common_firmware_header *)info->fw->data; 1195 adev->firmware.fw_size += 1196 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1197 1198 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1199 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1200 info->fw = adev->gfx.rlc_fw; 1201 header = (const struct common_firmware_header 
*)info->fw->data; 1202 adev->firmware.fw_size += 1203 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1204 1205 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1206 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1207 info->fw = adev->gfx.mec_fw; 1208 header = (const struct common_firmware_header *)info->fw->data; 1209 adev->firmware.fw_size += 1210 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1211 1212 /* we need account JT in */ 1213 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1214 adev->firmware.fw_size += 1215 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); 1216 1217 if (amdgpu_sriov_vf(adev)) { 1218 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; 1219 info->ucode_id = AMDGPU_UCODE_ID_STORAGE; 1220 info->fw = adev->gfx.mec_fw; 1221 adev->firmware.fw_size += 1222 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); 1223 } 1224 1225 if (adev->gfx.mec2_fw) { 1226 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1227 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1228 info->fw = adev->gfx.mec2_fw; 1229 header = (const struct common_firmware_header *)info->fw->data; 1230 adev->firmware.fw_size += 1231 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1232 } 1233 1234 } 1235 1236 out: 1237 if (err) { 1238 dev_err(adev->dev, 1239 "gfx8: Failed to load firmware \"%s\"\n", 1240 fw_name); 1241 release_firmware(adev->gfx.pfp_fw); 1242 adev->gfx.pfp_fw = NULL; 1243 release_firmware(adev->gfx.me_fw); 1244 adev->gfx.me_fw = NULL; 1245 release_firmware(adev->gfx.ce_fw); 1246 adev->gfx.ce_fw = NULL; 1247 release_firmware(adev->gfx.rlc_fw); 1248 adev->gfx.rlc_fw = NULL; 1249 release_firmware(adev->gfx.mec_fw); 1250 adev->gfx.mec_fw = NULL; 1251 release_firmware(adev->gfx.mec2_fw); 1252 adev->gfx.mec2_fw = NULL; 1253 } 1254 return err; 1255 } 1256 1257 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1258 volatile u32 *buffer) 1259 { 1260 u32 count = 0, i; 1261 const struct cs_section_def *sect = 
NULL; 1262 const struct cs_extent_def *ext = NULL; 1263 1264 if (adev->gfx.rlc.cs_data == NULL) 1265 return; 1266 if (buffer == NULL) 1267 return; 1268 1269 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1270 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1271 1272 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1273 buffer[count++] = cpu_to_le32(0x80000000); 1274 buffer[count++] = cpu_to_le32(0x80000000); 1275 1276 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1277 for (ext = sect->section; ext->extent != NULL; ++ext) { 1278 if (sect->id == SECT_CONTEXT) { 1279 buffer[count++] = 1280 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1281 buffer[count++] = cpu_to_le32(ext->reg_index - 1282 PACKET3_SET_CONTEXT_REG_START); 1283 for (i = 0; i < ext->reg_count; i++) 1284 buffer[count++] = cpu_to_le32(ext->extent[i]); 1285 } else { 1286 return; 1287 } 1288 } 1289 } 1290 1291 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1292 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1293 PACKET3_SET_CONTEXT_REG_START); 1294 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); 1295 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); 1296 1297 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1298 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1299 1300 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1301 buffer[count++] = cpu_to_le32(0); 1302 } 1303 1304 static void cz_init_cp_jump_table(struct amdgpu_device *adev) 1305 { 1306 const __le32 *fw_data; 1307 volatile u32 *dst_ptr; 1308 int me, i, max_me = 4; 1309 u32 bo_offset = 0; 1310 u32 table_offset, table_size; 1311 1312 if (adev->asic_type == CHIP_CARRIZO) 1313 max_me = 5; 1314 1315 /* write the cp table buffer */ 1316 dst_ptr = adev->gfx.rlc.cp_table_ptr; 1317 for (me = 0; me < max_me; me++) { 1318 if (me 
== 0) { 1319 const struct gfx_firmware_header_v1_0 *hdr = 1320 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1321 fw_data = (const __le32 *) 1322 (adev->gfx.ce_fw->data + 1323 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1324 table_offset = le32_to_cpu(hdr->jt_offset); 1325 table_size = le32_to_cpu(hdr->jt_size); 1326 } else if (me == 1) { 1327 const struct gfx_firmware_header_v1_0 *hdr = 1328 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1329 fw_data = (const __le32 *) 1330 (adev->gfx.pfp_fw->data + 1331 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1332 table_offset = le32_to_cpu(hdr->jt_offset); 1333 table_size = le32_to_cpu(hdr->jt_size); 1334 } else if (me == 2) { 1335 const struct gfx_firmware_header_v1_0 *hdr = 1336 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1337 fw_data = (const __le32 *) 1338 (adev->gfx.me_fw->data + 1339 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1340 table_offset = le32_to_cpu(hdr->jt_offset); 1341 table_size = le32_to_cpu(hdr->jt_size); 1342 } else if (me == 3) { 1343 const struct gfx_firmware_header_v1_0 *hdr = 1344 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1345 fw_data = (const __le32 *) 1346 (adev->gfx.mec_fw->data + 1347 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1348 table_offset = le32_to_cpu(hdr->jt_offset); 1349 table_size = le32_to_cpu(hdr->jt_size); 1350 } else if (me == 4) { 1351 const struct gfx_firmware_header_v1_0 *hdr = 1352 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 1353 fw_data = (const __le32 *) 1354 (adev->gfx.mec2_fw->data + 1355 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1356 table_offset = le32_to_cpu(hdr->jt_offset); 1357 table_size = le32_to_cpu(hdr->jt_size); 1358 } 1359 1360 for (i = 0; i < table_size; i ++) { 1361 dst_ptr[bo_offset + i] = 1362 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 1363 } 1364 1365 bo_offset += table_size; 1366 } 1367 } 1368 1369 static 
void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1370 { 1371 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); 1372 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); 1373 } 1374 1375 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1376 { 1377 volatile u32 *dst_ptr; 1378 u32 dws; 1379 const struct cs_section_def *cs_data; 1380 int r; 1381 1382 adev->gfx.rlc.cs_data = vi_cs_data; 1383 1384 cs_data = adev->gfx.rlc.cs_data; 1385 1386 if (cs_data) { 1387 /* clear state block */ 1388 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1389 1390 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 1391 AMDGPU_GEM_DOMAIN_VRAM, 1392 &adev->gfx.rlc.clear_state_obj, 1393 (u64 *)&adev->gfx.rlc.clear_state_gpu_addr, 1394 (void **)&adev->gfx.rlc.cs_ptr); 1395 if (r) { 1396 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1397 gfx_v8_0_rlc_fini(adev); 1398 return r; 1399 } 1400 1401 /* set up the cs buffer */ 1402 dst_ptr = adev->gfx.rlc.cs_ptr; 1403 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1404 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1405 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1406 } 1407 1408 if ((adev->asic_type == CHIP_CARRIZO) || 1409 (adev->asic_type == CHIP_STONEY)) { 1410 adev->gfx.rlc.cp_table_size = (96 * 5 * 4) + (64 * 1024); /* JT + GDS */ 1411 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 1412 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1413 &adev->gfx.rlc.cp_table_obj, 1414 (u64 *)&adev->gfx.rlc.cp_table_gpu_addr, 1415 (void **)&adev->gfx.rlc.cp_table_ptr); 1416 if (r) { 1417 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); 1418 return r; 1419 } 1420 1421 cz_init_cp_jump_table(adev); 1422 1423 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 1424 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1425 } 1426 1427 return 0; 1428 } 1429 1430 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1431 { 1432 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, 
NULL, NULL); 1433 } 1434 1435 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1436 { 1437 int r; 1438 u32 *hpd; 1439 size_t mec_hpd_size; 1440 1441 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1442 1443 /* take ownership of the relevant compute queues */ 1444 amdgpu_gfx_compute_queue_acquire(adev); 1445 1446 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; 1447 1448 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1449 AMDGPU_GEM_DOMAIN_GTT, 1450 &adev->gfx.mec.hpd_eop_obj, 1451 &adev->gfx.mec.hpd_eop_gpu_addr, 1452 (void **)&hpd); 1453 if (r) { 1454 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1455 return r; 1456 } 1457 1458 memset(hpd, 0, mec_hpd_size); 1459 1460 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1461 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1462 1463 return 0; 1464 } 1465 1466 static const u32 vgpr_init_compute_shader[] = 1467 { 1468 0x7e000209, 0x7e020208, 1469 0x7e040207, 0x7e060206, 1470 0x7e080205, 0x7e0a0204, 1471 0x7e0c0203, 0x7e0e0202, 1472 0x7e100201, 0x7e120200, 1473 0x7e140209, 0x7e160208, 1474 0x7e180207, 0x7e1a0206, 1475 0x7e1c0205, 0x7e1e0204, 1476 0x7e200203, 0x7e220202, 1477 0x7e240201, 0x7e260200, 1478 0x7e280209, 0x7e2a0208, 1479 0x7e2c0207, 0x7e2e0206, 1480 0x7e300205, 0x7e320204, 1481 0x7e340203, 0x7e360202, 1482 0x7e380201, 0x7e3a0200, 1483 0x7e3c0209, 0x7e3e0208, 1484 0x7e400207, 0x7e420206, 1485 0x7e440205, 0x7e460204, 1486 0x7e480203, 0x7e4a0202, 1487 0x7e4c0201, 0x7e4e0200, 1488 0x7e500209, 0x7e520208, 1489 0x7e540207, 0x7e560206, 1490 0x7e580205, 0x7e5a0204, 1491 0x7e5c0203, 0x7e5e0202, 1492 0x7e600201, 0x7e620200, 1493 0x7e640209, 0x7e660208, 1494 0x7e680207, 0x7e6a0206, 1495 0x7e6c0205, 0x7e6e0204, 1496 0x7e700203, 0x7e720202, 1497 0x7e740201, 0x7e760200, 1498 0x7e780209, 0x7e7a0208, 1499 0x7e7c0207, 0x7e7e0206, 1500 0xbf8a0000, 0xbf810000, 1501 }; 1502 1503 static const u32 sgpr_init_compute_shader[] = 1504 { 1505 0xbe8a0100, 0xbe8c0102, 1506 
/*
 * Register state for the VGPR init dispatch issued by
 * gfx_v8_0_do_edc_gpr_workarounds().
 *
 * The table is a flat list of (register, value) pairs: that function walks
 * it two entries at a time and emits one SET_SH_REG packet per pair, using
 * the first entry minus PACKET3_SET_SH_REG_START as the packet's register
 * offset and the second entry as the data.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1565 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1566 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1567 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1568 }; 1569 1570 static const u32 sgpr2_init_regs[] = 1571 { 1572 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1573 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1574 mmCOMPUTE_NUM_THREAD_X, 256*5, 1575 mmCOMPUTE_NUM_THREAD_Y, 1, 1576 mmCOMPUTE_NUM_THREAD_Z, 1, 1577 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1578 mmCOMPUTE_PGM_RSRC2, 20, 1579 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1580 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1581 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1582 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1583 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1584 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1585 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1586 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1587 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1588 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1589 }; 1590 1591 static const u32 sec_ded_counter_registers[] = 1592 { 1593 mmCPC_EDC_ATC_CNT, 1594 mmCPC_EDC_SCRATCH_CNT, 1595 mmCPC_EDC_UCODE_CNT, 1596 mmCPF_EDC_ATC_CNT, 1597 mmCPF_EDC_ROQ_CNT, 1598 mmCPF_EDC_TAG_CNT, 1599 mmCPG_EDC_ATC_CNT, 1600 mmCPG_EDC_DMA_CNT, 1601 mmCPG_EDC_TAG_CNT, 1602 mmDC_EDC_CSINVOC_CNT, 1603 mmDC_EDC_RESTORE_CNT, 1604 mmDC_EDC_STATE_CNT, 1605 mmGDS_EDC_CNT, 1606 mmGDS_EDC_GRBM_CNT, 1607 mmGDS_EDC_OA_DED, 1608 mmSPI_EDC_CNT, 1609 mmSQC_ATC_EDC_GATCL1_CNT, 1610 mmSQC_EDC_CNT, 1611 mmSQ_EDC_DED_CNT, 1612 mmSQ_EDC_INFO, 1613 mmSQ_EDC_SEC_CNT, 1614 mmTCC_EDC_CNT, 1615 mmTCP_ATC_EDC_GATCL1_CNT, 1616 mmTCP_EDC_CNT, 1617 mmTD_EDC_CNT 1618 }; 1619 1620 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1621 { 1622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1623 struct amdgpu_ib ib; 1624 struct dma_fence *f = NULL; 1625 int r, i; 1626 u32 tmp; 1627 unsigned total_size, vgpr_offset, sgpr_offset; 1628 u64 gpu_addr; 1629 1630 /* only supported on CZ */ 1631 if (adev->asic_type != CHIP_CARRIZO) 1632 return 0; 1633 1634 /* bail if the 
compute ring is not ready */ 1635 if (!ring->ready) 1636 return 0; 1637 1638 tmp = RREG32(mmGB_EDC_MODE); 1639 WREG32(mmGB_EDC_MODE, 0); 1640 1641 total_size = 1642 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1643 total_size += 1644 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1645 total_size += 1646 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1647 total_size = ALIGN(total_size, 256); 1648 vgpr_offset = total_size; 1649 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1650 sgpr_offset = total_size; 1651 total_size += sizeof(sgpr_init_compute_shader); 1652 1653 /* allocate an indirect buffer to put the commands in */ 1654 memset(&ib, 0, sizeof(ib)); 1655 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1656 if (r) { 1657 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1658 return r; 1659 } 1660 1661 /* load the compute shaders */ 1662 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1663 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1664 1665 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1666 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1667 1668 /* init the ib length to 0 */ 1669 ib.length_dw = 0; 1670 1671 /* VGPR */ 1672 /* write the register state for the compute dispatch */ 1673 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1675 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1676 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1677 } 1678 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1679 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1681 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1682 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1683 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1684 1685 /* write dispatch packet */ 1686 
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1687 ib.ptr[ib.length_dw++] = 8; /* x */ 1688 ib.ptr[ib.length_dw++] = 1; /* y */ 1689 ib.ptr[ib.length_dw++] = 1; /* z */ 1690 ib.ptr[ib.length_dw++] = 1691 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1692 1693 /* write CS partial flush packet */ 1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1695 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1696 1697 /* SGPR1 */ 1698 /* write the register state for the compute dispatch */ 1699 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1701 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1702 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1703 } 1704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1705 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1707 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1708 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1709 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1710 1711 /* write dispatch packet */ 1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1713 ib.ptr[ib.length_dw++] = 8; /* x */ 1714 ib.ptr[ib.length_dw++] = 1; /* y */ 1715 ib.ptr[ib.length_dw++] = 1; /* z */ 1716 ib.ptr[ib.length_dw++] = 1717 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1718 1719 /* write CS partial flush packet */ 1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1721 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1722 1723 /* SGPR2 */ 1724 /* write the register state for the compute dispatch */ 1725 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1727 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1728 ib.ptr[ib.length_dw++] = 
sgpr2_init_regs[i + 1]; 1729 } 1730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1731 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1733 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1736 1737 /* write dispatch packet */ 1738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1739 ib.ptr[ib.length_dw++] = 8; /* x */ 1740 ib.ptr[ib.length_dw++] = 1; /* y */ 1741 ib.ptr[ib.length_dw++] = 1; /* z */ 1742 ib.ptr[ib.length_dw++] = 1743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1744 1745 /* write CS partial flush packet */ 1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1748 1749 /* shedule the ib on the ring */ 1750 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1751 if (r) { 1752 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1753 goto fail; 1754 } 1755 1756 /* wait for the GPU to finish processing the IB */ 1757 r = dma_fence_wait(f, false); 1758 if (r) { 1759 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1760 goto fail; 1761 } 1762 1763 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1764 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1765 WREG32(mmGB_EDC_MODE, tmp); 1766 1767 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1768 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1769 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1770 1771 1772 /* read back registers to clear the counters */ 1773 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1774 RREG32(sec_ded_counter_registers[i]); 1775 1776 fail: 1777 amdgpu_ib_free(adev, &ib, NULL); 1778 dma_fence_put(f); 1779 1780 return r; 1781 } 1782 1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1784 { 1785 u32 gb_addr_config; 1786 u32 mc_shared_chmap, 
mc_arb_ramcfg; 1787 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1788 u32 tmp; 1789 int ret; 1790 1791 switch (adev->asic_type) { 1792 case CHIP_TOPAZ: 1793 adev->gfx.config.max_shader_engines = 1; 1794 adev->gfx.config.max_tile_pipes = 2; 1795 adev->gfx.config.max_cu_per_sh = 6; 1796 adev->gfx.config.max_sh_per_se = 1; 1797 adev->gfx.config.max_backends_per_se = 2; 1798 adev->gfx.config.max_texture_channel_caches = 2; 1799 adev->gfx.config.max_gprs = 256; 1800 adev->gfx.config.max_gs_threads = 32; 1801 adev->gfx.config.max_hw_contexts = 8; 1802 1803 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1804 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1805 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1806 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1807 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1808 break; 1809 case CHIP_FIJI: 1810 adev->gfx.config.max_shader_engines = 4; 1811 adev->gfx.config.max_tile_pipes = 16; 1812 adev->gfx.config.max_cu_per_sh = 16; 1813 adev->gfx.config.max_sh_per_se = 1; 1814 adev->gfx.config.max_backends_per_se = 4; 1815 adev->gfx.config.max_texture_channel_caches = 16; 1816 adev->gfx.config.max_gprs = 256; 1817 adev->gfx.config.max_gs_threads = 32; 1818 adev->gfx.config.max_hw_contexts = 8; 1819 1820 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1821 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1822 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1823 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1824 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1825 break; 1826 case CHIP_POLARIS11: 1827 case CHIP_POLARIS12: 1828 ret = amdgpu_atombios_get_gfx_info(adev); 1829 if (ret) 1830 return ret; 1831 adev->gfx.config.max_gprs = 256; 1832 adev->gfx.config.max_gs_threads = 32; 1833 adev->gfx.config.max_hw_contexts = 8; 1834 1835 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1836 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1837 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 
1838 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1839 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1840 break; 1841 case CHIP_POLARIS10: 1842 case CHIP_VEGAM: 1843 ret = amdgpu_atombios_get_gfx_info(adev); 1844 if (ret) 1845 return ret; 1846 adev->gfx.config.max_gprs = 256; 1847 adev->gfx.config.max_gs_threads = 32; 1848 adev->gfx.config.max_hw_contexts = 8; 1849 1850 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1851 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1852 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1853 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1854 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1855 break; 1856 case CHIP_TONGA: 1857 adev->gfx.config.max_shader_engines = 4; 1858 adev->gfx.config.max_tile_pipes = 8; 1859 adev->gfx.config.max_cu_per_sh = 8; 1860 adev->gfx.config.max_sh_per_se = 1; 1861 adev->gfx.config.max_backends_per_se = 2; 1862 adev->gfx.config.max_texture_channel_caches = 8; 1863 adev->gfx.config.max_gprs = 256; 1864 adev->gfx.config.max_gs_threads = 32; 1865 adev->gfx.config.max_hw_contexts = 8; 1866 1867 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1868 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1869 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1870 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1871 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1872 break; 1873 case CHIP_CARRIZO: 1874 adev->gfx.config.max_shader_engines = 1; 1875 adev->gfx.config.max_tile_pipes = 2; 1876 adev->gfx.config.max_sh_per_se = 1; 1877 adev->gfx.config.max_backends_per_se = 2; 1878 adev->gfx.config.max_cu_per_sh = 8; 1879 adev->gfx.config.max_texture_channel_caches = 2; 1880 adev->gfx.config.max_gprs = 256; 1881 adev->gfx.config.max_gs_threads = 32; 1882 adev->gfx.config.max_hw_contexts = 8; 1883 1884 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1885 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1886 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1887 
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1888 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1889 break; 1890 case CHIP_STONEY: 1891 adev->gfx.config.max_shader_engines = 1; 1892 adev->gfx.config.max_tile_pipes = 2; 1893 adev->gfx.config.max_sh_per_se = 1; 1894 adev->gfx.config.max_backends_per_se = 1; 1895 adev->gfx.config.max_cu_per_sh = 3; 1896 adev->gfx.config.max_texture_channel_caches = 2; 1897 adev->gfx.config.max_gprs = 256; 1898 adev->gfx.config.max_gs_threads = 16; 1899 adev->gfx.config.max_hw_contexts = 8; 1900 1901 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1902 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1903 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1904 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1905 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1906 break; 1907 default: 1908 adev->gfx.config.max_shader_engines = 2; 1909 adev->gfx.config.max_tile_pipes = 4; 1910 adev->gfx.config.max_cu_per_sh = 2; 1911 adev->gfx.config.max_sh_per_se = 1; 1912 adev->gfx.config.max_backends_per_se = 2; 1913 adev->gfx.config.max_texture_channel_caches = 4; 1914 adev->gfx.config.max_gprs = 256; 1915 adev->gfx.config.max_gs_threads = 32; 1916 adev->gfx.config.max_hw_contexts = 8; 1917 1918 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1919 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1920 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1921 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1922 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1923 break; 1924 } 1925 1926 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1927 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1928 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1929 1930 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1931 adev->gfx.config.mem_max_burst_length_bytes = 256; 1932 if (adev->flags & AMD_IS_APU) { 1933 /* Get memory bank mapping mode. 
*/ 1934 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1935 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1936 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1937 1938 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1939 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1940 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1941 1942 /* Validate settings in case only one DIMM installed. */ 1943 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1944 dimm00_addr_map = 0; 1945 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1946 dimm01_addr_map = 0; 1947 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1948 dimm10_addr_map = 0; 1949 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1950 dimm11_addr_map = 0; 1951 1952 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1953 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. 
*/ 1954 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1955 adev->gfx.config.mem_row_size_in_kb = 2; 1956 else 1957 adev->gfx.config.mem_row_size_in_kb = 1; 1958 } else { 1959 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1960 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1961 if (adev->gfx.config.mem_row_size_in_kb > 4) 1962 adev->gfx.config.mem_row_size_in_kb = 4; 1963 } 1964 1965 adev->gfx.config.shader_engine_tile_size = 32; 1966 adev->gfx.config.num_gpus = 1; 1967 adev->gfx.config.multi_gpu_tile_size = 64; 1968 1969 /* fix up row size */ 1970 switch (adev->gfx.config.mem_row_size_in_kb) { 1971 case 1: 1972 default: 1973 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1974 break; 1975 case 2: 1976 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1977 break; 1978 case 4: 1979 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1980 break; 1981 } 1982 adev->gfx.config.gb_addr_config = gb_addr_config; 1983 1984 return 0; 1985 } 1986 1987 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1988 int mec, int pipe, int queue) 1989 { 1990 int r; 1991 unsigned irq_type; 1992 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1993 1994 ring = &adev->gfx.compute_ring[ring_id]; 1995 1996 /* mec0 is me1 */ 1997 ring->me = mec + 1; 1998 ring->pipe = pipe; 1999 ring->queue = queue; 2000 2001 ring->ring_obj = NULL; 2002 ring->use_doorbell = true; 2003 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; 2004 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2005 + (ring_id * GFX8_MEC_HPD_SIZE); 2006 ksprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2007 2008 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2009 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2010 + ring->pipe; 2011 2012 /* type-2 packets are deprecated on 
MEC, use type-3 instead */ 2013 r = amdgpu_ring_init(adev, ring, 1024, 2014 &adev->gfx.eop_irq, irq_type); 2015 if (r) 2016 return r; 2017 2018 2019 return 0; 2020 } 2021 2022 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); 2023 2024 static int gfx_v8_0_sw_init(void *handle) 2025 { 2026 int i, j, k, r, ring_id; 2027 struct amdgpu_ring *ring; 2028 struct amdgpu_kiq *kiq; 2029 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2030 2031 switch (adev->asic_type) { 2032 case CHIP_TONGA: 2033 case CHIP_CARRIZO: 2034 case CHIP_FIJI: 2035 case CHIP_POLARIS10: 2036 case CHIP_POLARIS11: 2037 case CHIP_POLARIS12: 2038 case CHIP_VEGAM: 2039 adev->gfx.mec.num_mec = 2; 2040 break; 2041 case CHIP_TOPAZ: 2042 case CHIP_STONEY: 2043 default: 2044 adev->gfx.mec.num_mec = 1; 2045 break; 2046 } 2047 2048 adev->gfx.mec.num_pipe_per_mec = 4; 2049 adev->gfx.mec.num_queue_per_pipe = 8; 2050 2051 /* KIQ event */ 2052 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); 2053 if (r) 2054 return r; 2055 2056 /* EOP Event */ 2057 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); 2058 if (r) 2059 return r; 2060 2061 /* Privileged reg */ 2062 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, 2063 &adev->gfx.priv_reg_irq); 2064 if (r) 2065 return r; 2066 2067 /* Privileged inst */ 2068 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, 2069 &adev->gfx.priv_inst_irq); 2070 if (r) 2071 return r; 2072 2073 /* Add CP EDC/ECC irq */ 2074 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, 2075 &adev->gfx.cp_ecc_error_irq); 2076 if (r) 2077 return r; 2078 2079 /* SQ interrupts. 
*/ 2080 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, 2081 &adev->gfx.sq_irq); 2082 if (r) { 2083 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); 2084 return r; 2085 } 2086 2087 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); 2088 2089 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2090 2091 gfx_v8_0_scratch_init(adev); 2092 2093 r = gfx_v8_0_init_microcode(adev); 2094 if (r) { 2095 DRM_ERROR("Failed to load gfx firmware!\n"); 2096 return r; 2097 } 2098 2099 r = gfx_v8_0_rlc_init(adev); 2100 if (r) { 2101 DRM_ERROR("Failed to init rlc BOs!\n"); 2102 return r; 2103 } 2104 2105 r = gfx_v8_0_mec_init(adev); 2106 if (r) { 2107 DRM_ERROR("Failed to init MEC BOs!\n"); 2108 return r; 2109 } 2110 2111 /* set up the gfx ring */ 2112 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2113 ring = &adev->gfx.gfx_ring[i]; 2114 ring->ring_obj = NULL; 2115 ksprintf(ring->name, "gfx"); 2116 /* no gfx doorbells on iceland */ 2117 if (adev->asic_type != CHIP_TOPAZ) { 2118 ring->use_doorbell = true; 2119 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2120 } 2121 2122 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2123 AMDGPU_CP_IRQ_GFX_EOP); 2124 if (r) 2125 return r; 2126 } 2127 2128 2129 /* set up the compute queues - allocate horizontally across pipes */ 2130 ring_id = 0; 2131 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2132 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2133 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2134 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2135 continue; 2136 2137 r = gfx_v8_0_compute_ring_init(adev, 2138 ring_id, 2139 i, k, j); 2140 if (r) 2141 return r; 2142 2143 ring_id++; 2144 } 2145 } 2146 } 2147 2148 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2149 if (r) { 2150 DRM_ERROR("Failed to init KIQ BOs!\n"); 2151 return r; 2152 } 2153 2154 kiq = &adev->gfx.kiq; 2155 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2156 
if (r) 2157 return r; 2158 2159 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2160 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2161 if (r) 2162 return r; 2163 2164 /* reserve GDS, GWS and OA resource for gfx */ 2165 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2166 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2167 &adev->gds.gds_gfx_bo, NULL, NULL); 2168 if (r) 2169 return r; 2170 2171 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2172 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2173 &adev->gds.gws_gfx_bo, NULL, NULL); 2174 if (r) 2175 return r; 2176 2177 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2178 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2179 &adev->gds.oa_gfx_bo, NULL, NULL); 2180 if (r) 2181 return r; 2182 2183 adev->gfx.ce_ram_size = 0x8000; 2184 2185 r = gfx_v8_0_gpu_early_init(adev); 2186 if (r) 2187 return r; 2188 2189 return 0; 2190 } 2191 2192 static int gfx_v8_0_sw_fini(void *handle) 2193 { 2194 int i; 2195 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2196 2197 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2198 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2199 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2200 2201 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2202 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2203 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2204 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2205 2206 amdgpu_gfx_compute_mqd_sw_fini(adev); 2207 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2208 amdgpu_gfx_kiq_fini(adev); 2209 2210 gfx_v8_0_mec_fini(adev); 2211 gfx_v8_0_rlc_fini(adev); 2212 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2213 (u64 *)&adev->gfx.rlc.clear_state_gpu_addr, 2214 (void **)&adev->gfx.rlc.cs_ptr); 2215 if ((adev->asic_type == CHIP_CARRIZO) || 2216 (adev->asic_type == CHIP_STONEY)) { 2217 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 
2218 (u64 *)&adev->gfx.rlc.cp_table_gpu_addr, 2219 (void **)&adev->gfx.rlc.cp_table_ptr); 2220 } 2221 gfx_v8_0_free_microcode(adev); 2222 2223 return 0; 2224 } 2225 2226 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2227 { 2228 uint32_t *modearray, *mod2array; 2229 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2230 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2231 u32 reg_offset; 2232 2233 modearray = adev->gfx.config.tile_mode_array; 2234 mod2array = adev->gfx.config.macrotile_mode_array; 2235 2236 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2237 modearray[reg_offset] = 0; 2238 2239 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2240 mod2array[reg_offset] = 0; 2241 2242 switch (adev->asic_type) { 2243 case CHIP_TOPAZ: 2244 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2245 PIPE_CONFIG(ADDR_SURF_P2) | 2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2248 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2249 PIPE_CONFIG(ADDR_SURF_P2) | 2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2252 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2253 PIPE_CONFIG(ADDR_SURF_P2) | 2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2256 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2257 PIPE_CONFIG(ADDR_SURF_P2) | 2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2260 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2261 PIPE_CONFIG(ADDR_SURF_P2) | 2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2264 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2265 PIPE_CONFIG(ADDR_SURF_P2) | 2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2267 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2268 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2269 PIPE_CONFIG(ADDR_SURF_P2) | 2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2272 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2273 PIPE_CONFIG(ADDR_SURF_P2)); 2274 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2275 PIPE_CONFIG(ADDR_SURF_P2) | 2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2278 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2279 PIPE_CONFIG(ADDR_SURF_P2) | 2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2282 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2283 PIPE_CONFIG(ADDR_SURF_P2) | 2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2286 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2287 PIPE_CONFIG(ADDR_SURF_P2) | 2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2290 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2291 PIPE_CONFIG(ADDR_SURF_P2) | 2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2294 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2295 PIPE_CONFIG(ADDR_SURF_P2) | 2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2298 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2299 PIPE_CONFIG(ADDR_SURF_P2) | 2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2302 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2303 PIPE_CONFIG(ADDR_SURF_P2) | 2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2306 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2307 PIPE_CONFIG(ADDR_SURF_P2) | 2308 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2310 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2311 PIPE_CONFIG(ADDR_SURF_P2) | 2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2314 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2315 PIPE_CONFIG(ADDR_SURF_P2) | 2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2318 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2319 PIPE_CONFIG(ADDR_SURF_P2) | 2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2322 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2323 PIPE_CONFIG(ADDR_SURF_P2) | 2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2326 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2327 PIPE_CONFIG(ADDR_SURF_P2) | 2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2330 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2331 PIPE_CONFIG(ADDR_SURF_P2) | 2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2334 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2335 PIPE_CONFIG(ADDR_SURF_P2) | 2336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2338 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2339 PIPE_CONFIG(ADDR_SURF_P2) | 2340 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2342 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2343 PIPE_CONFIG(ADDR_SURF_P2) | 2344 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2346 2347 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) 
| 2350 NUM_BANKS(ADDR_SURF_8_BANK)); 2351 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2354 NUM_BANKS(ADDR_SURF_8_BANK)); 2355 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2358 NUM_BANKS(ADDR_SURF_8_BANK)); 2359 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2362 NUM_BANKS(ADDR_SURF_8_BANK)); 2363 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2366 NUM_BANKS(ADDR_SURF_8_BANK)); 2367 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2370 NUM_BANKS(ADDR_SURF_8_BANK)); 2371 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2374 NUM_BANKS(ADDR_SURF_8_BANK)); 2375 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2378 NUM_BANKS(ADDR_SURF_16_BANK)); 2379 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2382 NUM_BANKS(ADDR_SURF_16_BANK)); 2383 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2386 NUM_BANKS(ADDR_SURF_16_BANK)); 2387 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2390 NUM_BANKS(ADDR_SURF_16_BANK)); 2391 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2392 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2394 NUM_BANKS(ADDR_SURF_16_BANK)); 2395 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2398 NUM_BANKS(ADDR_SURF_16_BANK)); 2399 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2402 NUM_BANKS(ADDR_SURF_8_BANK)); 2403 2404 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2405 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2406 reg_offset != 23) 2407 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2408 2409 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2410 if (reg_offset != 7) 2411 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2412 2413 break; 2414 case CHIP_FIJI: 2415 case CHIP_VEGAM: 2416 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2420 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2424 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2428 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2432 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2434 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2435 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2436 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2438 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2440 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2442 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2443 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2444 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2446 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2447 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2448 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2450 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2454 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2458 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2462 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2463 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2464 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2466 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2467 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2470 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2471 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2474 
modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2475 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2478 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2479 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2482 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2483 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2486 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2487 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2490 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2491 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2494 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2495 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2498 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2499 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2500 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2502 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2503 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2504 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2506 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2507 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2508 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2510 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2511 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2512 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2513 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2514 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2515 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2516 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2518 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2519 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2520 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2521 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2522 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2523 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2524 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2525 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2526 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2527 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2528 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2530 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2531 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2532 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2533 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2534 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2535 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2536 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2538 2539 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2542 NUM_BANKS(ADDR_SURF_8_BANK)); 2543 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2546 NUM_BANKS(ADDR_SURF_8_BANK)); 2547 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2550 NUM_BANKS(ADDR_SURF_8_BANK)); 2551 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2553 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2554 NUM_BANKS(ADDR_SURF_8_BANK)); 2555 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2558 NUM_BANKS(ADDR_SURF_8_BANK)); 2559 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2562 NUM_BANKS(ADDR_SURF_8_BANK)); 2563 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2566 NUM_BANKS(ADDR_SURF_8_BANK)); 2567 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2568 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2569 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2570 NUM_BANKS(ADDR_SURF_8_BANK)); 2571 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2572 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2573 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2574 NUM_BANKS(ADDR_SURF_8_BANK)); 2575 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2576 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2577 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2578 NUM_BANKS(ADDR_SURF_8_BANK)); 2579 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2580 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2581 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2582 NUM_BANKS(ADDR_SURF_8_BANK)); 2583 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2584 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2585 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2586 NUM_BANKS(ADDR_SURF_8_BANK)); 2587 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2588 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2589 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2590 NUM_BANKS(ADDR_SURF_8_BANK)); 2591 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2594 NUM_BANKS(ADDR_SURF_4_BANK)); 2595 2596 for (reg_offset = 0; 
reg_offset < num_tile_mode_states; reg_offset++) 2597 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2598 2599 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2600 if (reg_offset != 7) 2601 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2602 2603 break; 2604 case CHIP_TONGA: 2605 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2607 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2609 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2611 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2613 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2615 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2616 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2617 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2619 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2620 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2621 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2623 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2624 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2625 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2627 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2629 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2631 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2633 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2635 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2637 modearray[8] = 
(ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2639 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2641 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2643 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2645 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2647 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2649 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2651 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2655 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2656 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2657 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2659 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2660 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2661 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2663 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2664 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2665 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2666 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2667 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2668 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2669 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2670 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2671 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2672 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2673 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2674 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2675 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2676 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2677 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2678 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2679 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2680 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2681 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2682 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2683 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2684 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2685 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2686 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2687 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2688 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2689 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2691 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2692 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2693 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2694 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2695 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2697 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2699 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2700 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2701 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2703 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2704 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2705 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2707 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2708 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2709 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2711 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2712 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2713 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2715 
modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2716 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2717 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2719 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2720 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2721 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2723 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2725 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2727 2728 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2729 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2730 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2731 NUM_BANKS(ADDR_SURF_16_BANK)); 2732 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2735 NUM_BANKS(ADDR_SURF_16_BANK)); 2736 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2739 NUM_BANKS(ADDR_SURF_16_BANK)); 2740 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2743 NUM_BANKS(ADDR_SURF_16_BANK)); 2744 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2747 NUM_BANKS(ADDR_SURF_16_BANK)); 2748 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2751 NUM_BANKS(ADDR_SURF_16_BANK)); 2752 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2755 NUM_BANKS(ADDR_SURF_16_BANK)); 2756 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2759 NUM_BANKS(ADDR_SURF_16_BANK)); 2760 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2763 NUM_BANKS(ADDR_SURF_16_BANK)); 2764 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2765 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2766 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2767 NUM_BANKS(ADDR_SURF_16_BANK)); 2768 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2771 NUM_BANKS(ADDR_SURF_16_BANK)); 2772 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2775 NUM_BANKS(ADDR_SURF_8_BANK)); 2776 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2779 NUM_BANKS(ADDR_SURF_4_BANK)); 2780 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2783 NUM_BANKS(ADDR_SURF_4_BANK)); 2784 2785 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2786 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2787 2788 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2789 if (reg_offset != 7) 2790 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2791 2792 break; 2793 case CHIP_POLARIS11: 2794 case CHIP_POLARIS12: 2795 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2799 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2801 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2803 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2807 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2810 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2811 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2814 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2815 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2817 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2819 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2823 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2825 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2826 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2827 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2828 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2829 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2831 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2833 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2835 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2837 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2841 modearray[12] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2843 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2845 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2847 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2849 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2850 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2851 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2853 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2854 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2857 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2858 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2859 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2860 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2861 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2862 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2863 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2864 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2865 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2866 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2867 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2869 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2870 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2871 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2873 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2874 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2875 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2876 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2877 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2878 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2879 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2881 modearray[22] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2882 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2883 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2885 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2886 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2889 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2890 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2893 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2894 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2897 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2898 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2899 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2901 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2902 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2903 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2905 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2906 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2907 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2909 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2910 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2911 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2913 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2914 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2915 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2917 2918 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2921 NUM_BANKS(ADDR_SURF_16_BANK)); 2922 2923 mod2array[1] 
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2926 NUM_BANKS(ADDR_SURF_16_BANK)); 2927 2928 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2931 NUM_BANKS(ADDR_SURF_16_BANK)); 2932 2933 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2936 NUM_BANKS(ADDR_SURF_16_BANK)); 2937 2938 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2941 NUM_BANKS(ADDR_SURF_16_BANK)); 2942 2943 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2946 NUM_BANKS(ADDR_SURF_16_BANK)); 2947 2948 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2951 NUM_BANKS(ADDR_SURF_16_BANK)); 2952 2953 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2956 NUM_BANKS(ADDR_SURF_16_BANK)); 2957 2958 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2961 NUM_BANKS(ADDR_SURF_16_BANK)); 2962 2963 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2966 NUM_BANKS(ADDR_SURF_16_BANK)); 2967 2968 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2971 NUM_BANKS(ADDR_SURF_16_BANK)); 2972 2973 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2974 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2976 NUM_BANKS(ADDR_SURF_16_BANK)); 2977 2978 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2981 NUM_BANKS(ADDR_SURF_8_BANK)); 2982 2983 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2986 NUM_BANKS(ADDR_SURF_4_BANK)); 2987 2988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2989 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2990 2991 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2992 if (reg_offset != 7) 2993 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2994 2995 break; 2996 case CHIP_POLARIS10: 2997 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3000 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3001 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3003 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3005 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3006 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3007 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3009 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3013 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3017 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3018 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3021 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3023 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3024 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3025 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3026 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3028 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3029 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3031 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3033 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3035 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3037 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3038 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3039 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3043 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3044 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3047 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3051 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3055 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3057 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3059 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3061 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3063 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3064 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3067 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3068 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3069 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3071 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3073 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3075 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3076 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3077 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3079 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3080 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3083 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3084 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3085 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3087 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3088 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3089 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3091 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3092 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3095 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3096 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3099 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3100 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3103 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3104 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3105 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3107 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3109 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3111 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3112 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3113 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3115 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3116 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3117 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3119 3120 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3123 NUM_BANKS(ADDR_SURF_16_BANK)); 3124 3125 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3128 NUM_BANKS(ADDR_SURF_16_BANK)); 3129 3130 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3133 NUM_BANKS(ADDR_SURF_16_BANK)); 3134 3135 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3138 NUM_BANKS(ADDR_SURF_16_BANK)); 3139 3140 mod2array[4] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3143 NUM_BANKS(ADDR_SURF_16_BANK)); 3144 3145 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3148 NUM_BANKS(ADDR_SURF_16_BANK)); 3149 3150 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3153 NUM_BANKS(ADDR_SURF_16_BANK)); 3154 3155 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3158 NUM_BANKS(ADDR_SURF_16_BANK)); 3159 3160 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3163 NUM_BANKS(ADDR_SURF_16_BANK)); 3164 3165 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3168 NUM_BANKS(ADDR_SURF_16_BANK)); 3169 3170 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3173 NUM_BANKS(ADDR_SURF_16_BANK)); 3174 3175 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3178 NUM_BANKS(ADDR_SURF_8_BANK)); 3179 3180 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3183 NUM_BANKS(ADDR_SURF_4_BANK)); 3184 3185 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3188 NUM_BANKS(ADDR_SURF_4_BANK)); 3189 3190 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 
3191 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3192 3193 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3194 if (reg_offset != 7) 3195 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3196 3197 break; 3198 case CHIP_STONEY: 3199 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3200 PIPE_CONFIG(ADDR_SURF_P2) | 3201 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3202 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3203 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3204 PIPE_CONFIG(ADDR_SURF_P2) | 3205 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3206 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3207 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3208 PIPE_CONFIG(ADDR_SURF_P2) | 3209 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3210 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3211 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3212 PIPE_CONFIG(ADDR_SURF_P2) | 3213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3215 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3216 PIPE_CONFIG(ADDR_SURF_P2) | 3217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3219 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3220 PIPE_CONFIG(ADDR_SURF_P2) | 3221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3223 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3224 PIPE_CONFIG(ADDR_SURF_P2) | 3225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3227 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3228 PIPE_CONFIG(ADDR_SURF_P2)); 3229 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3230 PIPE_CONFIG(ADDR_SURF_P2) | 3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3233 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3234 PIPE_CONFIG(ADDR_SURF_P2) | 
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3237 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3238 PIPE_CONFIG(ADDR_SURF_P2) | 3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3241 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3242 PIPE_CONFIG(ADDR_SURF_P2) | 3243 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3245 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3246 PIPE_CONFIG(ADDR_SURF_P2) | 3247 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3249 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3250 PIPE_CONFIG(ADDR_SURF_P2) | 3251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3253 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3254 PIPE_CONFIG(ADDR_SURF_P2) | 3255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3257 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3258 PIPE_CONFIG(ADDR_SURF_P2) | 3259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3261 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3262 PIPE_CONFIG(ADDR_SURF_P2) | 3263 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3265 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3266 PIPE_CONFIG(ADDR_SURF_P2) | 3267 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3269 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3270 PIPE_CONFIG(ADDR_SURF_P2) | 3271 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3273 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3274 PIPE_CONFIG(ADDR_SURF_P2) | 3275 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3276 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3277 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3278 PIPE_CONFIG(ADDR_SURF_P2) | 3279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3281 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3282 PIPE_CONFIG(ADDR_SURF_P2) | 3283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3285 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3286 PIPE_CONFIG(ADDR_SURF_P2) | 3287 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3289 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3290 PIPE_CONFIG(ADDR_SURF_P2) | 3291 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3293 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3294 PIPE_CONFIG(ADDR_SURF_P2) | 3295 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3297 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3298 PIPE_CONFIG(ADDR_SURF_P2) | 3299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3301 3302 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3305 NUM_BANKS(ADDR_SURF_8_BANK)); 3306 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3309 NUM_BANKS(ADDR_SURF_8_BANK)); 3310 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3313 NUM_BANKS(ADDR_SURF_8_BANK)); 3314 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3317 NUM_BANKS(ADDR_SURF_8_BANK)); 3318 mod2array[4] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3321 NUM_BANKS(ADDR_SURF_8_BANK)); 3322 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3325 NUM_BANKS(ADDR_SURF_8_BANK)); 3326 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3329 NUM_BANKS(ADDR_SURF_8_BANK)); 3330 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3333 NUM_BANKS(ADDR_SURF_16_BANK)); 3334 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3337 NUM_BANKS(ADDR_SURF_16_BANK)); 3338 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3341 NUM_BANKS(ADDR_SURF_16_BANK)); 3342 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3345 NUM_BANKS(ADDR_SURF_16_BANK)); 3346 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3349 NUM_BANKS(ADDR_SURF_16_BANK)); 3350 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3353 NUM_BANKS(ADDR_SURF_16_BANK)); 3354 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3357 NUM_BANKS(ADDR_SURF_8_BANK)); 3358 3359 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3360 if (reg_offset != 7 && reg_offset != 12 && 
reg_offset != 17 && 3361 reg_offset != 23) 3362 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3363 3364 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3365 if (reg_offset != 7) 3366 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3367 3368 break; 3369 default: 3370 dev_warn(adev->dev, 3371 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3372 adev->asic_type); 3373 3374 case CHIP_CARRIZO: 3375 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3376 PIPE_CONFIG(ADDR_SURF_P2) | 3377 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3378 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3379 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3380 PIPE_CONFIG(ADDR_SURF_P2) | 3381 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3382 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3383 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3384 PIPE_CONFIG(ADDR_SURF_P2) | 3385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3386 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3387 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3388 PIPE_CONFIG(ADDR_SURF_P2) | 3389 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3390 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3391 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3392 PIPE_CONFIG(ADDR_SURF_P2) | 3393 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3395 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3396 PIPE_CONFIG(ADDR_SURF_P2) | 3397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3399 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3400 PIPE_CONFIG(ADDR_SURF_P2) | 3401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3403 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3404 PIPE_CONFIG(ADDR_SURF_P2)); 3405 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3406 
PIPE_CONFIG(ADDR_SURF_P2) | 3407 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3409 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3410 PIPE_CONFIG(ADDR_SURF_P2) | 3411 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3413 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3414 PIPE_CONFIG(ADDR_SURF_P2) | 3415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3417 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3418 PIPE_CONFIG(ADDR_SURF_P2) | 3419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3421 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3422 PIPE_CONFIG(ADDR_SURF_P2) | 3423 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3425 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3426 PIPE_CONFIG(ADDR_SURF_P2) | 3427 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3429 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3430 PIPE_CONFIG(ADDR_SURF_P2) | 3431 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3433 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3434 PIPE_CONFIG(ADDR_SURF_P2) | 3435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3437 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3438 PIPE_CONFIG(ADDR_SURF_P2) | 3439 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3441 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3442 PIPE_CONFIG(ADDR_SURF_P2) | 3443 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3445 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3446 PIPE_CONFIG(ADDR_SURF_P2) | 3447 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3449 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3450 PIPE_CONFIG(ADDR_SURF_P2) | 3451 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3453 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3454 PIPE_CONFIG(ADDR_SURF_P2) | 3455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3457 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3458 PIPE_CONFIG(ADDR_SURF_P2) | 3459 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3461 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3462 PIPE_CONFIG(ADDR_SURF_P2) | 3463 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3465 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3466 PIPE_CONFIG(ADDR_SURF_P2) | 3467 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3469 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3470 PIPE_CONFIG(ADDR_SURF_P2) | 3471 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3473 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3474 PIPE_CONFIG(ADDR_SURF_P2) | 3475 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3477 3478 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3481 NUM_BANKS(ADDR_SURF_8_BANK)); 3482 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3485 NUM_BANKS(ADDR_SURF_8_BANK)); 3486 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3489 
NUM_BANKS(ADDR_SURF_8_BANK)); 3490 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3493 NUM_BANKS(ADDR_SURF_8_BANK)); 3494 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3497 NUM_BANKS(ADDR_SURF_8_BANK)); 3498 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3501 NUM_BANKS(ADDR_SURF_8_BANK)); 3502 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3505 NUM_BANKS(ADDR_SURF_8_BANK)); 3506 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3509 NUM_BANKS(ADDR_SURF_16_BANK)); 3510 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3513 NUM_BANKS(ADDR_SURF_16_BANK)); 3514 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3517 NUM_BANKS(ADDR_SURF_16_BANK)); 3518 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3521 NUM_BANKS(ADDR_SURF_16_BANK)); 3522 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3525 NUM_BANKS(ADDR_SURF_16_BANK)); 3526 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3529 NUM_BANKS(ADDR_SURF_16_BANK)); 3530 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3531 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3533 NUM_BANKS(ADDR_SURF_8_BANK)); 3534 3535 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3536 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3537 reg_offset != 23) 3538 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3539 3540 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3541 if (reg_offset != 7) 3542 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3543 3544 break; 3545 } 3546 } 3547 3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3549 u32 se_num, u32 sh_num, u32 instance) 3550 { 3551 u32 data; 3552 3553 if (instance == 0xffffffff) 3554 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3555 else 3556 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3557 3558 if (se_num == 0xffffffff) 3559 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3560 else 3561 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3562 3563 if (sh_num == 0xffffffff) 3564 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3565 else 3566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3567 3568 WREG32(mmGRBM_GFX_INDEX, data); 3569 } 3570 3571 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3572 u32 me, u32 pipe, u32 q) 3573 { 3574 vi_srbm_select(adev, me, pipe, q, 0); 3575 } 3576 3577 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3578 { 3579 u32 data, mask; 3580 3581 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3582 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3583 3584 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3585 3586 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3587 adev->gfx.config.max_sh_per_se); 3588 3589 return (~data) & mask; 3590 } 3591 3592 static void 3593 gfx_v8_0_raster_config(struct 
amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3594 { 3595 switch (adev->asic_type) { 3596 case CHIP_FIJI: 3597 case CHIP_VEGAM: 3598 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3599 RB_XSEL2(1) | PKR_MAP(2) | 3600 PKR_XSEL(1) | PKR_YSEL(1) | 3601 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3602 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3603 SE_PAIR_YSEL(2); 3604 break; 3605 case CHIP_TONGA: 3606 case CHIP_POLARIS10: 3607 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3608 SE_XSEL(1) | SE_YSEL(1); 3609 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3610 SE_PAIR_YSEL(2); 3611 break; 3612 case CHIP_TOPAZ: 3613 case CHIP_CARRIZO: 3614 *rconf |= RB_MAP_PKR0(2); 3615 *rconf1 |= 0x0; 3616 break; 3617 case CHIP_POLARIS11: 3618 case CHIP_POLARIS12: 3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3620 SE_XSEL(1) | SE_YSEL(1); 3621 *rconf1 |= 0x0; 3622 break; 3623 case CHIP_STONEY: 3624 *rconf |= 0x0; 3625 *rconf1 |= 0x0; 3626 break; 3627 default: 3628 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3629 break; 3630 } 3631 } 3632 3633 static void 3634 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3635 u32 raster_config, u32 raster_config_1, 3636 unsigned rb_mask, unsigned num_rb) 3637 { 3638 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3639 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3640 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3641 unsigned rb_per_se = num_rb / num_se; 3642 unsigned se_mask[4]; 3643 unsigned se; 3644 3645 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3646 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3647 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3648 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3649 3650 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3651 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3652 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3653 3654 if ((num_se > 2) && ((!se_mask[0] && 
!se_mask[1]) || 3655 (!se_mask[2] && !se_mask[3]))) { 3656 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3657 3658 if (!se_mask[0] && !se_mask[1]) { 3659 raster_config_1 |= 3660 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3661 } else { 3662 raster_config_1 |= 3663 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3664 } 3665 } 3666 3667 for (se = 0; se < num_se; se++) { 3668 unsigned raster_config_se = raster_config; 3669 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3670 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3671 int idx = (se / 2) * 2; 3672 3673 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3674 raster_config_se &= ~SE_MAP_MASK; 3675 3676 if (!se_mask[idx]) { 3677 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3678 } else { 3679 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3680 } 3681 } 3682 3683 pkr0_mask &= rb_mask; 3684 pkr1_mask &= rb_mask; 3685 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3686 raster_config_se &= ~PKR_MAP_MASK; 3687 3688 if (!pkr0_mask) { 3689 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3690 } else { 3691 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3692 } 3693 } 3694 3695 if (rb_per_se >= 2) { 3696 unsigned rb0_mask = 1 << (se * rb_per_se); 3697 unsigned rb1_mask = rb0_mask << 1; 3698 3699 rb0_mask &= rb_mask; 3700 rb1_mask &= rb_mask; 3701 if (!rb0_mask || !rb1_mask) { 3702 raster_config_se &= ~RB_MAP_PKR0_MASK; 3703 3704 if (!rb0_mask) { 3705 raster_config_se |= 3706 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3707 } else { 3708 raster_config_se |= 3709 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3710 } 3711 } 3712 3713 if (rb_per_se > 2) { 3714 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3715 rb1_mask = rb0_mask << 1; 3716 rb0_mask &= rb_mask; 3717 rb1_mask &= rb_mask; 3718 if (!rb0_mask || !rb1_mask) { 3719 raster_config_se &= ~RB_MAP_PKR1_MASK; 3720 3721 if (!rb0_mask) { 3722 raster_config_se |= 3723 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3724 } else { 3725 raster_config_se |= 3726 
RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3727 } 3728 } 3729 } 3730 } 3731 3732 /* GRBM_GFX_INDEX has a different offset on VI */ 3733 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3734 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3735 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3736 } 3737 3738 /* GRBM_GFX_INDEX has a different offset on VI */ 3739 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3740 } 3741 3742 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3743 { 3744 int i, j; 3745 u32 data; 3746 u32 raster_config = 0, raster_config_1 = 0; 3747 u32 active_rbs = 0; 3748 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3749 adev->gfx.config.max_sh_per_se; 3750 unsigned num_rb_pipes; 3751 3752 mutex_lock(&adev->grbm_idx_mutex); 3753 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3754 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3755 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3756 data = gfx_v8_0_get_rb_active_bitmap(adev); 3757 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3758 rb_bitmap_width_per_sh); 3759 } 3760 } 3761 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3762 3763 adev->gfx.config.backend_enable_mask = active_rbs; 3764 adev->gfx.config.num_rbs = hweight32(active_rbs); 3765 3766 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3767 adev->gfx.config.max_shader_engines, 16); 3768 3769 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3770 3771 if (!adev->gfx.config.backend_enable_mask || 3772 adev->gfx.config.num_rbs >= num_rb_pipes) { 3773 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3774 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3775 } else { 3776 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3777 adev->gfx.config.backend_enable_mask, 3778 num_rb_pipes); 3779 } 3780 3781 /* cache the values for userspace */ 3782 for (i = 0; i < 
adev->gfx.config.max_shader_engines; i++) { 3783 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3784 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3785 adev->gfx.config.rb_config[i][j].rb_backend_disable = 3786 RREG32(mmCC_RB_BACKEND_DISABLE); 3787 adev->gfx.config.rb_config[i][j].user_rb_backend_disable = 3788 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3789 adev->gfx.config.rb_config[i][j].raster_config = 3790 RREG32(mmPA_SC_RASTER_CONFIG); 3791 adev->gfx.config.rb_config[i][j].raster_config_1 = 3792 RREG32(mmPA_SC_RASTER_CONFIG_1); 3793 } 3794 } 3795 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3796 mutex_unlock(&adev->grbm_idx_mutex); 3797 } 3798 3799 /** 3800 * gfx_v8_0_init_compute_vmid - gart enable 3801 * 3802 * @adev: amdgpu_device pointer 3803 * 3804 * Initialize compute vmid sh_mem registers 3805 * 3806 */ 3807 #define DEFAULT_SH_MEM_BASES (0x6000) 3808 #define FIRST_COMPUTE_VMID (8) 3809 #define LAST_COMPUTE_VMID (16) 3810 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3811 { 3812 int i; 3813 uint32_t sh_mem_config; 3814 uint32_t sh_mem_bases; 3815 3816 /* 3817 * Configure apertures: 3818 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3819 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3820 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3821 */ 3822 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3823 3824 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3825 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3826 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3827 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3828 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3829 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3830 3831 mutex_lock(&adev->srbm_mutex); 3832 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3833 vi_srbm_select(adev, 0, 0, 0, i); 3834 /* CP and shaders */ 3835 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3836 WREG32(mmSH_MEM_APE1_BASE, 1); 3837 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3838 
WREG32(mmSH_MEM_BASES, sh_mem_bases); 3839 } 3840 vi_srbm_select(adev, 0, 0, 0, 0); 3841 mutex_unlock(&adev->srbm_mutex); 3842 } 3843 3844 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3845 { 3846 switch (adev->asic_type) { 3847 default: 3848 adev->gfx.config.double_offchip_lds_buf = 1; 3849 break; 3850 case CHIP_CARRIZO: 3851 case CHIP_STONEY: 3852 adev->gfx.config.double_offchip_lds_buf = 0; 3853 break; 3854 } 3855 } 3856 3857 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3858 { 3859 u32 tmp, sh_static_mem_cfg; 3860 int i; 3861 3862 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3863 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3864 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3865 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3866 3867 gfx_v8_0_tiling_mode_table_init(adev); 3868 gfx_v8_0_setup_rb(adev); 3869 gfx_v8_0_get_cu_info(adev); 3870 gfx_v8_0_config_init(adev); 3871 3872 /* XXX SH_MEM regs */ 3873 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3874 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3875 SWIZZLE_ENABLE, 1); 3876 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3877 ELEMENT_SIZE, 1); 3878 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3879 INDEX_STRIDE, 3); 3880 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3881 3882 mutex_lock(&adev->srbm_mutex); 3883 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3884 vi_srbm_select(adev, 0, 0, 0, i); 3885 /* CP and shaders */ 3886 if (i == 0) { 3887 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3888 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3889 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3890 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3891 WREG32(mmSH_MEM_CONFIG, tmp); 3892 WREG32(mmSH_MEM_BASES, 0); 3893 } else { 3894 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3895 tmp = REG_SET_FIELD(tmp, 
SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3896 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3897 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3898 WREG32(mmSH_MEM_CONFIG, tmp); 3899 tmp = adev->gmc.shared_aperture_start >> 48; 3900 WREG32(mmSH_MEM_BASES, tmp); 3901 } 3902 3903 WREG32(mmSH_MEM_APE1_BASE, 1); 3904 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3905 } 3906 vi_srbm_select(adev, 0, 0, 0, 0); 3907 mutex_unlock(&adev->srbm_mutex); 3908 3909 gfx_v8_0_init_compute_vmid(adev); 3910 3911 mutex_lock(&adev->grbm_idx_mutex); 3912 /* 3913 * making sure that the following register writes will be broadcasted 3914 * to all the shaders 3915 */ 3916 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3917 3918 WREG32(mmPA_SC_FIFO_SIZE, 3919 (adev->gfx.config.sc_prim_fifo_size_frontend << 3920 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3921 (adev->gfx.config.sc_prim_fifo_size_backend << 3922 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3923 (adev->gfx.config.sc_hiz_tile_fifo_size << 3924 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3925 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3926 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3927 3928 tmp = RREG32(mmSPI_ARB_PRIORITY); 3929 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3931 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3932 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3933 WREG32(mmSPI_ARB_PRIORITY, tmp); 3934 3935 mutex_unlock(&adev->grbm_idx_mutex); 3936 3937 } 3938 3939 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3940 { 3941 u32 i, j, k; 3942 u32 mask; 3943 3944 mutex_lock(&adev->grbm_idx_mutex); 3945 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3946 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3947 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3948 for (k = 0; k < adev->usec_timeout; k++) { 3949 if 
(RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3950 break; 3951 udelay(1); 3952 } 3953 if (k == adev->usec_timeout) { 3954 gfx_v8_0_select_se_sh(adev, 0xffffffff, 3955 0xffffffff, 0xffffffff); 3956 mutex_unlock(&adev->grbm_idx_mutex); 3957 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 3958 i, j); 3959 return; 3960 } 3961 } 3962 } 3963 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3964 mutex_unlock(&adev->grbm_idx_mutex); 3965 3966 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3967 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3968 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3969 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3970 for (k = 0; k < adev->usec_timeout; k++) { 3971 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3972 break; 3973 udelay(1); 3974 } 3975 } 3976 3977 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3978 bool enable) 3979 { 3980 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3981 3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); 3986 3987 WREG32(mmCP_INT_CNTL_RING0, tmp); 3988 } 3989 3990 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3991 { 3992 /* csib */ 3993 WREG32(mmRLC_CSIB_ADDR_HI, 3994 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3995 WREG32(mmRLC_CSIB_ADDR_LO, 3996 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 3997 WREG32(mmRLC_CSIB_LENGTH, 3998 adev->gfx.rlc.clear_state_size); 3999 } 4000 4001 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 4002 int ind_offset, 4003 int list_size, 4004 int *unique_indices, 4005 int *indices_count, 4006 int max_indices, 4007 int *ind_start_offsets, 4008 int *offset_count, 4009 int max_offset) 4010 { 4011 int indices; 4012 bool new_entry = true; 4013 4014 for (; ind_offset < list_size; ind_offset++) { 4015 4016 if (new_entry) { 4017 new_entry = false; 4018 ind_start_offsets[*offset_count] = ind_offset; 4019 *offset_count = *offset_count + 1; 4020 BUG_ON(*offset_count >= max_offset); 4021 } 4022 4023 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 4024 new_entry = true; 4025 continue; 4026 } 4027 4028 ind_offset += 2; 4029 4030 /* look for the matching indice */ 4031 for (indices = 0; 4032 indices < *indices_count; 4033 indices++) { 4034 if (unique_indices[indices] == 4035 register_list_format[ind_offset]) 4036 break; 4037 } 4038 4039 if (indices >= *indices_count) { 4040 unique_indices[*indices_count] = 4041 register_list_format[ind_offset]; 4042 indices = *indices_count; 4043 *indices_count = *indices_count + 1; 4044 BUG_ON(*indices_count >= max_indices); 4045 } 4046 4047 register_list_format[ind_offset] = indices; 4048 } 4049 } 4050 4051 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 4052 { 4053 int i, temp, data; 4054 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 4055 int indices_count = 0; 4056 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 4057 int offset_count = 0; 4058 4059 int list_size; 4060 unsigned int *register_list_format = 4061 
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, M_DRM, GFP_KERNEL); 4062 if (!register_list_format) 4063 return -ENOMEM; 4064 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 4065 adev->gfx.rlc.reg_list_format_size_bytes); 4066 4067 gfx_v8_0_parse_ind_reg_list(register_list_format, 4068 RLC_FormatDirectRegListLength, 4069 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 4070 unique_indices, 4071 &indices_count, 4072 ARRAY_SIZE(unique_indices), 4073 indirect_start_offsets, 4074 &offset_count, 4075 ARRAY_SIZE(indirect_start_offsets)); 4076 4077 /* save and restore list */ 4078 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); 4079 4080 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 4081 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 4082 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 4083 4084 /* indirect list */ 4085 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 4086 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 4087 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 4088 4089 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 4090 list_size = list_size >> 1; 4091 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 4092 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 4093 4094 /* starting offsets starts */ 4095 WREG32(mmRLC_GPM_SCRATCH_ADDR, 4096 adev->gfx.rlc.starting_offsets_start); 4097 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 4098 WREG32(mmRLC_GPM_SCRATCH_DATA, 4099 indirect_start_offsets[i]); 4100 4101 /* unique indices */ 4102 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 4103 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 4104 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { 4105 if (unique_indices[i] != 0) { 4106 WREG32(temp + i, unique_indices[i] & 0x3FFFF); 4107 WREG32(data + i, unique_indices[i] >> 20); 4108 } 4109 } 4110 kfree(register_list_format); 4111 4112 return 0; 4113 } 4114 4115 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 4116 
{ 4117 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 4118 } 4119 4120 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 4121 { 4122 uint32_t data; 4123 4124 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 4125 4126 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 4128 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 4129 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 4130 WREG32(mmRLC_PG_DELAY, data); 4131 4132 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 4133 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 4134 4135 } 4136 4137 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 4138 bool enable) 4139 { 4140 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 4141 } 4142 4143 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 4144 bool enable) 4145 { 4146 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 4147 } 4148 4149 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 4150 { 4151 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 
0 : 1); 4152 } 4153 4154 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 4155 { 4156 if ((adev->asic_type == CHIP_CARRIZO) || 4157 (adev->asic_type == CHIP_STONEY)) { 4158 gfx_v8_0_init_csb(adev); 4159 gfx_v8_0_init_save_restore_list(adev); 4160 gfx_v8_0_enable_save_restore_machine(adev); 4161 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); 4162 gfx_v8_0_init_power_gating(adev); 4163 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4164 } else if ((adev->asic_type == CHIP_POLARIS11) || 4165 (adev->asic_type == CHIP_POLARIS12) || 4166 (adev->asic_type == CHIP_VEGAM)) { 4167 gfx_v8_0_init_csb(adev); 4168 gfx_v8_0_init_save_restore_list(adev); 4169 gfx_v8_0_enable_save_restore_machine(adev); 4170 gfx_v8_0_init_power_gating(adev); 4171 } 4172 4173 } 4174 4175 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 4176 { 4177 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); 4178 4179 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4180 gfx_v8_0_wait_for_rlc_serdes(adev); 4181 } 4182 4183 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 4184 { 4185 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4186 udelay(50); 4187 4188 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 4189 udelay(50); 4190 } 4191 4192 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 4193 { 4194 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1); 4195 4196 /* carrizo do enable cp interrupt after cp inited */ 4197 if (!(adev->flags & AMD_IS_APU)) 4198 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4199 4200 udelay(50); 4201 } 4202 4203 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 4204 { 4205 const struct rlc_firmware_header_v2_0 *hdr; 4206 const __le32 *fw_data; 4207 unsigned i, fw_size; 4208 4209 if (!adev->gfx.rlc_fw) 4210 return -EINVAL; 4211 4212 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 4213 amdgpu_ucode_print_rlc_hdr(&hdr->header); 4214 4215 fw_data = (const __le32 
*)(adev->gfx.rlc_fw->data + 4216 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4217 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4218 4219 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4220 for (i = 0; i < fw_size; i++) 4221 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4222 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4223 4224 return 0; 4225 } 4226 4227 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4228 { 4229 int r; 4230 u32 tmp; 4231 4232 gfx_v8_0_rlc_stop(adev); 4233 4234 /* disable CG */ 4235 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4236 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4237 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4238 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4239 if (adev->asic_type == CHIP_POLARIS11 || 4240 adev->asic_type == CHIP_POLARIS10 || 4241 adev->asic_type == CHIP_POLARIS12 || 4242 adev->asic_type == CHIP_VEGAM) { 4243 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4244 tmp &= ~0x3; 4245 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4246 } 4247 4248 /* disable PG */ 4249 WREG32(mmRLC_PG_CNTL, 0); 4250 4251 gfx_v8_0_rlc_reset(adev); 4252 gfx_v8_0_init_pg(adev); 4253 4254 4255 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4256 /* legacy rlc firmware loading */ 4257 r = gfx_v8_0_rlc_load_microcode(adev); 4258 if (r) 4259 return r; 4260 } 4261 4262 gfx_v8_0_rlc_start(adev); 4263 4264 return 0; 4265 } 4266 4267 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4268 { 4269 int i; 4270 u32 tmp = RREG32(mmCP_ME_CNTL); 4271 4272 if (enable) { 4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4274 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4276 } else { 4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4280 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4281 adev->gfx.gfx_ring[i].ready = false; 4282 } 4283 WREG32(mmCP_ME_CNTL, 
tmp); 4284 udelay(50); 4285 } 4286 4287 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4288 { 4289 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4290 const struct gfx_firmware_header_v1_0 *ce_hdr; 4291 const struct gfx_firmware_header_v1_0 *me_hdr; 4292 const __le32 *fw_data; 4293 unsigned i, fw_size; 4294 4295 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4296 return -EINVAL; 4297 4298 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4299 adev->gfx.pfp_fw->data; 4300 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4301 adev->gfx.ce_fw->data; 4302 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4303 adev->gfx.me_fw->data; 4304 4305 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4306 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4307 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4308 4309 gfx_v8_0_cp_gfx_enable(adev, false); 4310 4311 /* PFP */ 4312 fw_data = (const __le32 *) 4313 (adev->gfx.pfp_fw->data + 4314 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4315 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4316 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4317 for (i = 0; i < fw_size; i++) 4318 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4319 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4320 4321 /* CE */ 4322 fw_data = (const __le32 *) 4323 (adev->gfx.ce_fw->data + 4324 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4325 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4326 WREG32(mmCP_CE_UCODE_ADDR, 0); 4327 for (i = 0; i < fw_size; i++) 4328 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4329 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4330 4331 /* ME */ 4332 fw_data = (const __le32 *) 4333 (adev->gfx.me_fw->data + 4334 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4335 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4336 WREG32(mmCP_ME_RAM_WADDR, 0); 4337 for (i = 0; i < fw_size; i++) 4338 WREG32(mmCP_ME_RAM_DATA, 
le32_to_cpup(fw_data++)); 4339 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4340 4341 return 0; 4342 } 4343 4344 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4345 { 4346 u32 count = 0; 4347 const struct cs_section_def *sect = NULL; 4348 const struct cs_extent_def *ext = NULL; 4349 4350 /* begin clear state */ 4351 count += 2; 4352 /* context control state */ 4353 count += 3; 4354 4355 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4356 for (ext = sect->section; ext->extent != NULL; ++ext) { 4357 if (sect->id == SECT_CONTEXT) 4358 count += 2 + ext->reg_count; 4359 else 4360 return 0; 4361 } 4362 } 4363 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4364 count += 4; 4365 /* end clear state */ 4366 count += 2; 4367 /* clear state */ 4368 count += 2; 4369 4370 return count; 4371 } 4372 4373 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4374 { 4375 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4376 const struct cs_section_def *sect = NULL; 4377 const struct cs_extent_def *ext = NULL; 4378 int r, i; 4379 4380 /* init the CP */ 4381 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4382 WREG32(mmCP_ENDIAN_SWAP, 0); 4383 WREG32(mmCP_DEVICE_ID, 1); 4384 4385 gfx_v8_0_cp_gfx_enable(adev, true); 4386 4387 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4388 if (r) { 4389 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4390 return r; 4391 } 4392 4393 /* clear state buffer */ 4394 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4395 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4396 4397 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4398 amdgpu_ring_write(ring, 0x80000000); 4399 amdgpu_ring_write(ring, 0x80000000); 4400 4401 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4402 for (ext = sect->section; ext->extent != NULL; ++ext) { 4403 if (sect->id == SECT_CONTEXT) { 4404 amdgpu_ring_write(ring, 4405 PACKET3(PACKET3_SET_CONTEXT_REG, 4406 
ext->reg_count)); 4407 amdgpu_ring_write(ring, 4408 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4409 for (i = 0; i < ext->reg_count; i++) 4410 amdgpu_ring_write(ring, ext->extent[i]); 4411 } 4412 } 4413 } 4414 4415 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4416 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4417 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); 4418 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); 4419 4420 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4421 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4422 4423 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4424 amdgpu_ring_write(ring, 0); 4425 4426 /* init the CE partitions */ 4427 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4428 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4429 amdgpu_ring_write(ring, 0x8000); 4430 amdgpu_ring_write(ring, 0x8000); 4431 4432 amdgpu_ring_commit(ring); 4433 4434 return 0; 4435 } 4436 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) 4437 { 4438 u32 tmp; 4439 /* no gfx doorbells on iceland */ 4440 if (adev->asic_type == CHIP_TOPAZ) 4441 return; 4442 4443 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4444 4445 if (ring->use_doorbell) { 4446 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4447 DOORBELL_OFFSET, ring->doorbell_index); 4448 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4449 DOORBELL_HIT, 0); 4450 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4451 DOORBELL_EN, 1); 4452 } else { 4453 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 4454 } 4455 4456 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4457 4458 if (adev->flags & AMD_IS_APU) 4459 return; 4460 4461 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4462 DOORBELL_RANGE_LOWER, 4463 AMDGPU_DOORBELL_GFX_RING0); 4464 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4465 4466 
WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4467 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4468 } 4469 4470 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4471 { 4472 struct amdgpu_ring *ring; 4473 u32 tmp; 4474 u32 rb_bufsz; 4475 u64 rb_addr, rptr_addr, wptr_gpu_addr; 4476 int r; 4477 4478 /* Set the write pointer delay */ 4479 WREG32(mmCP_RB_WPTR_DELAY, 0); 4480 4481 /* set the RB to use vmid 0 */ 4482 WREG32(mmCP_RB_VMID, 0); 4483 4484 /* Set ring buffer size */ 4485 ring = &adev->gfx.gfx_ring[0]; 4486 rb_bufsz = order_base_2(ring->ring_size / 8); 4487 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4488 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4489 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4490 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4491 #ifdef __BIG_ENDIAN 4492 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4493 #endif 4494 WREG32(mmCP_RB0_CNTL, tmp); 4495 4496 /* Initialize the ring buffer's read and write pointers */ 4497 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4498 ring->wptr = 0; 4499 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4500 4501 /* set the wb address wether it's enabled or not */ 4502 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4503 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4504 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4505 4506 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4507 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 4508 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 4509 mdelay(1); 4510 WREG32(mmCP_RB0_CNTL, tmp); 4511 4512 rb_addr = ring->gpu_addr >> 8; 4513 WREG32(mmCP_RB0_BASE, rb_addr); 4514 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4515 4516 gfx_v8_0_set_cpg_door_bell(adev, ring); 4517 /* start the ring */ 4518 amdgpu_ring_clear_ring(ring); 4519 gfx_v8_0_cp_gfx_start(adev); 4520 ring->ready = true; 4521 r = 
amdgpu_ring_test_ring(ring); 4522 if (r) 4523 ring->ready = false; 4524 4525 return r; 4526 } 4527 4528 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4529 { 4530 int i; 4531 4532 if (enable) { 4533 WREG32(mmCP_MEC_CNTL, 0); 4534 } else { 4535 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4536 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4537 adev->gfx.compute_ring[i].ready = false; 4538 adev->gfx.kiq.ring.ready = false; 4539 } 4540 udelay(50); 4541 } 4542 4543 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4544 { 4545 const struct gfx_firmware_header_v1_0 *mec_hdr; 4546 const __le32 *fw_data; 4547 unsigned i, fw_size; 4548 4549 if (!adev->gfx.mec_fw) 4550 return -EINVAL; 4551 4552 gfx_v8_0_cp_compute_enable(adev, false); 4553 4554 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4555 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4556 4557 fw_data = (const __le32 *) 4558 (adev->gfx.mec_fw->data + 4559 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4560 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4561 4562 /* MEC1 */ 4563 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4564 for (i = 0; i < fw_size; i++) 4565 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4566 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4567 4568 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 4569 if (adev->gfx.mec2_fw) { 4570 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4571 4572 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4573 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4574 4575 fw_data = (const __le32 *) 4576 (adev->gfx.mec2_fw->data + 4577 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4578 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4579 4580 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4581 for (i = 0; i < fw_size; i++) 4582 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4583 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4584 } 4585 4586 return 0; 4587 } 4588 4589 /* KIQ functions */ 4590 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4591 { 4592 uint32_t tmp; 4593 struct amdgpu_device *adev = ring->adev; 4594 4595 /* tell RLC which is KIQ queue */ 4596 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4597 tmp &= 0xffffff00; 4598 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4599 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4600 tmp |= 0x80; 4601 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4602 } 4603 4604 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4605 { 4606 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4607 uint32_t scratch, tmp = 0; 4608 uint64_t queue_mask = 0; 4609 int r, i; 4610 4611 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4612 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4613 continue; 4614 4615 /* This situation may be hit in the future if a new HW 4616 * generation exposes more than 64 queues. 
If so, the 4617 * definition of queue_mask needs updating */ 4618 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4619 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4620 break; 4621 } 4622 4623 queue_mask |= (1ull << i); 4624 } 4625 4626 r = amdgpu_gfx_scratch_get(adev, &scratch); 4627 if (r) { 4628 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4629 return r; 4630 } 4631 WREG32(scratch, 0xCAFEDEAD); 4632 4633 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4634 if (r) { 4635 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4636 amdgpu_gfx_scratch_free(adev, scratch); 4637 return r; 4638 } 4639 /* set resources */ 4640 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4641 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4642 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4643 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4644 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4645 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4646 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4647 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4648 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4649 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4650 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4651 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4652 4653 /* map queues */ 4654 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4655 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4656 amdgpu_ring_write(kiq_ring, 4657 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4658 amdgpu_ring_write(kiq_ring, 4659 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4660 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4661 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4662 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4663 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4664 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4665 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4666 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4667 } 4668 /* write to scratch for completion */ 4669 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4670 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4671 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4672 amdgpu_ring_commit(kiq_ring); 4673 4674 for (i = 0; i < adev->usec_timeout; i++) { 4675 tmp = RREG32(scratch); 4676 if (tmp == 0xDEADBEEF) 4677 break; 4678 DRM_UDELAY(1); 4679 } 4680 if (i >= adev->usec_timeout) { 4681 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4682 scratch, tmp); 4683 r = -EINVAL; 4684 } 4685 amdgpu_gfx_scratch_free(adev, scratch); 4686 4687 return r; 4688 } 4689 4690 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4691 { 4692 int i, r = 0; 4693 4694 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4695 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4696 for (i = 0; i < adev->usec_timeout; i++) { 4697 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4698 break; 4699 udelay(1); 4700 } 4701 if (i == adev->usec_timeout) 4702 r = -ETIMEDOUT; 4703 } 4704 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4705 WREG32(mmCP_HQD_PQ_RPTR, 0); 4706 WREG32(mmCP_HQD_PQ_WPTR, 0); 4707 4708 return r; 4709 } 4710 4711 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4712 { 4713 struct amdgpu_device *adev = ring->adev; 4714 struct vi_mqd *mqd = ring->mqd_ptr; 4715 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4716 uint32_t tmp; 4717 4718 mqd->header = 0xC0310800; 4719 mqd->compute_pipelinestat_enable = 0x00000001; 4720 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4721 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4722 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4723 
mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4724 mqd->compute_misc_reserved = 0x00000003; 4725 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4726 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4727 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4728 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4729 eop_base_addr = ring->eop_gpu_addr >> 8; 4730 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4731 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4732 4733 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4734 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4735 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4736 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4737 4738 mqd->cp_hqd_eop_control = tmp; 4739 4740 /* enable doorbell? */ 4741 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4742 CP_HQD_PQ_DOORBELL_CONTROL, 4743 DOORBELL_EN, 4744 ring->use_doorbell ? 1 : 0); 4745 4746 mqd->cp_hqd_pq_doorbell_control = tmp; 4747 4748 /* set the pointer to the MQD */ 4749 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4750 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4751 4752 /* set MQD vmid to 0 */ 4753 tmp = RREG32(mmCP_MQD_CONTROL); 4754 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4755 mqd->cp_mqd_control = tmp; 4756 4757 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4758 hqd_gpu_addr = ring->gpu_addr >> 8; 4759 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4760 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4761 4762 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4763 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4764 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4765 (order_base_2(ring->ring_size / 4) - 1)); 4766 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4767 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4768 #ifdef __BIG_ENDIAN 4769 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4770 
#endif 4771 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4772 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4774 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4775 mqd->cp_hqd_pq_control = tmp; 4776 4777 /* set the wb address whether it's enabled or not */ 4778 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4779 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4780 mqd->cp_hqd_pq_rptr_report_addr_hi = 4781 upper_32_bits(wb_gpu_addr) & 0xffff; 4782 4783 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4784 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4785 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4786 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4787 4788 tmp = 0; 4789 /* enable the doorbell if requested */ 4790 if (ring->use_doorbell) { 4791 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4792 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4793 DOORBELL_OFFSET, ring->doorbell_index); 4794 4795 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4796 DOORBELL_EN, 1); 4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4798 DOORBELL_SOURCE, 0); 4799 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4800 DOORBELL_HIT, 0); 4801 } 4802 4803 mqd->cp_hqd_pq_doorbell_control = tmp; 4804 4805 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4806 ring->wptr = 0; 4807 mqd->cp_hqd_pq_wptr = ring->wptr; 4808 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4809 4810 /* set the vmid for the queue */ 4811 mqd->cp_hqd_vmid = 0; 4812 4813 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4814 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4815 mqd->cp_hqd_persistent_state = tmp; 4816 4817 /* set MTYPE */ 4818 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4819 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4820 tmp 
= REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4821 mqd->cp_hqd_ib_control = tmp; 4822 4823 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4824 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4825 mqd->cp_hqd_iq_timer = tmp; 4826 4827 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4828 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4829 mqd->cp_hqd_ctx_save_control = tmp; 4830 4831 /* defaults */ 4832 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4833 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); 4834 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4835 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4836 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4837 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4838 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4839 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4840 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4841 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4842 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4843 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4844 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4845 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4846 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4847 4848 /* activate the queue */ 4849 mqd->cp_hqd_active = 1; 4850 4851 return 0; 4852 } 4853 4854 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4855 struct vi_mqd *mqd); 4856 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4857 struct vi_mqd *mqd) 4858 { 4859 uint32_t mqd_reg; 4860 uint32_t *mqd_data; 4861 4862 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4863 mqd_data = &mqd->cp_mqd_base_addr_lo; 4864 4865 /* disable wptr polling */ 4866 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4867 4868 /* program all HQD registers */ 4869 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= 
mmCP_HQD_EOP_CONTROL; mqd_reg++) 4870 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4871 4872 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4873 * This is safe since EOP RPTR==WPTR for any inactive HQD 4874 * on ASICs that do not support context-save. 4875 * EOP writes/reads can start anywhere in the ring. 4876 */ 4877 if (adev->asic_type != CHIP_TONGA) { 4878 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4879 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4880 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4881 } 4882 4883 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4884 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4885 4886 /* activate the HQD */ 4887 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4888 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4889 4890 return 0; 4891 } 4892 4893 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4894 { 4895 struct amdgpu_device *adev = ring->adev; 4896 struct vi_mqd *mqd = ring->mqd_ptr; 4897 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4898 4899 gfx_v8_0_kiq_setting(ring); 4900 4901 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4902 /* reset MQD to a clean status */ 4903 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4904 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4905 4906 /* reset ring buffer */ 4907 ring->wptr = 0; 4908 amdgpu_ring_clear_ring(ring); 4909 mutex_lock(&adev->srbm_mutex); 4910 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4911 gfx_v8_0_mqd_commit(adev, mqd); 4912 vi_srbm_select(adev, 0, 0, 0, 0); 4913 mutex_unlock(&adev->srbm_mutex); 4914 } else { 4915 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4916 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4917 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4918 mutex_lock(&adev->srbm_mutex); 4919 vi_srbm_select(adev, ring->me, ring->pipe, 
ring->queue, 0); 4920 gfx_v8_0_mqd_init(ring); 4921 gfx_v8_0_mqd_commit(adev, mqd); 4922 vi_srbm_select(adev, 0, 0, 0, 0); 4923 mutex_unlock(&adev->srbm_mutex); 4924 4925 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4926 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4927 } 4928 4929 return 0; 4930 } 4931 4932 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4933 { 4934 struct amdgpu_device *adev = ring->adev; 4935 struct vi_mqd *mqd = ring->mqd_ptr; 4936 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4937 4938 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4939 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4940 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4941 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4942 mutex_lock(&adev->srbm_mutex); 4943 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4944 gfx_v8_0_mqd_init(ring); 4945 vi_srbm_select(adev, 0, 0, 0, 0); 4946 mutex_unlock(&adev->srbm_mutex); 4947 4948 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4949 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4950 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4951 /* reset MQD to a clean status */ 4952 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4953 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4954 /* reset ring buffer */ 4955 ring->wptr = 0; 4956 amdgpu_ring_clear_ring(ring); 4957 } else { 4958 amdgpu_ring_clear_ring(ring); 4959 } 4960 return 0; 4961 } 4962 4963 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4964 { 4965 if (adev->asic_type > CHIP_TONGA) { 4966 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4967 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4968 } 4969 /* enable doorbells */ 4970 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4971 } 4972 4973 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4974 
{ 4975 struct amdgpu_ring *ring = NULL; 4976 int r = 0, i; 4977 4978 gfx_v8_0_cp_compute_enable(adev, true); 4979 4980 ring = &adev->gfx.kiq.ring; 4981 4982 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4983 if (unlikely(r != 0)) 4984 goto done; 4985 4986 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4987 if (!r) { 4988 r = gfx_v8_0_kiq_init_queue(ring); 4989 amdgpu_bo_kunmap(ring->mqd_obj); 4990 ring->mqd_ptr = NULL; 4991 } 4992 amdgpu_bo_unreserve(ring->mqd_obj); 4993 if (r) 4994 goto done; 4995 4996 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4997 ring = &adev->gfx.compute_ring[i]; 4998 4999 r = amdgpu_bo_reserve(ring->mqd_obj, false); 5000 if (unlikely(r != 0)) 5001 goto done; 5002 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 5003 if (!r) { 5004 r = gfx_v8_0_kcq_init_queue(ring); 5005 amdgpu_bo_kunmap(ring->mqd_obj); 5006 ring->mqd_ptr = NULL; 5007 } 5008 amdgpu_bo_unreserve(ring->mqd_obj); 5009 if (r) 5010 goto done; 5011 } 5012 5013 gfx_v8_0_set_mec_doorbell_range(adev); 5014 5015 r = gfx_v8_0_kiq_kcq_enable(adev); 5016 if (r) 5017 goto done; 5018 5019 /* Test KIQ */ 5020 ring = &adev->gfx.kiq.ring; 5021 ring->ready = true; 5022 r = amdgpu_ring_test_ring(ring); 5023 if (r) { 5024 ring->ready = false; 5025 goto done; 5026 } 5027 5028 /* Test KCQs */ 5029 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5030 ring = &adev->gfx.compute_ring[i]; 5031 ring->ready = true; 5032 r = amdgpu_ring_test_ring(ring); 5033 if (r) 5034 ring->ready = false; 5035 } 5036 5037 done: 5038 return r; 5039 } 5040 5041 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5042 { 5043 int r; 5044 5045 if (!(adev->flags & AMD_IS_APU)) 5046 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5047 5048 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 5049 /* legacy firmware loading */ 5050 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5051 if (r) 5052 return r; 5053 5054 r = gfx_v8_0_cp_compute_load_microcode(adev); 5055 if (r) 5056 return r; 5057 } 5058 5059 r = 
gfx_v8_0_cp_gfx_resume(adev); 5060 if (r) 5061 return r; 5062 5063 r = gfx_v8_0_kiq_resume(adev); 5064 if (r) 5065 return r; 5066 5067 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5068 5069 return 0; 5070 } 5071 5072 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5073 { 5074 gfx_v8_0_cp_gfx_enable(adev, enable); 5075 gfx_v8_0_cp_compute_enable(adev, enable); 5076 } 5077 5078 static int gfx_v8_0_hw_init(void *handle) 5079 { 5080 int r; 5081 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5082 5083 gfx_v8_0_init_golden_registers(adev); 5084 gfx_v8_0_gpu_init(adev); 5085 5086 r = gfx_v8_0_rlc_resume(adev); 5087 if (r) 5088 return r; 5089 5090 r = gfx_v8_0_cp_resume(adev); 5091 5092 return r; 5093 } 5094 5095 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) 5096 { 5097 struct amdgpu_device *adev = kiq_ring->adev; 5098 uint32_t scratch, tmp = 0; 5099 int r, i; 5100 5101 r = amdgpu_gfx_scratch_get(adev, &scratch); 5102 if (r) { 5103 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5104 return r; 5105 } 5106 WREG32(scratch, 0xCAFEDEAD); 5107 5108 r = amdgpu_ring_alloc(kiq_ring, 10); 5109 if (r) { 5110 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5111 amdgpu_gfx_scratch_free(adev, scratch); 5112 return r; 5113 } 5114 5115 /* unmap queues */ 5116 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5117 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5118 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5119 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5120 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5121 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5122 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5123 amdgpu_ring_write(kiq_ring, 0); 5124 amdgpu_ring_write(kiq_ring, 0); 5125 amdgpu_ring_write(kiq_ring, 0); 5126 /* write to scratch for completion */ 5127 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5128 
amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5129 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5130 amdgpu_ring_commit(kiq_ring); 5131 5132 for (i = 0; i < adev->usec_timeout; i++) { 5133 tmp = RREG32(scratch); 5134 if (tmp == 0xDEADBEEF) 5135 break; 5136 DRM_UDELAY(1); 5137 } 5138 if (i >= adev->usec_timeout) { 5139 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5140 r = -EINVAL; 5141 } 5142 amdgpu_gfx_scratch_free(adev, scratch); 5143 return r; 5144 } 5145 5146 static int gfx_v8_0_hw_fini(void *handle) 5147 { 5148 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5149 int i; 5150 5151 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5152 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5153 5154 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 5155 5156 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); 5157 5158 /* disable KCQ to avoid CPC touch memory not valid anymore */ 5159 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5160 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5161 5162 if (amdgpu_sriov_vf(adev)) { 5163 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5164 return 0; 5165 } 5166 gfx_v8_0_cp_enable(adev, false); 5167 gfx_v8_0_rlc_stop(adev); 5168 5169 amdgpu_device_ip_set_powergating_state(adev, 5170 AMD_IP_BLOCK_TYPE_GFX, 5171 AMD_PG_STATE_UNGATE); 5172 5173 return 0; 5174 } 5175 5176 static int gfx_v8_0_suspend(void *handle) 5177 { 5178 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5179 adev->gfx.in_suspend = true; 5180 return gfx_v8_0_hw_fini(adev); 5181 } 5182 5183 static int gfx_v8_0_resume(void *handle) 5184 { 5185 int r; 5186 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5187 5188 r = gfx_v8_0_hw_init(adev); 5189 adev->gfx.in_suspend = false; 5190 return r; 5191 } 5192 5193 static bool gfx_v8_0_is_idle(void *handle) 5194 { 5195 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5196 5197 if 
(REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5198 return false; 5199 else 5200 return true; 5201 } 5202 5203 static int gfx_v8_0_wait_for_idle(void *handle) 5204 { 5205 unsigned i; 5206 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5207 5208 for (i = 0; i < adev->usec_timeout; i++) { 5209 if (gfx_v8_0_is_idle(handle)) 5210 return 0; 5211 5212 udelay(1); 5213 } 5214 return -ETIMEDOUT; 5215 } 5216 5217 static bool gfx_v8_0_check_soft_reset(void *handle) 5218 { 5219 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5220 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5221 u32 tmp; 5222 5223 /* GRBM_STATUS */ 5224 tmp = RREG32(mmGRBM_STATUS); 5225 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5226 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5227 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5228 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5229 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5230 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5231 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5232 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5233 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5234 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5235 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5236 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5237 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5238 } 5239 5240 /* GRBM_STATUS2 */ 5241 tmp = RREG32(mmGRBM_STATUS2); 5242 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5243 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5244 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5245 5246 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5247 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5248 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5249 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5250 SOFT_RESET_CPF, 1); 5251 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5252 
SOFT_RESET_CPC, 1); 5253 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5254 SOFT_RESET_CPG, 1); 5255 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5256 SOFT_RESET_GRBM, 1); 5257 } 5258 5259 /* SRBM_STATUS */ 5260 tmp = RREG32(mmSRBM_STATUS); 5261 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5262 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5263 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5264 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5265 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5266 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5267 5268 if (grbm_soft_reset || srbm_soft_reset) { 5269 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5270 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5271 return true; 5272 } else { 5273 adev->gfx.grbm_soft_reset = 0; 5274 adev->gfx.srbm_soft_reset = 0; 5275 return false; 5276 } 5277 } 5278 5279 static int gfx_v8_0_pre_soft_reset(void *handle) 5280 { 5281 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5282 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5283 5284 if ((!adev->gfx.grbm_soft_reset) && 5285 (!adev->gfx.srbm_soft_reset)) 5286 return 0; 5287 5288 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5289 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5290 5291 /* stop the rlc */ 5292 gfx_v8_0_rlc_stop(adev); 5293 5294 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5295 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5296 /* Disable GFX parsing/prefetching */ 5297 gfx_v8_0_cp_gfx_enable(adev, false); 5298 5299 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5300 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5301 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5302 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5303 int i; 5304 5305 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5306 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5307 5308 
mutex_lock(&adev->srbm_mutex); 5309 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5310 gfx_v8_0_deactivate_hqd(adev, 2); 5311 vi_srbm_select(adev, 0, 0, 0, 0); 5312 mutex_unlock(&adev->srbm_mutex); 5313 } 5314 /* Disable MEC parsing/prefetching */ 5315 gfx_v8_0_cp_compute_enable(adev, false); 5316 } 5317 5318 return 0; 5319 } 5320 5321 static int gfx_v8_0_soft_reset(void *handle) 5322 { 5323 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5324 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5325 u32 tmp; 5326 5327 if ((!adev->gfx.grbm_soft_reset) && 5328 (!adev->gfx.srbm_soft_reset)) 5329 return 0; 5330 5331 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5332 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5333 5334 if (grbm_soft_reset || srbm_soft_reset) { 5335 tmp = RREG32(mmGMCON_DEBUG); 5336 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5337 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5338 WREG32(mmGMCON_DEBUG, tmp); 5339 udelay(50); 5340 } 5341 5342 if (grbm_soft_reset) { 5343 tmp = RREG32(mmGRBM_SOFT_RESET); 5344 tmp |= grbm_soft_reset; 5345 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5346 WREG32(mmGRBM_SOFT_RESET, tmp); 5347 tmp = RREG32(mmGRBM_SOFT_RESET); 5348 5349 udelay(50); 5350 5351 tmp &= ~grbm_soft_reset; 5352 WREG32(mmGRBM_SOFT_RESET, tmp); 5353 tmp = RREG32(mmGRBM_SOFT_RESET); 5354 } 5355 5356 if (srbm_soft_reset) { 5357 tmp = RREG32(mmSRBM_SOFT_RESET); 5358 tmp |= srbm_soft_reset; 5359 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5360 WREG32(mmSRBM_SOFT_RESET, tmp); 5361 tmp = RREG32(mmSRBM_SOFT_RESET); 5362 5363 udelay(50); 5364 5365 tmp &= ~srbm_soft_reset; 5366 WREG32(mmSRBM_SOFT_RESET, tmp); 5367 tmp = RREG32(mmSRBM_SOFT_RESET); 5368 } 5369 5370 if (grbm_soft_reset || srbm_soft_reset) { 5371 tmp = RREG32(mmGMCON_DEBUG); 5372 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5373 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5374 WREG32(mmGMCON_DEBUG, tmp); 5375 } 5376 5377 /* 
Wait a little for things to settle down */ 5378 udelay(50); 5379 5380 return 0; 5381 } 5382 5383 static int gfx_v8_0_post_soft_reset(void *handle) 5384 { 5385 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5386 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5387 5388 if ((!adev->gfx.grbm_soft_reset) && 5389 (!adev->gfx.srbm_soft_reset)) 5390 return 0; 5391 5392 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5393 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5394 5395 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5396 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5397 gfx_v8_0_cp_gfx_resume(adev); 5398 5399 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5400 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5401 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5402 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5403 int i; 5404 5405 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5406 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5407 5408 mutex_lock(&adev->srbm_mutex); 5409 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5410 gfx_v8_0_deactivate_hqd(adev, 2); 5411 vi_srbm_select(adev, 0, 0, 0, 0); 5412 mutex_unlock(&adev->srbm_mutex); 5413 } 5414 gfx_v8_0_kiq_resume(adev); 5415 } 5416 gfx_v8_0_rlc_start(adev); 5417 5418 return 0; 5419 } 5420 5421 /** 5422 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5423 * 5424 * @adev: amdgpu_device pointer 5425 * 5426 * Fetches a GPU clock counter snapshot. 5427 * Returns the 64 bit clock counter snapshot. 
5428 */ 5429 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5430 { 5431 uint64_t clock; 5432 5433 mutex_lock(&adev->gfx.gpu_clock_mutex); 5434 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5435 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5436 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5437 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5438 return clock; 5439 } 5440 5441 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5442 uint32_t vmid, 5443 uint32_t gds_base, uint32_t gds_size, 5444 uint32_t gws_base, uint32_t gws_size, 5445 uint32_t oa_base, uint32_t oa_size) 5446 { 5447 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5448 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5449 5450 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5451 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5452 5453 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5454 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5455 5456 /* GDS Base */ 5457 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5458 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5459 WRITE_DATA_DST_SEL(0))); 5460 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5461 amdgpu_ring_write(ring, 0); 5462 amdgpu_ring_write(ring, gds_base); 5463 5464 /* GDS Size */ 5465 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5466 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5467 WRITE_DATA_DST_SEL(0))); 5468 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5469 amdgpu_ring_write(ring, 0); 5470 amdgpu_ring_write(ring, gds_size); 5471 5472 /* GWS */ 5473 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5474 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5475 WRITE_DATA_DST_SEL(0))); 5476 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5477 amdgpu_ring_write(ring, 0); 5478 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5479 5480 /* OA */ 5481 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5482 
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5483 WRITE_DATA_DST_SEL(0))); 5484 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5485 amdgpu_ring_write(ring, 0); 5486 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5487 } 5488 5489 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5490 { 5491 WREG32(mmSQ_IND_INDEX, 5492 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5493 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5494 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5495 (SQ_IND_INDEX__FORCE_READ_MASK)); 5496 return RREG32(mmSQ_IND_DATA); 5497 } 5498 5499 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5500 uint32_t wave, uint32_t thread, 5501 uint32_t regno, uint32_t num, uint32_t *out) 5502 { 5503 WREG32(mmSQ_IND_INDEX, 5504 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5505 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5506 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5507 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5508 (SQ_IND_INDEX__FORCE_READ_MASK) | 5509 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5510 while (num--) 5511 *(out++) = RREG32(mmSQ_IND_DATA); 5512 } 5513 5514 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5515 { 5516 /* type 0 wave data */ 5517 dst[(*no_fields)++] = 0; 5518 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5519 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5520 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5521 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5522 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5523 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5524 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5525 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5526 
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5527 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5528 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5529 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5530 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5531 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5532 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5533 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5534 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5535 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5536 } 5537 5538 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5539 uint32_t wave, uint32_t start, 5540 uint32_t size, uint32_t *dst) 5541 { 5542 wave_read_regs( 5543 adev, simd, wave, 0, 5544 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5545 } 5546 5547 5548 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5549 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5550 .select_se_sh = &gfx_v8_0_select_se_sh, 5551 .read_wave_data = &gfx_v8_0_read_wave_data, 5552 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5553 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5554 }; 5555 5556 static int gfx_v8_0_early_init(void *handle) 5557 { 5558 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5559 5560 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5561 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5562 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5563 gfx_v8_0_set_ring_funcs(adev); 5564 gfx_v8_0_set_irq_funcs(adev); 5565 gfx_v8_0_set_gds_init(adev); 5566 gfx_v8_0_set_rlc_funcs(adev); 5567 5568 return 0; 5569 } 5570 5571 static int gfx_v8_0_late_init(void *handle) 5572 { 5573 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5574 int r; 
5575 5576 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5577 if (r) 5578 return r; 5579 5580 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5581 if (r) 5582 return r; 5583 5584 /* requires IBs so do in late init after IB pool is initialized */ 5585 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5586 if (r) 5587 return r; 5588 5589 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5590 if (r) { 5591 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5592 return r; 5593 } 5594 5595 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5596 if (r) { 5597 DRM_ERROR( 5598 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5599 r); 5600 return r; 5601 } 5602 5603 return 0; 5604 } 5605 5606 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5607 bool enable) 5608 { 5609 if (((adev->asic_type == CHIP_POLARIS11) || 5610 (adev->asic_type == CHIP_POLARIS12) || 5611 (adev->asic_type == CHIP_VEGAM)) && 5612 adev->powerplay.pp_funcs->set_powergating_by_smu) 5613 /* Send msg to SMU via Powerplay */ 5614 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); 5615 5616 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5617 } 5618 5619 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5620 bool enable) 5621 { 5622 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5623 } 5624 5625 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5626 bool enable) 5627 { 5628 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5629 } 5630 5631 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5632 bool enable) 5633 { 5634 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5635 } 5636 5637 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5638 bool enable) 5639 { 5640 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 
1 : 0); 5641 5642 /* Read any GFX register to wake up GFX. */ 5643 if (!enable) 5644 RREG32(mmDB_RENDER_CONTROL); 5645 } 5646 5647 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5648 bool enable) 5649 { 5650 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5651 cz_enable_gfx_cg_power_gating(adev, true); 5652 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5653 cz_enable_gfx_pipeline_power_gating(adev, true); 5654 } else { 5655 cz_enable_gfx_cg_power_gating(adev, false); 5656 cz_enable_gfx_pipeline_power_gating(adev, false); 5657 } 5658 } 5659 5660 static int gfx_v8_0_set_powergating_state(void *handle, 5661 enum amd_powergating_state state) 5662 { 5663 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5664 bool enable = (state == AMD_PG_STATE_GATE); 5665 5666 if (amdgpu_sriov_vf(adev)) 5667 return 0; 5668 5669 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5670 AMD_PG_SUPPORT_RLC_SMU_HS | 5671 AMD_PG_SUPPORT_CP | 5672 AMD_PG_SUPPORT_GFX_DMG)) 5673 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5674 switch (adev->asic_type) { 5675 case CHIP_CARRIZO: 5676 case CHIP_STONEY: 5677 5678 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5679 cz_enable_sck_slow_down_on_power_up(adev, true); 5680 cz_enable_sck_slow_down_on_power_down(adev, true); 5681 } else { 5682 cz_enable_sck_slow_down_on_power_up(adev, false); 5683 cz_enable_sck_slow_down_on_power_down(adev, false); 5684 } 5685 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5686 cz_enable_cp_power_gating(adev, true); 5687 else 5688 cz_enable_cp_power_gating(adev, false); 5689 5690 cz_update_gfx_cg_power_gating(adev, enable); 5691 5692 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5693 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5694 else 5695 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5696 5697 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5698 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5699 else 5700 
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5701 break; 5702 case CHIP_POLARIS11: 5703 case CHIP_POLARIS12: 5704 case CHIP_VEGAM: 5705 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5706 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5707 else 5708 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5709 5710 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5711 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5712 else 5713 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5714 5715 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5716 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5717 else 5718 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5719 break; 5720 default: 5721 break; 5722 } 5723 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5724 AMD_PG_SUPPORT_RLC_SMU_HS | 5725 AMD_PG_SUPPORT_CP | 5726 AMD_PG_SUPPORT_GFX_DMG)) 5727 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5728 return 0; 5729 } 5730 5731 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5732 { 5733 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5734 int data; 5735 5736 if (amdgpu_sriov_vf(adev)) 5737 *flags = 0; 5738 5739 /* AMD_CG_SUPPORT_GFX_MGCG */ 5740 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5741 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5742 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5743 5744 /* AMD_CG_SUPPORT_GFX_CGLG */ 5745 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5746 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5747 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5748 5749 /* AMD_CG_SUPPORT_GFX_CGLS */ 5750 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5751 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5752 5753 /* AMD_CG_SUPPORT_GFX_CGTS */ 5754 data = RREG32(mmCGTS_SM_CTRL_REG); 5755 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5756 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5757 5758 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5759 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5760 *flags |= 
AMD_CG_SUPPORT_GFX_CGTS_LS; 5761 5762 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5763 data = RREG32(mmRLC_MEM_SLP_CNTL); 5764 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5765 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5766 5767 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5768 data = RREG32(mmCP_MEM_SLP_CNTL); 5769 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5770 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5771 } 5772 5773 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5774 uint32_t reg_addr, uint32_t cmd) 5775 { 5776 uint32_t data; 5777 5778 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5779 5780 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5781 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5782 5783 data = RREG32(mmRLC_SERDES_WR_CTRL); 5784 if (adev->asic_type == CHIP_STONEY) 5785 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5786 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5787 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5788 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5789 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5790 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5791 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5792 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5793 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5794 else 5795 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5796 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5797 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5798 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5799 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5800 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5801 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5802 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5803 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5804 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5805 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5806 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5807 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5808 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5809 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5810 5811 
WREG32(mmRLC_SERDES_WR_CTRL, data); 5812 } 5813 5814 #define MSG_ENTER_RLC_SAFE_MODE 1 5815 #define MSG_EXIT_RLC_SAFE_MODE 0 5816 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5817 #define RLC_GPR_REG2__REQ__SHIFT 0 5818 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5819 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5820 5821 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5822 { 5823 u32 data; 5824 unsigned i; 5825 5826 data = RREG32(mmRLC_CNTL); 5827 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5828 return; 5829 5830 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5831 data |= RLC_SAFE_MODE__CMD_MASK; 5832 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5833 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5834 WREG32(mmRLC_SAFE_MODE, data); 5835 5836 for (i = 0; i < adev->usec_timeout; i++) { 5837 if ((RREG32(mmRLC_GPM_STAT) & 5838 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5839 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5840 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5841 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5842 break; 5843 udelay(1); 5844 } 5845 5846 for (i = 0; i < adev->usec_timeout; i++) { 5847 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5848 break; 5849 udelay(1); 5850 } 5851 adev->gfx.rlc.in_safe_mode = true; 5852 } 5853 } 5854 5855 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5856 { 5857 u32 data = 0; 5858 unsigned i; 5859 5860 data = RREG32(mmRLC_CNTL); 5861 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5862 return; 5863 5864 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5865 if (adev->gfx.rlc.in_safe_mode) { 5866 data |= RLC_SAFE_MODE__CMD_MASK; 5867 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5868 WREG32(mmRLC_SAFE_MODE, data); 5869 adev->gfx.rlc.in_safe_mode = false; 5870 } 5871 } 5872 5873 for (i = 0; i < adev->usec_timeout; i++) { 5874 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5875 break; 5876 udelay(1); 5877 } 5878 } 5879 5880 static 
const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5881 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5882 .exit_safe_mode = iceland_exit_rlc_safe_mode 5883 }; 5884 5885 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5886 bool enable) 5887 { 5888 uint32_t temp, data; 5889 5890 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5891 5892 /* It is disabled by HW by default */ 5893 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5894 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5895 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5896 /* 1 - RLC memory Light sleep */ 5897 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5898 5899 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5900 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5901 } 5902 5903 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5904 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5905 if (adev->flags & AMD_IS_APU) 5906 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5907 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5908 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5909 else 5910 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5911 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5912 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5913 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5914 5915 if (temp != data) 5916 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5917 5918 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5919 gfx_v8_0_wait_for_rlc_serdes(adev); 5920 5921 /* 5 - clear mgcg override */ 5922 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5923 5924 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5925 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5926 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5927 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5928 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5929 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5930 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5931 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5932 (adev->cg_flags & 
AMD_CG_SUPPORT_GFX_CGTS_LS)) 5933 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5934 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5935 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5936 if (temp != data) 5937 WREG32(mmCGTS_SM_CTRL_REG, data); 5938 } 5939 udelay(50); 5940 5941 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5942 gfx_v8_0_wait_for_rlc_serdes(adev); 5943 } else { 5944 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5945 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5946 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5947 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5948 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5949 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5950 if (temp != data) 5951 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5952 5953 /* 2 - disable MGLS in RLC */ 5954 data = RREG32(mmRLC_MEM_SLP_CNTL); 5955 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5956 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5957 WREG32(mmRLC_MEM_SLP_CNTL, data); 5958 } 5959 5960 /* 3 - disable MGLS in CP */ 5961 data = RREG32(mmCP_MEM_SLP_CNTL); 5962 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5963 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5964 WREG32(mmCP_MEM_SLP_CNTL, data); 5965 } 5966 5967 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5968 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5969 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5970 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5971 if (temp != data) 5972 WREG32(mmCGTS_SM_CTRL_REG, data); 5973 5974 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5975 gfx_v8_0_wait_for_rlc_serdes(adev); 5976 5977 /* 6 - set mgcg override */ 5978 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5979 5980 udelay(50); 5981 5982 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5983 gfx_v8_0_wait_for_rlc_serdes(adev); 5984 } 5985 5986 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5987 } 5988 5989 static void 
gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5990 bool enable) 5991 { 5992 uint32_t temp, temp1, data, data1; 5993 5994 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5995 5996 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5997 5998 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5999 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6000 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 6001 if (temp1 != data1) 6002 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6003 6004 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6005 gfx_v8_0_wait_for_rlc_serdes(adev); 6006 6007 /* 2 - clear cgcg override */ 6008 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6009 6010 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6011 gfx_v8_0_wait_for_rlc_serdes(adev); 6012 6013 /* 3 - write cmd to set CGLS */ 6014 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 6015 6016 /* 4 - enable cgcg */ 6017 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 6018 6019 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6020 /* enable cgls*/ 6021 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6022 6023 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6024 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 6025 6026 if (temp1 != data1) 6027 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6028 } else { 6029 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6030 } 6031 6032 if (temp != data) 6033 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6034 6035 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ 6036 * Cmp_busy/GFX_Idle interrupts 6037 */ 6038 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6039 } else { 6040 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 6041 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 6042 6043 /* TEST CGCG */ 6044 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6045 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 6046 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 6047 if (temp1 != data1) 6048 
WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6049 6050 /* read gfx register to wake up cgcg */ 6051 RREG32(mmCB_CGTT_SCLK_CTRL); 6052 RREG32(mmCB_CGTT_SCLK_CTRL); 6053 RREG32(mmCB_CGTT_SCLK_CTRL); 6054 RREG32(mmCB_CGTT_SCLK_CTRL); 6055 6056 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6057 gfx_v8_0_wait_for_rlc_serdes(adev); 6058 6059 /* write cmd to Set CGCG Overrride */ 6060 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6061 6062 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6063 gfx_v8_0_wait_for_rlc_serdes(adev); 6064 6065 /* write cmd to Clear CGLS */ 6066 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6067 6068 /* disable cgcg, cgls should be disabled too. */ 6069 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6070 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6071 if (temp != data) 6072 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6073 /* enable interrupts again for PG */ 6074 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6075 } 6076 6077 gfx_v8_0_wait_for_rlc_serdes(adev); 6078 6079 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6080 } 6081 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6082 bool enable) 6083 { 6084 if (enable) { 6085 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6086 * === MGCG + MGLS + TS(CG/LS) === 6087 */ 6088 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6089 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6090 } else { 6091 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6092 * === CGCG + CGLS === 6093 */ 6094 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6095 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6096 } 6097 return 0; 6098 } 6099 6100 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6101 enum amd_clockgating_state state) 6102 { 6103 uint32_t msg_id, pp_state = 0; 6104 uint32_t pp_support_state = 0; 6105 6106 if (adev->cg_flags & 
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6107 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6108 pp_support_state = PP_STATE_SUPPORT_LS; 6109 pp_state = PP_STATE_LS; 6110 } 6111 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6112 pp_support_state |= PP_STATE_SUPPORT_CG; 6113 pp_state |= PP_STATE_CG; 6114 } 6115 if (state == AMD_CG_STATE_UNGATE) 6116 pp_state = 0; 6117 6118 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6119 PP_BLOCK_GFX_CG, 6120 pp_support_state, 6121 pp_state); 6122 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6123 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6124 } 6125 6126 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6127 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6128 pp_support_state = PP_STATE_SUPPORT_LS; 6129 pp_state = PP_STATE_LS; 6130 } 6131 6132 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6133 pp_support_state |= PP_STATE_SUPPORT_CG; 6134 pp_state |= PP_STATE_CG; 6135 } 6136 6137 if (state == AMD_CG_STATE_UNGATE) 6138 pp_state = 0; 6139 6140 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6141 PP_BLOCK_GFX_MG, 6142 pp_support_state, 6143 pp_state); 6144 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6145 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6146 } 6147 6148 return 0; 6149 } 6150 6151 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6152 enum amd_clockgating_state state) 6153 { 6154 6155 uint32_t msg_id, pp_state = 0; 6156 uint32_t pp_support_state = 0; 6157 6158 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6159 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6160 pp_support_state = PP_STATE_SUPPORT_LS; 6161 pp_state = PP_STATE_LS; 6162 } 6163 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6164 pp_support_state |= PP_STATE_SUPPORT_CG; 6165 pp_state |= PP_STATE_CG; 6166 } 6167 if (state == AMD_CG_STATE_UNGATE) 6168 pp_state = 0; 6169 6170 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6171 PP_BLOCK_GFX_CG, 6172 
pp_support_state, 6173 pp_state); 6174 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6175 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6176 } 6177 6178 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6179 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6180 pp_support_state = PP_STATE_SUPPORT_LS; 6181 pp_state = PP_STATE_LS; 6182 } 6183 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6184 pp_support_state |= PP_STATE_SUPPORT_CG; 6185 pp_state |= PP_STATE_CG; 6186 } 6187 if (state == AMD_CG_STATE_UNGATE) 6188 pp_state = 0; 6189 6190 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6191 PP_BLOCK_GFX_3D, 6192 pp_support_state, 6193 pp_state); 6194 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6195 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6196 } 6197 6198 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6199 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6200 pp_support_state = PP_STATE_SUPPORT_LS; 6201 pp_state = PP_STATE_LS; 6202 } 6203 6204 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6205 pp_support_state |= PP_STATE_SUPPORT_CG; 6206 pp_state |= PP_STATE_CG; 6207 } 6208 6209 if (state == AMD_CG_STATE_UNGATE) 6210 pp_state = 0; 6211 6212 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6213 PP_BLOCK_GFX_MG, 6214 pp_support_state, 6215 pp_state); 6216 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6217 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6218 } 6219 6220 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6221 pp_support_state = PP_STATE_SUPPORT_LS; 6222 6223 if (state == AMD_CG_STATE_UNGATE) 6224 pp_state = 0; 6225 else 6226 pp_state = PP_STATE_LS; 6227 6228 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6229 PP_BLOCK_GFX_RLC, 6230 pp_support_state, 6231 pp_state); 6232 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6233 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6234 } 6235 6236 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6237 pp_support_state = PP_STATE_SUPPORT_LS; 
6238 6239 if (state == AMD_CG_STATE_UNGATE) 6240 pp_state = 0; 6241 else 6242 pp_state = PP_STATE_LS; 6243 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6244 PP_BLOCK_GFX_CP, 6245 pp_support_state, 6246 pp_state); 6247 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6248 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6249 } 6250 6251 return 0; 6252 } 6253 6254 static int gfx_v8_0_set_clockgating_state(void *handle, 6255 enum amd_clockgating_state state) 6256 { 6257 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6258 6259 if (amdgpu_sriov_vf(adev)) 6260 return 0; 6261 6262 switch (adev->asic_type) { 6263 case CHIP_FIJI: 6264 case CHIP_CARRIZO: 6265 case CHIP_STONEY: 6266 gfx_v8_0_update_gfx_clock_gating(adev, 6267 state == AMD_CG_STATE_GATE); 6268 break; 6269 case CHIP_TONGA: 6270 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6271 break; 6272 case CHIP_POLARIS10: 6273 case CHIP_POLARIS11: 6274 case CHIP_POLARIS12: 6275 case CHIP_VEGAM: 6276 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6277 break; 6278 default: 6279 break; 6280 } 6281 return 0; 6282 } 6283 6284 static uint64_t gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6285 { 6286 return ring->adev->wb.wb[ring->rptr_offs]; 6287 } 6288 6289 static uint64_t gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6290 { 6291 struct amdgpu_device *adev = ring->adev; 6292 6293 if (ring->use_doorbell) 6294 /* XXX check if swapping is necessary on BE */ 6295 return ring->adev->wb.wb[ring->wptr_offs]; 6296 else 6297 return RREG32(mmCP_RB0_WPTR); 6298 } 6299 6300 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6301 { 6302 struct amdgpu_device *adev = ring->adev; 6303 6304 if (ring->use_doorbell) { 6305 /* XXX check if swapping is necessary on BE */ 6306 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6307 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6308 } else { 6309 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6310 (void)RREG32(mmCP_RB0_WPTR); 
/* Closes the register branch and the body of gfx_v8_0_ring_set_wptr_gfx(). */
	}
}

/*
 * Emit a WAIT_REG_MEM packet that references mmGPU_HDP_FLUSH_REQ and
 * mmGPU_HDP_FLUSH_DONE with a per-engine done mask.
 *
 * Compute and KIQ rings pick the mask from ring->me / ring->pipe; a ring
 * whose me is neither 1 nor 2 emits nothing.  Other rings use the CP0 mask
 * and select the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);	/* reference value */
	amdgpu_ring_write(ring, ref_and_mask);	/* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

/* Emit two EVENT_WRITE packets: VS_PARTIAL_FLUSH followed by VGT_FLUSH. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

/*
 * Emit an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet
 * pointing at @ib for the given @vmid.
 *
 * Under SR-IOV, IBs flagged AMDGPU_IB_FLAG_PREEMPT get the PRE_ENB control
 * bit, and non-CE ones are preceded by DE metadata.  @ctx_switch is not
 * used by this implementation.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

/*
 * Emit an INDIRECT_BUFFER packet for a compute ring; the control word
 * always carries INDIRECT_BUFFER_VALID.  @ctx_switch is not used.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

/*
 * Emit a fence on the GFX ring as two EVENT_WRITE_EOP packets.
 *
 * @addr:  GPU address the sequence number is written to
 * @seq:   fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, uint64_t addr,
					 uint64_t seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}

/*
 * Emit a WAIT_REG_MEM packet that polls the ring's fence memory location
 * until it equals the last synced sequence number.  GFX rings wait on the
 * PFP engine, other rings on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

/*
 * Emit a TLB flush for @vmid / @pd_addr, then a WAIT_REG_MEM on
 * mmVM_INVALIDATE_REQUEST, and on GFX rings a PFP_SYNC_ME packet.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

/* Return the compute ring write pointer stored in its write-back slot. */
static uint64_t gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static	/* storage class of the definition that continues on the next source line */
void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6490 { 6491 struct amdgpu_device *adev = ring->adev; 6492 6493 /* XXX check if swapping is necessary on BE */ 6494 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6495 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6496 } 6497 6498 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 6499 bool acquire) 6500 { 6501 struct amdgpu_device *adev = ring->adev; 6502 int pipe_num, tmp, reg; 6503 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 6504 6505 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 6506 6507 /* first me only has 2 entries, GFX and HP3D */ 6508 if (ring->me > 0) 6509 pipe_num -= 2; 6510 6511 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; 6512 tmp = RREG32(reg); 6513 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 6514 WREG32(reg, tmp); 6515 } 6516 6517 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, 6518 struct amdgpu_ring *ring, 6519 bool acquire) 6520 { 6521 int i, pipe; 6522 bool reserve; 6523 struct amdgpu_ring *iring; 6524 6525 mutex_lock(&adev->gfx.pipe_reserve_mutex); 6526 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); 6527 if (acquire) 6528 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6529 else 6530 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6531 6532 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 6533 /* Clear all reservations - everyone reacquires all resources */ 6534 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 6535 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 6536 true); 6537 6538 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 6539 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 6540 true); 6541 } else { 6542 /* Lower all pipes without a current reservation */ 6543 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 6544 iring = &adev->gfx.gfx_ring[i]; 6545 pipe = 
amdgpu_gfx_queue_to_bit(adev, 6546 iring->me, 6547 iring->pipe, 6548 0); 6549 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6550 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6551 } 6552 6553 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 6554 iring = &adev->gfx.compute_ring[i]; 6555 pipe = amdgpu_gfx_queue_to_bit(adev, 6556 iring->me, 6557 iring->pipe, 6558 0); 6559 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6560 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6561 } 6562 } 6563 6564 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 6565 } 6566 6567 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, 6568 struct amdgpu_ring *ring, 6569 bool acquire) 6570 { 6571 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 6572 uint32_t queue_priority = acquire ? 0xf : 0x0; 6573 6574 mutex_lock(&adev->srbm_mutex); 6575 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6576 6577 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); 6578 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); 6579 6580 vi_srbm_select(adev, 0, 0, 0, 0); 6581 mutex_unlock(&adev->srbm_mutex); 6582 } 6583 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, 6584 enum drm_sched_priority priority) 6585 { 6586 struct amdgpu_device *adev = ring->adev; 6587 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 6588 6589 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 6590 return; 6591 6592 gfx_v8_0_hqd_set_priority(adev, ring, acquire); 6593 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); 6594 } 6595 6596 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6597 uint64_t addr, uint64_t seq, 6598 unsigned flags) 6599 { 6600 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6601 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6602 6603 /* RELEASE_MEM - flush caches, send int */ 6604 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6605 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6606 EOP_TC_ACTION_EN | 6607 
EOP_TC_WB_ACTION_EN | 6608 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6609 EVENT_INDEX(5))); 6610 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6611 amdgpu_ring_write(ring, addr & 0xfffffffc); 6612 amdgpu_ring_write(ring, upper_32_bits(addr)); 6613 amdgpu_ring_write(ring, lower_32_bits(seq)); 6614 amdgpu_ring_write(ring, upper_32_bits(seq)); 6615 } 6616 6617 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, uint64_t addr, 6618 uint64_t seq, unsigned int flags) 6619 { 6620 /* we only allocate 32bit for each seq wb address */ 6621 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6622 6623 /* write fence seq to the "addr" */ 6624 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6625 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6626 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6627 amdgpu_ring_write(ring, lower_32_bits(addr)); 6628 amdgpu_ring_write(ring, upper_32_bits(addr)); 6629 amdgpu_ring_write(ring, lower_32_bits(seq)); 6630 6631 if (flags & AMDGPU_FENCE_FLAG_INT) { 6632 /* set register to trigger INT */ 6633 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6634 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6635 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6636 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6637 amdgpu_ring_write(ring, 0); 6638 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6639 } 6640 } 6641 6642 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6643 { 6644 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6645 amdgpu_ring_write(ring, 0); 6646 } 6647 6648 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6649 { 6650 uint32_t dw2 = 0; 6651 6652 if (amdgpu_sriov_vf(ring->adev)) 6653 gfx_v8_0_ring_emit_ce_meta(ring); 6654 6655 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6656 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6657 gfx_v8_0_ring_emit_vgt_flush(ring); 6658 /* set load_global_config & load_global_uconfig 
*/ 6659 dw2 |= 0x8001; 6660 /* set load_cs_sh_regs */ 6661 dw2 |= 0x01000000; 6662 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6663 dw2 |= 0x10002; 6664 6665 /* set load_ce_ram if preamble presented */ 6666 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6667 dw2 |= 0x10000000; 6668 } else { 6669 /* still load_ce_ram if this is the first time preamble presented 6670 * although there is no context switch happens. 6671 */ 6672 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6673 dw2 |= 0x10000000; 6674 } 6675 6676 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6677 amdgpu_ring_write(ring, dw2); 6678 amdgpu_ring_write(ring, 0); 6679 } 6680 6681 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 6682 { 6683 unsigned ret; 6684 6685 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6686 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 6687 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 6688 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 6689 ret = ring->wptr & ring->buf_mask; 6690 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 6691 return ret; 6692 } 6693 6694 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 6695 { 6696 unsigned cur; 6697 6698 BUG_ON(offset > ring->buf_mask); 6699 BUG_ON(ring->ring[offset] != 0x55aa55aa); 6700 6701 cur = (ring->wptr & ring->buf_mask) - 1; 6702 if (likely(cur > offset)) 6703 ring->ring[offset] = cur - offset; 6704 else 6705 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; 6706 } 6707 6708 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 6709 { 6710 struct amdgpu_device *adev = ring->adev; 6711 6712 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6713 amdgpu_ring_write(ring, 0 | /* src: register*/ 6714 (5 << 8) | /* dst: memory */ 6715 (1 << 20)); /* write confirm */ 6716 amdgpu_ring_write(ring, reg); 6717 
/* Continuation of gfx_v8_0_ring_emit_rreg(): remaining COPY_DATA dwords. */
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

/*
 * Emit a WRITE_DATA packet that writes @val to register @reg.
 * The control word depends on the ring type (GFX, KIQ, or anything else).
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

/*
 * Set TIME_STAMP_INT_ENABLE in CP_INT_CNTL_RING0: 0 when @state is
 * AMDGPU_IRQ_STATE_DISABLE, 1 for any other value.
 */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

/*
 * Enable or disable the timestamp (EOP) interrupt of one MEC1 pipe.
 *
 * @me:    micro engine; only 1 is handled, anything else is logged and ignored
 * @pipe:  pipe index 0..3; other values are logged and ignored
 * @state: AMDGPU_IRQ_STATE_ENABLE / _DISABLE; other values are ignored
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	/* read-modify-write of the selected pipe's interrupt control register */
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

/*
 * IRQ-source callback: set PRIV_REG_INT_ENABLE in CP_INT_CNTL_RING0.
 * @source and @type are unused.  Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

/*
 * IRQ-source callback: set PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0.
 * @source and @type are unused.  Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

/*
 * IRQ-source callback: route an EOP interrupt type to the GFX or the
 * per-pipe compute helper.  Unknown types are ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	/* the MEC2 cases reach the helper's "invalid me" path */
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

/*
 * IRQ-source callback: set CP_ECC_ERROR_INT_ENABLE in every CP interrupt
 * control register listed below.
 *
 * Returns 0 on success, -EINVAL when @state is neither ENABLE nor DISABLE.
 */
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6890 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6891 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6892 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6893 enable_flag); 6894 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6895 enable_flag); 6896 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6897 enable_flag); 6898 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6899 enable_flag); 6900 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6901 enable_flag); 6902 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6903 enable_flag); 6904 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6905 enable_flag); 6906 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6907 enable_flag); 6908 6909 return 0; 6910 } 6911 6912 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, 6913 struct amdgpu_irq_src *source, 6914 unsigned int type, 6915 enum amdgpu_interrupt_state state) 6916 { 6917 int enable_flag; 6918 6919 switch (state) { 6920 case AMDGPU_IRQ_STATE_DISABLE: 6921 enable_flag = 1; 6922 break; 6923 6924 case AMDGPU_IRQ_STATE_ENABLE: 6925 enable_flag = 0; 6926 break; 6927 6928 default: 6929 return -EINVAL; 6930 } 6931 6932 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, 6933 enable_flag); 6934 6935 return 0; 6936 } 6937 6938 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6939 struct amdgpu_irq_src *source, 6940 struct amdgpu_iv_entry *entry) 6941 { 6942 int i; 6943 u8 me_id, pipe_id, queue_id; 6944 struct amdgpu_ring *ring; 6945 6946 DRM_DEBUG("IH: CP EOP\n"); 6947 me_id = (entry->ring_id & 0x0c) >> 2; 6948 pipe_id = (entry->ring_id & 0x03) >> 0; 6949 queue_id = (entry->ring_id & 0x70) >> 4; 6950 6951 switch (me_id) { 6952 case 0: 6953 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6954 break; 6955 case 1: 6956 case 2: 6957 for (i = 0; i < 
adev->gfx.num_compute_rings; i++) { 6958 ring = &adev->gfx.compute_ring[i]; 6959 /* Per-queue interrupt is supported for MEC starting from VI. 6960 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6961 */ 6962 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6963 amdgpu_fence_process(ring); 6964 } 6965 break; 6966 } 6967 return 0; 6968 } 6969 6970 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6971 struct amdgpu_irq_src *source, 6972 struct amdgpu_iv_entry *entry) 6973 { 6974 DRM_ERROR("Illegal register access in command stream\n"); 6975 schedule_work(&adev->reset_work); 6976 return 0; 6977 } 6978 6979 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6980 struct amdgpu_irq_src *source, 6981 struct amdgpu_iv_entry *entry) 6982 { 6983 DRM_ERROR("Illegal instruction in command stream\n"); 6984 schedule_work(&adev->reset_work); 6985 return 0; 6986 } 6987 6988 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6989 struct amdgpu_irq_src *source, 6990 struct amdgpu_iv_entry *entry) 6991 { 6992 DRM_ERROR("CP EDC/ECC error detected."); 6993 return 0; 6994 } 6995 6996 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) 6997 { 6998 u32 enc, se_id, sh_id, cu_id; 6999 char type[20]; 7000 int sq_edc_source = -1; 7001 7002 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING); 7003 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID); 7004 7005 switch (enc) { 7006 case 0: 7007 DRM_INFO("SQ general purpose intr detected:" 7008 "se_id %d, immed_overflow %d, host_reg_overflow %d," 7009 "host_cmd_overflow %d, cmd_timestamp %d," 7010 "reg_timestamp %d, thread_trace_buff_full %d," 7011 "wlt %d, thread_trace %d.\n", 7012 se_id, 7013 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW), 7014 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW), 7015 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW), 7016 
REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP), 7017 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP), 7018 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL), 7019 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT), 7020 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE) 7021 ); 7022 break; 7023 case 1: 7024 case 2: 7025 7026 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID); 7027 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID); 7028 7029 /* 7030 * This function can be called either directly from ISR 7031 * or from BH in which case we can access SQ_EDC_INFO 7032 * instance 7033 */ 7034 if (in_task()) { 7035 mutex_lock(&adev->grbm_idx_mutex); 7036 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); 7037 7038 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); 7039 7040 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7041 mutex_unlock(&adev->grbm_idx_mutex); 7042 } 7043 7044 if (enc == 1) 7045 sprintf(type, "instruction intr"); 7046 else 7047 sprintf(type, "EDC/ECC error"); 7048 7049 DRM_INFO( 7050 "SQ %s detected: " 7051 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " 7052 "trap %s, sq_ed_info.source %s.\n", 7053 type, se_id, sh_id, cu_id, 7054 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID), 7055 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID), 7056 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID), 7057 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false", 7058 (sq_edc_source != -1) ? 
sq_edc_source_names[sq_edc_source] : "unavailable" 7059 ); 7060 break; 7061 default: 7062 DRM_ERROR("SQ invalid encoding type\n."); 7063 } 7064 } 7065 7066 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) 7067 { 7068 7069 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); 7070 struct sq_work *sq_work = container_of(work, struct sq_work, work); 7071 7072 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); 7073 } 7074 7075 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, 7076 struct amdgpu_irq_src *source, 7077 struct amdgpu_iv_entry *entry) 7078 { 7079 unsigned ih_data = entry->src_data[0]; 7080 7081 /* 7082 * Try to submit work so SQ_EDC_INFO can be accessed from 7083 * BH. If previous work submission hasn't finished yet 7084 * just print whatever info is possible directly from the ISR. 7085 */ 7086 if (work_pending(&adev->gfx.sq_work.work)) { 7087 gfx_v8_0_parse_sq_irq(adev, ih_data); 7088 } else { 7089 adev->gfx.sq_work.ih_data = ih_data; 7090 schedule_work(&adev->gfx.sq_work.work); 7091 } 7092 7093 return 0; 7094 } 7095 7096 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 7097 struct amdgpu_irq_src *src, 7098 unsigned int type, 7099 enum amdgpu_interrupt_state state) 7100 { 7101 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7102 7103 switch (type) { 7104 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 7105 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, 7106 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7107 if (ring->me == 1) 7108 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, 7109 ring->pipe, 7110 GENERIC2_INT_ENABLE, 7111 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7112 else 7113 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, 7114 ring->pipe, 7115 GENERIC2_INT_ENABLE, 7116 state == AMDGPU_IRQ_STATE_DISABLE ? 
0 : 1); 7117 break; 7118 default: 7119 BUG(); /* kiq only support GENERIC2_INT now */ 7120 break; 7121 } 7122 return 0; 7123 } 7124 7125 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, 7126 struct amdgpu_irq_src *source, 7127 struct amdgpu_iv_entry *entry) 7128 { 7129 u8 me_id, pipe_id, queue_id; 7130 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7131 7132 me_id = (entry->ring_id & 0x0c) >> 2; 7133 pipe_id = (entry->ring_id & 0x03) >> 0; 7134 queue_id = (entry->ring_id & 0x70) >> 4; 7135 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 7136 me_id, pipe_id, queue_id); 7137 7138 amdgpu_fence_process(ring); 7139 return 0; 7140 } 7141 7142 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 7143 .name = "gfx_v8_0", 7144 .early_init = gfx_v8_0_early_init, 7145 .late_init = gfx_v8_0_late_init, 7146 .sw_init = gfx_v8_0_sw_init, 7147 .sw_fini = gfx_v8_0_sw_fini, 7148 .hw_init = gfx_v8_0_hw_init, 7149 .hw_fini = gfx_v8_0_hw_fini, 7150 .suspend = gfx_v8_0_suspend, 7151 .resume = gfx_v8_0_resume, 7152 .is_idle = gfx_v8_0_is_idle, 7153 .wait_for_idle = gfx_v8_0_wait_for_idle, 7154 .check_soft_reset = gfx_v8_0_check_soft_reset, 7155 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 7156 .soft_reset = gfx_v8_0_soft_reset, 7157 .post_soft_reset = gfx_v8_0_post_soft_reset, 7158 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 7159 .set_powergating_state = gfx_v8_0_set_powergating_state, 7160 .get_clockgating_state = gfx_v8_0_get_clockgating_state, 7161 }; 7162 7163 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7164 .type = AMDGPU_RING_TYPE_GFX, 7165 .align_mask = 0xff, 7166 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7167 .support_64bit_ptrs = false, 7168 .get_rptr = gfx_v8_0_ring_get_rptr, 7169 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 7170 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 7171 .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 7172 5 + /* COND_EXEC */ 7173 7 + /* PIPELINE_SYNC */ 7174 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* 
VM_FLUSH */ 7175 12 + /* FENCE for VM_FLUSH */ 7176 20 + /* GDS switch */ 7177 4 + /* double SWITCH_BUFFER, 7178 the first COND_EXEC jump to the place just 7179 prior to this double SWITCH_BUFFER */ 7180 5 + /* COND_EXEC */ 7181 7 + /* HDP_flush */ 7182 4 + /* VGT_flush */ 7183 14 + /* CE_META */ 7184 31 + /* DE_META */ 7185 3 + /* CNTX_CTRL */ 7186 5 + /* HDP_INVL */ 7187 12 + 12 + /* FENCE x2 */ 7188 2, /* SWITCH_BUFFER */ 7189 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ 7190 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 7191 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 7192 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7193 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7194 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7195 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7196 .test_ring = gfx_v8_0_ring_test_ring, 7197 .test_ib = gfx_v8_0_ring_test_ib, 7198 .insert_nop = amdgpu_ring_insert_nop, 7199 .pad_ib = amdgpu_ring_generic_pad_ib, 7200 .emit_switch_buffer = gfx_v8_ring_emit_sb, 7201 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 7202 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 7203 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 7204 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7205 }; 7206 7207 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7208 .type = AMDGPU_RING_TYPE_COMPUTE, 7209 .align_mask = 0xff, 7210 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7211 .support_64bit_ptrs = false, 7212 .get_rptr = gfx_v8_0_ring_get_rptr, 7213 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7214 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7215 .emit_frame_size = 7216 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7217 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7218 5 + /* hdp_invalidate */ 7219 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7220 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7221 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7222 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 
7223 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7224 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 7225 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7226 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7227 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7228 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7229 .test_ring = gfx_v8_0_ring_test_ring, 7230 .test_ib = gfx_v8_0_ring_test_ib, 7231 .insert_nop = amdgpu_ring_insert_nop, 7232 .pad_ib = amdgpu_ring_generic_pad_ib, 7233 .set_priority = gfx_v8_0_ring_set_priority_compute, 7234 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7235 }; 7236 7237 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7238 .type = AMDGPU_RING_TYPE_KIQ, 7239 .align_mask = 0xff, 7240 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7241 .support_64bit_ptrs = false, 7242 .get_rptr = gfx_v8_0_ring_get_rptr, 7243 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7244 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7245 .emit_frame_size = 7246 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7247 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7248 5 + /* hdp_invalidate */ 7249 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7250 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7251 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7252 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7253 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7254 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 7255 .test_ring = gfx_v8_0_ring_test_ring, 7256 .test_ib = gfx_v8_0_ring_test_ib, 7257 .insert_nop = amdgpu_ring_insert_nop, 7258 .pad_ib = amdgpu_ring_generic_pad_ib, 7259 .emit_rreg = gfx_v8_0_ring_emit_rreg, 7260 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7261 }; 7262 7263 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7264 { 7265 int i; 7266 7267 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 7268 7269 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7270 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7271 7272 for (i = 0; i < 
adev->gfx.num_compute_rings; i++) 7273 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7274 } 7275 7276 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7277 .set = gfx_v8_0_set_eop_interrupt_state, 7278 .process = gfx_v8_0_eop_irq, 7279 }; 7280 7281 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7282 .set = gfx_v8_0_set_priv_reg_fault_state, 7283 .process = gfx_v8_0_priv_reg_irq, 7284 }; 7285 7286 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7287 .set = gfx_v8_0_set_priv_inst_fault_state, 7288 .process = gfx_v8_0_priv_inst_irq, 7289 }; 7290 7291 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7292 .set = gfx_v8_0_kiq_set_interrupt_state, 7293 .process = gfx_v8_0_kiq_irq, 7294 }; 7295 7296 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { 7297 .set = gfx_v8_0_set_cp_ecc_int_state, 7298 .process = gfx_v8_0_cp_ecc_error_irq, 7299 }; 7300 7301 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { 7302 .set = gfx_v8_0_set_sq_int_state, 7303 .process = gfx_v8_0_sq_irq, 7304 }; 7305 7306 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7307 { 7308 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7309 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7310 7311 adev->gfx.priv_reg_irq.num_types = 1; 7312 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7313 7314 adev->gfx.priv_inst_irq.num_types = 1; 7315 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7316 7317 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7318 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7319 7320 adev->gfx.cp_ecc_error_irq.num_types = 1; 7321 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; 7322 7323 adev->gfx.sq_irq.num_types = 1; 7324 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs; 7325 } 7326 7327 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7328 { 7329 
adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7330 } 7331 7332 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7333 { 7334 /* init asci gds info */ 7335 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7336 adev->gds.gws.total_size = 64; 7337 adev->gds.oa.total_size = 16; 7338 7339 if (adev->gds.mem.total_size == 64 * 1024) { 7340 adev->gds.mem.gfx_partition_size = 4096; 7341 adev->gds.mem.cs_partition_size = 4096; 7342 7343 adev->gds.gws.gfx_partition_size = 4; 7344 adev->gds.gws.cs_partition_size = 4; 7345 7346 adev->gds.oa.gfx_partition_size = 4; 7347 adev->gds.oa.cs_partition_size = 1; 7348 } else { 7349 adev->gds.mem.gfx_partition_size = 1024; 7350 adev->gds.mem.cs_partition_size = 1024; 7351 7352 adev->gds.gws.gfx_partition_size = 16; 7353 adev->gds.gws.cs_partition_size = 16; 7354 7355 adev->gds.oa.gfx_partition_size = 4; 7356 adev->gds.oa.cs_partition_size = 4; 7357 } 7358 } 7359 7360 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7361 u32 bitmap) 7362 { 7363 u32 data; 7364 7365 if (!bitmap) 7366 return; 7367 7368 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7369 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7370 7371 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7372 } 7373 7374 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7375 { 7376 u32 data, mask; 7377 7378 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7379 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7380 7381 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7382 7383 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7384 } 7385 7386 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7387 { 7388 int i, j, k, counter, active_cu_number = 0; 7389 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7390 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7391 unsigned disable_masks[4 * 2]; 7392 u32 ao_cu_num; 7393 7394 memset(cu_info, 0, sizeof(*cu_info)); 7395 7396 if 
(adev->flags & AMD_IS_APU) 7397 ao_cu_num = 2; 7398 else 7399 ao_cu_num = adev->gfx.config.max_cu_per_sh; 7400 7401 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7402 7403 mutex_lock(&adev->grbm_idx_mutex); 7404 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7405 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7406 mask = 1; 7407 ao_bitmap = 0; 7408 counter = 0; 7409 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7410 if (i < 4 && j < 2) 7411 gfx_v8_0_set_user_cu_inactive_bitmap( 7412 adev, disable_masks[i * 2 + j]); 7413 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7414 cu_info->bitmap[i][j] = bitmap; 7415 7416 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7417 if (bitmap & mask) { 7418 if (counter < ao_cu_num) 7419 ao_bitmap |= mask; 7420 counter ++; 7421 } 7422 mask <<= 1; 7423 } 7424 active_cu_number += counter; 7425 if (i < 2 && j < 2) 7426 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7427 cu_info->ao_cu_bitmap[i][j] = ao_bitmap; 7428 } 7429 } 7430 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7431 mutex_unlock(&adev->grbm_idx_mutex); 7432 7433 cu_info->number = active_cu_number; 7434 cu_info->ao_cu_mask = ao_cu_mask; 7435 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7436 cu_info->max_waves_per_simd = 10; 7437 cu_info->max_scratch_slots_per_cu = 32; 7438 cu_info->wave_front_size = 64; 7439 cu_info->lds_size = 64; 7440 } 7441 7442 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7443 { 7444 .type = AMD_IP_BLOCK_TYPE_GFX, 7445 .major = 8, 7446 .minor = 0, 7447 .rev = 0, 7448 .funcs = &gfx_v8_0_ip_funcs, 7449 }; 7450 7451 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7452 { 7453 .type = AMD_IP_BLOCK_TYPE_GFX, 7454 .major = 8, 7455 .minor = 1, 7456 .rev = 0, 7457 .funcs = &gfx_v8_0_ip_funcs, 7458 }; 7459 7460 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 7461 { 7462 uint64_t ce_payload_addr; 7463 int cnt_ce; 7464 union { 7465 struct vi_ce_ib_state regular; 7466 struct 
vi_ce_ib_state_chained_ib chained; 7467 } ce_payload = {}; 7468 7469 if (ring->adev->virt.chained_ib_support) { 7470 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7471 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7472 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7473 } else { 7474 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7475 offsetof(struct vi_gfx_meta_data, ce_payload); 7476 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7477 } 7478 7479 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7480 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7481 WRITE_DATA_DST_SEL(8) | 7482 WR_CONFIRM) | 7483 WRITE_DATA_CACHE_POLICY(0)); 7484 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7485 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7486 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7487 } 7488 7489 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) 7490 { 7491 uint64_t de_payload_addr, gds_addr, csa_addr; 7492 int cnt_de; 7493 union { 7494 struct vi_de_ib_state regular; 7495 struct vi_de_ib_state_chained_ib chained; 7496 } de_payload = {}; 7497 7498 csa_addr = amdgpu_csa_vaddr(ring->adev); 7499 gds_addr = csa_addr + 4096; 7500 if (ring->adev->virt.chained_ib_support) { 7501 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7502 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7503 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7504 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7505 } else { 7506 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7507 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7508 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7509 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7510 } 7511 7512 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7513 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 
7514 WRITE_DATA_DST_SEL(8) | 7515 WR_CONFIRM) | 7516 WRITE_DATA_CACHE_POLICY(0)); 7517 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7518 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7519 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7520 } 7521