1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Alex Deucher 23 */ 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include "drmP.h" 27 #include "radeon.h" 28 #include "radeon_asic.h" 29 #include "radeon_audio.h" 30 #include "cikd.h" 31 #include "atom.h" 32 #include "cik_blit_shaders.h" 33 #include "radeon_ucode.h" 34 #include "clearstate_ci.h" 35 #include "radeon_kfd.h" 36 37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); 38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); 39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin"); 40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin"); 41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin"); 42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin"); 43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin"); 44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin"); 45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin"); 46 47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); 48 MODULE_FIRMWARE("radeon/bonaire_me.bin"); 49 MODULE_FIRMWARE("radeon/bonaire_ce.bin"); 50 MODULE_FIRMWARE("radeon/bonaire_mec.bin"); 51 MODULE_FIRMWARE("radeon/bonaire_mc.bin"); 52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); 53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin"); 54 MODULE_FIRMWARE("radeon/bonaire_smc.bin"); 55 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin"); 56 57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin"); 58 MODULE_FIRMWARE("radeon/HAWAII_me.bin"); 59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin"); 60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin"); 61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin"); 62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin"); 63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin"); 64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin"); 65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin"); 66 67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin"); 68 MODULE_FIRMWARE("radeon/hawaii_me.bin"); 69 MODULE_FIRMWARE("radeon/hawaii_ce.bin"); 70 MODULE_FIRMWARE("radeon/hawaii_mec.bin"); 71 MODULE_FIRMWARE("radeon/hawaii_mc.bin"); 72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin"); 73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin"); 74 MODULE_FIRMWARE("radeon/hawaii_smc.bin"); 75 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin"); 76 77 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); 78 MODULE_FIRMWARE("radeon/KAVERI_me.bin"); 79 MODULE_FIRMWARE("radeon/KAVERI_ce.bin"); 80 MODULE_FIRMWARE("radeon/KAVERI_mec.bin"); 81 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin"); 82 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin"); 83 84 MODULE_FIRMWARE("radeon/kaveri_pfp.bin"); 85 MODULE_FIRMWARE("radeon/kaveri_me.bin"); 86 MODULE_FIRMWARE("radeon/kaveri_ce.bin"); 87 MODULE_FIRMWARE("radeon/kaveri_mec.bin"); 88 MODULE_FIRMWARE("radeon/kaveri_mec2.bin"); 89 MODULE_FIRMWARE("radeon/kaveri_rlc.bin"); 90 MODULE_FIRMWARE("radeon/kaveri_sdma.bin"); 91 92 MODULE_FIRMWARE("radeon/KABINI_pfp.bin"); 93 MODULE_FIRMWARE("radeon/KABINI_me.bin"); 94 MODULE_FIRMWARE("radeon/KABINI_ce.bin"); 95 MODULE_FIRMWARE("radeon/KABINI_mec.bin"); 96 MODULE_FIRMWARE("radeon/KABINI_rlc.bin"); 97 MODULE_FIRMWARE("radeon/KABINI_sdma.bin"); 98 99 MODULE_FIRMWARE("radeon/kabini_pfp.bin"); 100 MODULE_FIRMWARE("radeon/kabini_me.bin"); 101 MODULE_FIRMWARE("radeon/kabini_ce.bin"); 102 MODULE_FIRMWARE("radeon/kabini_mec.bin"); 103 MODULE_FIRMWARE("radeon/kabini_rlc.bin"); 104 MODULE_FIRMWARE("radeon/kabini_sdma.bin"); 105 106 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin"); 107 MODULE_FIRMWARE("radeon/MULLINS_me.bin"); 108 MODULE_FIRMWARE("radeon/MULLINS_ce.bin"); 109 MODULE_FIRMWARE("radeon/MULLINS_mec.bin"); 110 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin"); 111 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin"); 112 113 MODULE_FIRMWARE("radeon/mullins_pfp.bin"); 114 MODULE_FIRMWARE("radeon/mullins_me.bin"); 115 MODULE_FIRMWARE("radeon/mullins_ce.bin"); 116 MODULE_FIRMWARE("radeon/mullins_mec.bin"); 117 MODULE_FIRMWARE("radeon/mullins_rlc.bin"); 118 MODULE_FIRMWARE("radeon/mullins_sdma.bin"); 119 120 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh); 121 static void cik_rlc_stop(struct radeon_device *rdev); 122 static void cik_pcie_gen3_enable(struct radeon_device *rdev); 123 static void cik_program_aspm(struct radeon_device *rdev); 124 static void cik_init_pg(struct radeon_device *rdev); 125 static void cik_init_cg(struct radeon_device *rdev); 126 static void cik_fini_pg(struct radeon_device *rdev); 127 static void cik_fini_cg(struct radeon_device *rdev); 128 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 129 bool enable); 130 131 /** 132 * cik_get_allowed_info_register - fetch the register for the info ioctl 133 * 134 * @rdev: radeon_device pointer 135 * @reg: register offset in bytes 136 * @val: register value 137 * 138 * Returns 0 for success or -EINVAL for an invalid register 139 * 140 */ 141 int cik_get_allowed_info_register(struct radeon_device *rdev, 142 u32 reg, u32 *val) 143 { 144 switch (reg) { 145 case GRBM_STATUS: 146 case GRBM_STATUS2: 147 case GRBM_STATUS_SE0: 148 case GRBM_STATUS_SE1: 149 case GRBM_STATUS_SE2: 150 case GRBM_STATUS_SE3: 151 case SRBM_STATUS: 152 case SRBM_STATUS2: 153 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET): 154 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET): 155 case UVD_STATUS: 156 /* TODO VCE */ 157 *val = RREG32(reg); 158 return 0; 159 default: 160 return -EINVAL; 161 } 162 } 163 164 /* 165 * Indirect registers accessor 166 */ 167 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) 168 { 169 u32 r; 170 171 lockmgr(&rdev->didt_idx_lock, LK_EXCLUSIVE); 172 WREG32(CIK_DIDT_IND_INDEX, (reg)); 173 r = RREG32(CIK_DIDT_IND_DATA); 174 lockmgr(&rdev->didt_idx_lock, LK_RELEASE); 175 return r; 176 } 177 178 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) 179 { 180 lockmgr(&rdev->didt_idx_lock, LK_EXCLUSIVE); 181 WREG32(CIK_DIDT_IND_INDEX, (reg)); 182 WREG32(CIK_DIDT_IND_DATA, (v)); 183 lockmgr(&rdev->didt_idx_lock, LK_RELEASE); 184 } 185 186 /* get temperature in millidegrees */ 187 int ci_get_temp(struct radeon_device *rdev) 188 { 189 u32 temp; 190 int actual_temp = 0; 191 192 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> 193 CTF_TEMP_SHIFT; 194 195 if (temp & 0x200) 196 actual_temp = 255; 197 else 198 actual_temp = temp & 0x1ff; 199 200 actual_temp = actual_temp * 1000; 201 202 return actual_temp; 203 } 204 205 /* get temperature in millidegrees */ 206 int kv_get_temp(struct radeon_device *rdev) 207 { 208 u32 temp; 209 int actual_temp = 0; 210 211 temp = RREG32_SMC(0xC0300E0C); 212 213 if (temp) 214 actual_temp = (temp / 8) - 49; 215 else 216 actual_temp = 0; 217 218 actual_temp = actual_temp * 1000; 219 220 return actual_temp; 221 } 222 223 /* 224 * Indirect registers accessor 225 */ 226 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg) 227 { 228 u32 r; 229 230 lockmgr(&rdev->pciep_idx_lock, LK_EXCLUSIVE); 231 WREG32(PCIE_INDEX, reg); 232 (void)RREG32(PCIE_INDEX); 233 r = RREG32(PCIE_DATA); 234 lockmgr(&rdev->pciep_idx_lock, LK_RELEASE); 235 return r; 236 } 237 238 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) 239 { 240 lockmgr(&rdev->pciep_idx_lock, LK_EXCLUSIVE); 241 WREG32(PCIE_INDEX, reg); 242 (void)RREG32(PCIE_INDEX); 243 WREG32(PCIE_DATA, v); 244 (void)RREG32(PCIE_DATA); 245 lockmgr(&rdev->pciep_idx_lock, LK_RELEASE); 246 } 247 248 static const u32 spectre_rlc_save_restore_register_list[] = 249 { 250 (0x0e00 << 16) | (0xc12c >> 2), 251 0x00000000, 252 (0x0e00 << 16) | (0xc140 >> 2), 253 0x00000000, 254 (0x0e00 << 16) | (0xc150 >> 2), 255 0x00000000, 256 (0x0e00 << 16) | (0xc15c >> 2), 257 0x00000000, 258 (0x0e00 << 16) | (0xc168 >> 2), 259 0x00000000, 260 (0x0e00 << 16) | (0xc170 >> 2), 261 0x00000000, 262 (0x0e00 << 16) | (0xc178 >> 2), 263 0x00000000, 264 (0x0e00 << 16) | (0xc204 >> 2), 265 0x00000000, 266 (0x0e00 << 16) | (0xc2b4 >> 2), 267 0x00000000, 268 (0x0e00 << 16) | (0xc2b8 >> 2), 269 0x00000000, 270 (0x0e00 << 16) | (0xc2bc >> 2), 271 0x00000000, 272 (0x0e00 << 16) | (0xc2c0 >> 2), 273 0x00000000, 274 (0x0e00 << 16) | (0x8228 >> 2), 275 0x00000000, 276 (0x0e00 << 16) | (0x829c >> 2), 277 0x00000000, 278 (0x0e00 << 16) | (0x869c >> 2), 279 0x00000000, 280 (0x0600 << 16) | (0x98f4 >> 2), 281 0x00000000, 282 (0x0e00 << 16) | (0x98f8 >> 2), 283 0x00000000, 284 (0x0e00 << 16) | (0x9900 >> 2), 285 0x00000000, 286 (0x0e00 << 16) | (0xc260 >> 2), 287 0x00000000, 288 (0x0e00 << 16) | (0x90e8 >> 2), 289 0x00000000, 290 (0x0e00 << 16) | (0x3c000 >> 2), 291 0x00000000, 292 (0x0e00 << 16) | (0x3c00c >> 2), 293 0x00000000, 294 (0x0e00 << 16) | (0x8c1c >> 2), 295 0x00000000, 296 (0x0e00 << 16) | (0x9700 >> 2), 297 0x00000000, 298 (0x0e00 << 16) | (0xcd20 >> 2), 299 0x00000000, 300 (0x4e00 << 16) | (0xcd20 >> 2), 301 0x00000000, 302 (0x5e00 << 16) | (0xcd20 >> 2), 303 0x00000000, 304 (0x6e00 << 16) | (0xcd20 >> 2), 305 0x00000000, 306 (0x7e00 << 16) | (0xcd20 >> 2), 307 0x00000000, 308 (0x8e00 << 16) | (0xcd20 >> 2), 309 0x00000000, 310 (0x9e00 << 16) | (0xcd20 >> 2), 311 0x00000000, 312 (0xae00 << 16) | (0xcd20 >> 2), 313 0x00000000, 314 (0xbe00 << 16) | (0xcd20 >> 2), 315 0x00000000, 316 (0x0e00 << 16) | (0x89bc >> 2), 317 0x00000000, 318 (0x0e00 << 16) | (0x8900 >> 2), 319 0x00000000, 320 0x3, 321 (0x0e00 << 16) | (0xc130 >> 2), 322 0x00000000, 323 (0x0e00 << 16) | (0xc134 >> 2), 324 0x00000000, 325 (0x0e00 << 16) | (0xc1fc >> 2), 326 0x00000000, 327 (0x0e00 << 16) | (0xc208 >> 2), 328 0x00000000, 329 (0x0e00 << 16) | (0xc264 >> 2), 330 0x00000000, 331 (0x0e00 << 16) | (0xc268 >> 2), 332 0x00000000, 333 (0x0e00 << 16) | (0xc26c >> 2), 334 0x00000000, 335 (0x0e00 << 16) | (0xc270 >> 2), 336 0x00000000, 337 (0x0e00 << 16) | (0xc274 >> 2), 338 0x00000000, 339 (0x0e00 << 16) | (0xc278 >> 2), 340 0x00000000, 341 (0x0e00 << 16) | (0xc27c >> 2), 342 0x00000000, 343 (0x0e00 << 16) | (0xc280 >> 2), 344 0x00000000, 345 (0x0e00 << 16) | (0xc284 >> 2), 346 0x00000000, 347 (0x0e00 << 16) | (0xc288 >> 2), 348 0x00000000, 349 (0x0e00 << 16) | (0xc28c >> 2), 350 0x00000000, 351 (0x0e00 << 16) | (0xc290 >> 2), 352 0x00000000, 353 (0x0e00 << 16) | (0xc294 >> 2), 354 0x00000000, 355 (0x0e00 << 16) | (0xc298 >> 2), 356 0x00000000, 357 (0x0e00 << 16) | (0xc29c >> 2), 358 0x00000000, 359 (0x0e00 << 16) | (0xc2a0 >> 2), 360 0x00000000, 361 (0x0e00 << 16) | (0xc2a4 >> 2), 362 0x00000000, 363 (0x0e00 << 16) | (0xc2a8 >> 2), 364 0x00000000, 365 (0x0e00 << 16) | (0xc2ac >> 2), 366 0x00000000, 367 (0x0e00 << 16) | (0xc2b0 >> 2), 368 0x00000000, 369 (0x0e00 << 16) | (0x301d0 >> 2), 370 0x00000000, 371 (0x0e00 << 16) | (0x30238 >> 2), 372 0x00000000, 373 (0x0e00 << 16) | (0x30250 >> 2), 374 0x00000000, 375 (0x0e00 << 16) | (0x30254 >> 2), 376 0x00000000, 377 (0x0e00 << 16) | (0x30258 >> 2), 378 0x00000000, 379 (0x0e00 << 16) | (0x3025c >> 2), 380 0x00000000, 381 (0x4e00 << 16) | (0xc900 >> 2), 382 0x00000000, 383 (0x5e00 << 16) | (0xc900 >> 2), 384 0x00000000, 385 (0x6e00 << 16) | (0xc900 >> 2), 386 0x00000000, 387 (0x7e00 << 16) | (0xc900 >> 2), 388 0x00000000, 389 (0x8e00 << 16) | (0xc900 >> 2), 390 0x00000000, 391 (0x9e00 << 16) | (0xc900 >> 2), 392 0x00000000, 393 (0xae00 << 16) | (0xc900 >> 2), 394 0x00000000, 395 (0xbe00 << 16) | (0xc900 >> 2), 396 0x00000000, 397 (0x4e00 << 16) | (0xc904 >> 2), 398 0x00000000, 399 (0x5e00 << 16) | (0xc904 >> 2), 400 0x00000000, 401 (0x6e00 << 16) | (0xc904 >> 2), 402 0x00000000, 403 (0x7e00 << 16) | (0xc904 >> 2), 404 0x00000000, 405 (0x8e00 << 16) | (0xc904 >> 2), 406 0x00000000, 407 (0x9e00 << 16) | (0xc904 >> 2), 408 0x00000000, 409 (0xae00 << 16) | (0xc904 >> 2), 410 0x00000000, 411 (0xbe00 << 16) | (0xc904 >> 2), 412 0x00000000, 413 (0x4e00 << 16) | (0xc908 >> 2), 414 0x00000000, 415 (0x5e00 << 16) | (0xc908 >> 2), 416 0x00000000, 417 (0x6e00 << 16) | (0xc908 >> 2), 418 0x00000000, 419 (0x7e00 << 16) | (0xc908 >> 2), 420 0x00000000, 421 (0x8e00 << 16) | (0xc908 >> 2), 422 0x00000000, 423 (0x9e00 << 16) | (0xc908 >> 2), 424 0x00000000, 425 (0xae00 << 16) | (0xc908 >> 2), 426 0x00000000, 427 (0xbe00 << 16) | (0xc908 >> 2), 428 0x00000000, 429 (0x4e00 << 16) | (0xc90c >> 2), 430 0x00000000, 431 (0x5e00 << 16) | (0xc90c >> 2), 432 0x00000000, 433 (0x6e00 << 16) | (0xc90c >> 2), 434 0x00000000, 435 (0x7e00 << 16) | (0xc90c >> 2), 436 0x00000000, 437 (0x8e00 << 16) | (0xc90c >> 2), 438 0x00000000, 439 (0x9e00 << 16) | (0xc90c >> 2), 440 0x00000000, 441 (0xae00 << 16) | (0xc90c >> 2), 442 0x00000000, 443 (0xbe00 << 16) | (0xc90c >> 2), 444 0x00000000, 445 (0x4e00 << 16) | (0xc910 >> 2), 446 0x00000000, 447 (0x5e00 << 16) | (0xc910 >> 2), 448 0x00000000, 449 (0x6e00 << 16) | (0xc910 >> 2), 450 0x00000000, 451 (0x7e00 << 16) | (0xc910 >> 2), 452 0x00000000, 453 (0x8e00 << 16) | (0xc910 >> 2), 454 0x00000000, 455 (0x9e00 << 16) | (0xc910 >> 2), 456 0x00000000, 457 (0xae00 << 16) | (0xc910 >> 2), 458 0x00000000, 459 (0xbe00 << 16) | (0xc910 >> 2), 460 0x00000000, 461 (0x0e00 << 16) | (0xc99c >> 2), 462 0x00000000, 463 (0x0e00 << 16) | (0x9834 >> 2), 464 0x00000000, 465 (0x0000 << 16) | (0x30f00 >> 2), 466 0x00000000, 467 (0x0001 << 16) | (0x30f00 >> 2), 468 0x00000000, 469 (0x0000 << 16) | (0x30f04 >> 2), 470 0x00000000, 471 (0x0001 << 16) | (0x30f04 >> 2), 472 0x00000000, 473 (0x0000 << 16) | (0x30f08 >> 2), 474 0x00000000, 475 (0x0001 << 16) | (0x30f08 >> 2), 476 0x00000000, 477 (0x0000 << 16) | (0x30f0c >> 2), 478 0x00000000, 479 (0x0001 << 16) | (0x30f0c >> 2), 480 0x00000000, 481 (0x0600 << 16) | (0x9b7c >> 2), 482 0x00000000, 483 (0x0e00 << 16) | (0x8a14 >> 2), 484 0x00000000, 485 (0x0e00 << 16) | (0x8a18 >> 2), 486 0x00000000, 487 (0x0600 << 16) | (0x30a00 >> 2), 488 0x00000000, 489 (0x0e00 << 16) | (0x8bf0 >> 2), 490 0x00000000, 491 (0x0e00 << 16) | (0x8bcc >> 2), 492 0x00000000, 493 (0x0e00 << 16) | (0x8b24 >> 2), 494 0x00000000, 495 (0x0e00 << 16) | (0x30a04 >> 2), 496 0x00000000, 497 (0x0600 << 16) | (0x30a10 >> 2), 498 0x00000000, 499 (0x0600 << 16) | (0x30a14 >> 2), 500 0x00000000, 501 (0x0600 << 16) | (0x30a18 >> 2), 502 0x00000000, 503 (0x0600 << 16) | (0x30a2c >> 2), 504 0x00000000, 505 (0x0e00 << 16) | (0xc700 >> 2), 506 0x00000000, 507 (0x0e00 << 16) | (0xc704 >> 2), 508 0x00000000, 509 (0x0e00 << 16) | (0xc708 >> 2), 510 0x00000000, 511 (0x0e00 << 16) | (0xc768 >> 2), 512 0x00000000, 513 (0x0400 << 16) | (0xc770 >> 2), 514 0x00000000, 515 (0x0400 << 16) | (0xc774 >> 2), 516 0x00000000, 517 (0x0400 << 16) | (0xc778 >> 2), 518 0x00000000, 519 (0x0400 << 16) | (0xc77c >> 2), 520 0x00000000, 521 (0x0400 << 16) | (0xc780 >> 2), 522 0x00000000, 523 (0x0400 << 16) | (0xc784 >> 2), 524 0x00000000, 525 (0x0400 << 16) | (0xc788 >> 2), 526 0x00000000, 527 (0x0400 << 16) | (0xc78c >> 2), 528 0x00000000, 529 (0x0400 << 16) | (0xc798 >> 2), 530 0x00000000, 531 (0x0400 << 16) | (0xc79c >> 2), 532 0x00000000, 533 (0x0400 << 16) | (0xc7a0 >> 2), 534 0x00000000, 535 (0x0400 << 16) | (0xc7a4 >> 2), 536 0x00000000, 537 (0x0400 << 16) | (0xc7a8 >> 2), 538 0x00000000, 539 (0x0400 << 16) | (0xc7ac >> 2), 540 0x00000000, 541 (0x0400 << 16) | (0xc7b0 >> 2), 542 0x00000000, 543 (0x0400 << 16) | (0xc7b4 >> 2), 544 0x00000000, 545 (0x0e00 << 16) | (0x9100 >> 2), 546 0x00000000, 547 (0x0e00 << 16) | (0x3c010 >> 2), 548 0x00000000, 549 (0x0e00 << 16) | (0x92a8 >> 2), 550 0x00000000, 551 (0x0e00 << 16) | (0x92ac >> 2), 552 0x00000000, 553 (0x0e00 << 16) | (0x92b4 >> 2), 554 0x00000000, 555 (0x0e00 << 16) | (0x92b8 >> 2), 556 0x00000000, 557 (0x0e00 << 16) | (0x92bc >> 2), 558 0x00000000, 559 (0x0e00 << 16) | (0x92c0 >> 2), 560 0x00000000, 561 (0x0e00 << 16) | (0x92c4 >> 2), 562 0x00000000, 563 (0x0e00 << 16) | (0x92c8 >> 2), 564 0x00000000, 565 (0x0e00 << 16) | (0x92cc >> 2), 566 0x00000000, 567 (0x0e00 << 16) | (0x92d0 >> 2), 568 0x00000000, 569 (0x0e00 << 16) | (0x8c00 >> 2), 570 0x00000000, 571 (0x0e00 << 16) | (0x8c04 >> 2), 572 0x00000000, 573 (0x0e00 << 16) | (0x8c20 >> 2), 574 0x00000000, 575 (0x0e00 << 16) | (0x8c38 >> 2), 576 0x00000000, 577 (0x0e00 << 16) | (0x8c3c >> 2), 578 0x00000000, 579 (0x0e00 << 16) | (0xae00 >> 2), 580 0x00000000, 581 (0x0e00 << 16) | (0x9604 >> 2), 582 0x00000000, 583 (0x0e00 << 16) | (0xac08 >> 2), 584 0x00000000, 585 (0x0e00 << 16) | (0xac0c >> 2), 586 0x00000000, 587 (0x0e00 << 16) | (0xac10 >> 2), 588 0x00000000, 589 (0x0e00 << 16) | (0xac14 >> 2), 590 0x00000000, 591 (0x0e00 << 16) | (0xac58 >> 2), 592 0x00000000, 593 (0x0e00 << 16) | (0xac68 >> 2), 594 0x00000000, 595 (0x0e00 << 16) | (0xac6c >> 2), 596 0x00000000, 597 (0x0e00 << 16) | (0xac70 >> 2), 598 0x00000000, 599 (0x0e00 << 16) | (0xac74 >> 2), 600 0x00000000, 601 (0x0e00 << 16) | (0xac78 >> 2), 602 0x00000000, 603 (0x0e00 << 16) | (0xac7c >> 2), 604 0x00000000, 605 (0x0e00 << 16) | (0xac80 >> 2), 606 0x00000000, 607 (0x0e00 << 16) | (0xac84 >> 2), 608 0x00000000, 609 (0x0e00 << 16) | (0xac88 >> 2), 610 0x00000000, 611 (0x0e00 << 16) | (0xac8c >> 2), 612 0x00000000, 613 (0x0e00 << 16) | (0x970c >> 2), 614 0x00000000, 615 (0x0e00 << 16) | (0x9714 >> 2), 616 0x00000000, 617 (0x0e00 << 16) | (0x9718 >> 2), 618 0x00000000, 619 (0x0e00 << 16) | (0x971c >> 2), 620 0x00000000, 621 (0x0e00 << 16) | (0x31068 >> 2), 622 0x00000000, 623 (0x4e00 << 16) | (0x31068 >> 2), 624 0x00000000, 625 (0x5e00 << 16) | (0x31068 >> 2), 626 0x00000000, 627 (0x6e00 << 16) | (0x31068 >> 2), 628 0x00000000, 629 (0x7e00 << 16) | (0x31068 >> 2), 630 0x00000000, 631 (0x8e00 << 16) | (0x31068 >> 2), 632 0x00000000, 633 (0x9e00 << 16) | (0x31068 >> 2), 634 0x00000000, 635 (0xae00 << 16) | (0x31068 >> 2), 636 0x00000000, 637 (0xbe00 << 16) | (0x31068 >> 2), 638 0x00000000, 639 (0x0e00 << 16) | (0xcd10 >> 2), 640 0x00000000, 641 (0x0e00 << 16) | (0xcd14 >> 2), 642 0x00000000, 643 (0x0e00 << 16) | (0x88b0 >> 2), 644 0x00000000, 645 (0x0e00 << 16) | (0x88b4 >> 2), 646 0x00000000, 647 (0x0e00 << 16) | (0x88b8 >> 2), 648 0x00000000, 649 (0x0e00 << 16) | (0x88bc >> 2), 650 0x00000000, 651 (0x0400 << 16) | (0x89c0 >> 2), 652 0x00000000, 653 (0x0e00 << 16) | (0x88c4 >> 2), 654 0x00000000, 655 (0x0e00 << 16) | (0x88c8 >> 2), 656 0x00000000, 657 (0x0e00 << 16) | (0x88d0 >> 2), 658 0x00000000, 659 (0x0e00 << 16) | (0x88d4 >> 2), 660 0x00000000, 661 (0x0e00 << 16) | (0x88d8 >> 2), 662 0x00000000, 663 (0x0e00 << 16) | (0x8980 >> 2), 664 0x00000000, 665 (0x0e00 << 16) | (0x30938 >> 2), 666 0x00000000, 667 (0x0e00 << 16) | (0x3093c >> 2), 668 0x00000000, 669 (0x0e00 << 16) | (0x30940 >> 2), 670 0x00000000, 671 (0x0e00 << 16) | (0x89a0 >> 2), 672 0x00000000, 673 (0x0e00 << 16) | (0x30900 >> 2), 674 0x00000000, 675 (0x0e00 << 16) | (0x30904 >> 2), 676 0x00000000, 677 (0x0e00 << 16) | (0x89b4 >> 2), 678 0x00000000, 679 (0x0e00 << 16) | (0x3c210 >> 2), 680 0x00000000, 681 (0x0e00 << 16) | (0x3c214 >> 2), 682 0x00000000, 683 (0x0e00 << 16) | (0x3c218 >> 2), 684 0x00000000, 685 (0x0e00 << 16) | (0x8904 >> 2), 686 0x00000000, 687 0x5, 688 (0x0e00 << 16) | (0x8c28 >> 2), 689 (0x0e00 << 16) | (0x8c2c >> 2), 690 (0x0e00 << 16) | (0x8c30 >> 2), 691 (0x0e00 << 16) | (0x8c34 >> 2), 692 (0x0e00 << 16) | (0x9600 >> 2), 693 }; 694 695 static const u32 kalindi_rlc_save_restore_register_list[] = 696 { 697 (0x0e00 << 16) | (0xc12c >> 2), 698 0x00000000, 699 (0x0e00 << 16) | (0xc140 >> 2), 700 0x00000000, 701 (0x0e00 << 16) | (0xc150 >> 2), 702 0x00000000, 703 (0x0e00 << 16) | (0xc15c >> 2), 704 0x00000000, 705 (0x0e00 << 16) | (0xc168 >> 2), 706 0x00000000, 707 (0x0e00 << 16) | (0xc170 >> 2), 708 0x00000000, 709 (0x0e00 << 16) | (0xc204 >> 2), 710 0x00000000, 711 (0x0e00 << 16) | (0xc2b4 >> 2), 712 0x00000000, 713 (0x0e00 << 16) | (0xc2b8 >> 2), 714 0x00000000, 715 (0x0e00 << 16) | (0xc2bc >> 2), 716 0x00000000, 717 (0x0e00 << 16) | (0xc2c0 >> 2), 718 0x00000000, 719 (0x0e00 << 16) | (0x8228 >> 2), 720 0x00000000, 721 (0x0e00 << 16) | (0x829c >> 2), 722 0x00000000, 723 (0x0e00 << 16) | (0x869c >> 2), 724 0x00000000, 725 (0x0600 << 16) | (0x98f4 >> 2), 726 0x00000000, 727 (0x0e00 << 16) | (0x98f8 >> 2), 728 0x00000000, 729 (0x0e00 << 16) | (0x9900 >> 2), 730 0x00000000, 731 (0x0e00 << 16) | (0xc260 >> 2), 732 0x00000000, 733 (0x0e00 << 16) | (0x90e8 >> 2), 734 0x00000000, 735 (0x0e00 << 16) | (0x3c000 >> 2), 736 0x00000000, 737 (0x0e00 << 16) | (0x3c00c >> 2), 738 0x00000000, 739 (0x0e00 << 16) | (0x8c1c >> 2), 740 0x00000000, 741 (0x0e00 << 16) | (0x9700 >> 2), 742 0x00000000, 743 (0x0e00 << 16) | (0xcd20 >> 2), 744 0x00000000, 745 (0x4e00 << 16) | (0xcd20 >> 2), 746 0x00000000, 747 (0x5e00 << 16) | (0xcd20 >> 2), 748 0x00000000, 749 (0x6e00 << 16) | (0xcd20 >> 2), 750 0x00000000, 751 (0x7e00 << 16) | (0xcd20 >> 2), 752 0x00000000, 753 (0x0e00 << 16) | (0x89bc >> 2), 754 0x00000000, 755 (0x0e00 << 16) | (0x8900 >> 2), 756 0x00000000, 757 0x3, 758 (0x0e00 << 16) | (0xc130 >> 2), 759 0x00000000, 760 (0x0e00 << 16) | (0xc134 >> 2), 761 0x00000000, 762 (0x0e00 << 16) | (0xc1fc >> 2), 763 0x00000000, 764 (0x0e00 << 16) | (0xc208 >> 2), 765 0x00000000, 766 (0x0e00 << 16) | (0xc264 >> 2), 767 0x00000000, 768 (0x0e00 << 16) | (0xc268 >> 2), 769 0x00000000, 770 (0x0e00 << 16) | (0xc26c >> 2), 771 0x00000000, 772 (0x0e00 << 16) | (0xc270 >> 2), 773 0x00000000, 774 (0x0e00 << 16) | (0xc274 >> 2), 775 0x00000000, 776 (0x0e00 << 16) | (0xc28c >> 2), 777 0x00000000, 778 (0x0e00 << 16) | (0xc290 >> 2), 779 0x00000000, 780 (0x0e00 << 16) | (0xc294 >> 2), 781 0x00000000, 782 (0x0e00 << 16) | (0xc298 >> 2), 783 0x00000000, 784 (0x0e00 << 16) | (0xc2a0 >> 2), 785 0x00000000, 786 (0x0e00 << 16) | (0xc2a4 >> 2), 787 0x00000000, 788 (0x0e00 << 16) | (0xc2a8 >> 2), 789 0x00000000, 790 (0x0e00 << 16) | (0xc2ac >> 2), 791 0x00000000, 792 (0x0e00 << 16) | (0x301d0 >> 2), 793 0x00000000, 794 (0x0e00 << 16) | (0x30238 >> 2), 795 0x00000000, 796 (0x0e00 << 16) | (0x30250 >> 2), 797 0x00000000, 798 (0x0e00 << 16) | (0x30254 >> 2), 799 0x00000000, 800 (0x0e00 << 16) | (0x30258 >> 2), 801 0x00000000, 802 (0x0e00 << 16) | (0x3025c >> 2), 803 0x00000000, 804 (0x4e00 << 16) | (0xc900 >> 2), 805 0x00000000, 806 (0x5e00 << 16) | (0xc900 >> 2), 807 0x00000000, 808 (0x6e00 << 16) | (0xc900 >> 2), 809 0x00000000, 810 (0x7e00 << 16) | (0xc900 >> 2), 811 0x00000000, 812 (0x4e00 << 16) | (0xc904 >> 2), 813 0x00000000, 814 (0x5e00 << 16) | (0xc904 >> 2), 815 0x00000000, 816 (0x6e00 << 16) | (0xc904 >> 2), 817 0x00000000, 818 (0x7e00 << 16) | (0xc904 >> 2), 819 0x00000000, 820 (0x4e00 << 16) | (0xc908 >> 2), 821 0x00000000, 822 (0x5e00 << 16) | (0xc908 >> 2), 823 0x00000000, 824 (0x6e00 << 16) | (0xc908 >> 2), 825 0x00000000, 826 (0x7e00 << 16) | (0xc908 >> 2), 827 0x00000000, 828 (0x4e00 << 16) | (0xc90c >> 2), 829 0x00000000, 830 (0x5e00 << 16) | (0xc90c >> 2), 831 0x00000000, 832 (0x6e00 << 16) | (0xc90c >> 2), 833 0x00000000, 834 (0x7e00 << 16) | (0xc90c >> 2), 835 0x00000000, 836 (0x4e00 << 16) | (0xc910 >> 2), 837 0x00000000, 838 (0x5e00 << 16) | (0xc910 >> 2), 839 0x00000000, 840 (0x6e00 << 16) | (0xc910 >> 2), 841 0x00000000, 842 (0x7e00 << 16) | (0xc910 >> 2), 843 0x00000000, 844 (0x0e00 << 16) | (0xc99c >> 2), 845 0x00000000, 846 (0x0e00 << 16) | (0x9834 >> 2), 847 0x00000000, 848 (0x0000 << 16) | (0x30f00 >> 2), 849 0x00000000, 850 (0x0000 << 16) | (0x30f04 >> 2), 851 0x00000000, 852 (0x0000 << 16) | (0x30f08 >> 2), 853 0x00000000, 854 (0x0000 << 16) | (0x30f0c >> 2), 855 0x00000000, 856 (0x0600 << 16) | (0x9b7c >> 2), 857 0x00000000, 858 (0x0e00 << 16) | (0x8a14 >> 2), 859 0x00000000, 860 (0x0e00 << 16) | (0x8a18 >> 2), 861 0x00000000, 862 (0x0600 << 16) | (0x30a00 >> 2), 863 0x00000000, 864 (0x0e00 << 16) | (0x8bf0 >> 2), 865 0x00000000, 866 (0x0e00 << 16) | (0x8bcc >> 2), 867 0x00000000, 868 (0x0e00 << 16) | (0x8b24 >> 2), 869 0x00000000, 870 (0x0e00 << 16) | (0x30a04 >> 2), 871 0x00000000, 872 (0x0600 << 16) | (0x30a10 >> 2), 873 0x00000000, 874 (0x0600 << 16) | (0x30a14 >> 2), 875 0x00000000, 876 (0x0600 << 16) | (0x30a18 >> 2), 877 0x00000000, 878 (0x0600 << 16) | (0x30a2c >> 2), 879 0x00000000, 880 (0x0e00 << 16) | (0xc700 >> 2), 881 0x00000000, 882 (0x0e00 << 16) | (0xc704 >> 2), 883 0x00000000, 884 (0x0e00 << 16) | (0xc708 >> 2), 885 0x00000000, 886 (0x0e00 << 16) | (0xc768 >> 2), 887 0x00000000, 888 (0x0400 << 16) | (0xc770 >> 2), 889 0x00000000, 890 (0x0400 << 16) | (0xc774 >> 2), 891 0x00000000, 892 (0x0400 << 16) | (0xc798 >> 2), 893 0x00000000, 894 (0x0400 << 16) | (0xc79c >> 2), 895 0x00000000, 896 (0x0e00 << 16) | (0x9100 >> 2), 897 0x00000000, 898 (0x0e00 << 16) | (0x3c010 >> 2), 899 0x00000000, 900 (0x0e00 << 16) | (0x8c00 >> 2), 901 0x00000000, 902 (0x0e00 << 16) | (0x8c04 >> 2), 903 0x00000000, 904 (0x0e00 << 16) | (0x8c20 >> 2), 905 0x00000000, 906 (0x0e00 << 16) | (0x8c38 >> 2), 907 0x00000000, 908 (0x0e00 << 16) | (0x8c3c >> 2), 909 0x00000000, 910 (0x0e00 << 16) | (0xae00 >> 2), 911 0x00000000, 912 (0x0e00 << 16) | (0x9604 >> 2), 913 0x00000000, 914 (0x0e00 << 16) | (0xac08 >> 2), 915 0x00000000, 916 (0x0e00 << 16) | (0xac0c >> 2), 917 0x00000000, 918 (0x0e00 << 16) | (0xac10 >> 2), 919 0x00000000, 920 (0x0e00 << 16) | (0xac14 >> 2), 921 0x00000000, 922 (0x0e00 << 16) | (0xac58 >> 2), 923 0x00000000, 924 (0x0e00 << 16) | (0xac68 >> 2), 925 0x00000000, 926 (0x0e00 << 16) | (0xac6c >> 2), 927 0x00000000, 928 (0x0e00 << 16) | (0xac70 >> 2), 929 0x00000000, 930 (0x0e00 << 16) | (0xac74 >> 2), 931 0x00000000, 932 (0x0e00 << 16) | (0xac78 >> 2), 933 0x00000000, 934 (0x0e00 << 16) | (0xac7c >> 2), 935 0x00000000, 936 (0x0e00 << 16) | (0xac80 >> 2), 937 0x00000000, 938 (0x0e00 << 16) | (0xac84 >> 2), 939 0x00000000, 940 (0x0e00 << 16) | (0xac88 >> 2), 941 0x00000000, 942 (0x0e00 << 16) | (0xac8c >> 2), 943 0x00000000, 944 (0x0e00 << 16) | (0x970c >> 2), 945 0x00000000, 946 (0x0e00 << 16) | (0x9714 >> 2), 947 0x00000000, 948 (0x0e00 << 16) | (0x9718 >> 2), 949 0x00000000, 950 (0x0e00 << 16) | (0x971c >> 2), 951 0x00000000, 952 (0x0e00 << 16) | (0x31068 >> 2), 953 0x00000000, 954 (0x4e00 << 16) | (0x31068 >> 2), 955 0x00000000, 956 (0x5e00 << 16) | (0x31068 >> 2), 957 0x00000000, 958 (0x6e00 << 16) | (0x31068 >> 2), 959 0x00000000, 960 (0x7e00 << 16) | (0x31068 >> 2), 961 0x00000000, 962 (0x0e00 << 16) | (0xcd10 >> 2), 963 0x00000000, 964 (0x0e00 << 16) | (0xcd14 >> 2), 965 0x00000000, 966 (0x0e00 << 16) | (0x88b0 >> 2), 967 0x00000000, 968 (0x0e00 << 16) | (0x88b4 >> 2), 969 0x00000000, 970 (0x0e00 << 16) | (0x88b8 >> 2), 971 0x00000000, 972 (0x0e00 << 16) | (0x88bc >> 2), 973 0x00000000, 974 (0x0400 << 16) | (0x89c0 >> 2), 975 0x00000000, 976 (0x0e00 << 16) | (0x88c4 >> 2), 977 0x00000000, 978 (0x0e00 << 16) | (0x88c8 >> 2), 979 0x00000000, 980 (0x0e00 << 16) | (0x88d0 >> 2), 981 0x00000000, 982 (0x0e00 << 16) | (0x88d4 >> 2), 983 0x00000000, 984 (0x0e00 << 16) | (0x88d8 >> 2), 985 0x00000000, 986 (0x0e00 << 16) | (0x8980 >> 2), 987 0x00000000, 988 (0x0e00 << 16) | (0x30938 >> 2), 989 0x00000000, 990 (0x0e00 << 16) | (0x3093c >> 2), 991 0x00000000, 992 (0x0e00 << 16) | (0x30940 >> 2), 993 0x00000000, 994 (0x0e00 << 16) | (0x89a0 >> 2), 995 0x00000000, 996 (0x0e00 << 16) | (0x30900 >> 2), 997 0x00000000, 998 (0x0e00 << 16) | (0x30904 >> 2), 999 0x00000000, 1000 (0x0e00 << 16) | (0x89b4 >> 2), 1001 0x00000000, 1002 (0x0e00 << 16) | (0x3e1fc >> 2), 1003 0x00000000, 1004 (0x0e00 << 16) | (0x3c210 >> 2), 1005 0x00000000, 1006 (0x0e00 << 16) | (0x3c214 >> 2), 1007 0x00000000, 1008 (0x0e00 << 16) | (0x3c218 >> 2), 1009 0x00000000, 1010 (0x0e00 << 16) | (0x8904 >> 2), 1011 0x00000000, 1012 0x5, 1013 (0x0e00 << 16) | (0x8c28 >> 2), 1014 (0x0e00 << 16) | (0x8c2c >> 2), 1015 (0x0e00 << 16) | (0x8c30 >> 2), 1016 (0x0e00 << 16) | (0x8c34 >> 2), 1017 (0x0e00 << 16) | (0x9600 >> 2), 1018 }; 1019 1020 static const u32 bonaire_golden_spm_registers[] = 1021 { 1022 0x30800, 0xe0ffffff, 0xe0000000 1023 }; 1024 1025 static const u32 bonaire_golden_common_registers[] = 1026 { 1027 0xc770, 0xffffffff, 0x00000800, 1028 0xc774, 0xffffffff, 0x00000800, 1029 0xc798, 0xffffffff, 0x00007fbf, 1030 0xc79c, 0xffffffff, 0x00007faf 1031 }; 1032 1033 static const u32 bonaire_golden_registers[] = 1034 { 1035 0x3354, 0x00000333, 0x00000333, 1036 0x3350, 0x000c0fc0, 0x00040200, 1037 0x9a10, 0x00010000, 0x00058208, 1038 0x3c000, 0xffff1fff, 0x00140000, 1039 0x3c200, 0xfdfc0fff, 0x00000100, 1040 0x3c234, 0x40000000, 0x40000200, 1041 0x9830, 0xffffffff, 0x00000000, 1042 0x9834, 0xf00fffff, 0x00000400, 1043 0x9838, 0x0002021c, 0x00020200, 1044 0xc78, 0x00000080, 0x00000000, 1045 0x5bb0, 0x000000f0, 0x00000070, 1046 0x5bc0, 0xf0311fff, 0x80300000, 1047 0x98f8, 0x73773777, 0x12010001, 1048 0x350c, 0x00810000, 0x408af000, 1049 0x7030, 0x31000111, 0x00000011, 1050 0x2f48, 0x73773777, 0x12010001, 1051 0x220c, 0x00007fb6, 0x0021a1b1, 1052 0x2210, 0x00007fb6, 0x002021b1, 1053 0x2180, 0x00007fb6, 0x00002191, 1054 0x2218, 0x00007fb6, 0x002121b1, 1055 0x221c, 0x00007fb6, 0x002021b1, 1056 0x21dc, 0x00007fb6, 0x00002191, 1057 0x21e0, 0x00007fb6, 0x00002191, 1058 0x3628, 0x0000003f, 0x0000000a, 1059 0x362c, 0x0000003f, 0x0000000a, 1060 0x2ae4, 0x00073ffe, 0x000022a2, 1061 0x240c, 0x000007ff, 0x00000000, 1062 0x8a14, 0xf000003f, 0x00000007, 1063 0x8bf0, 0x00002001, 0x00000001, 1064 0x8b24, 0xffffffff, 0x00ffffff, 1065 0x30a04, 0x0000ff0f, 0x00000000, 1066 0x28a4c, 0x07ffffff, 0x06000000, 1067 0x4d8, 0x00000fff, 0x00000100, 1068 0x3e78, 0x00000001, 0x00000002, 1069 0x9100, 0x03000000, 0x0362c688, 1070 0x8c00, 0x000000ff, 0x00000001, 1071 0xe40, 0x00001fff, 0x00001fff, 1072 0x9060, 0x0000007f, 0x00000020, 1073 0x9508, 0x00010000, 0x00010000, 1074 0xac14, 0x000003ff, 0x000000f3, 1075 0xac0c, 0xffffffff, 0x00001032 1076 }; 1077 1078 static const u32 bonaire_mgcg_cgcg_init[] = 1079 { 1080 0xc420, 0xffffffff, 0xfffffffc, 1081 0x30800, 0xffffffff, 0xe0000000, 1082 0x3c2a0, 0xffffffff, 0x00000100, 1083 0x3c208, 0xffffffff, 0x00000100, 1084 0x3c2c0, 0xffffffff, 0xc0000100, 1085 0x3c2c8, 0xffffffff, 0xc0000100, 1086 0x3c2c4, 0xffffffff, 0xc0000100, 1087 0x55e4, 0xffffffff, 0x00600100, 1088 0x3c280, 0xffffffff, 0x00000100, 1089 0x3c214, 0xffffffff, 0x06000100, 1090 0x3c220, 0xffffffff, 0x00000100, 1091 0x3c218, 0xffffffff, 0x06000100, 1092 0x3c204, 0xffffffff, 0x00000100, 1093 0x3c2e0, 0xffffffff, 0x00000100, 1094 0x3c224, 0xffffffff, 0x00000100, 1095 0x3c200, 0xffffffff, 0x00000100, 1096 0x3c230, 0xffffffff, 0x00000100, 1097 0x3c234, 0xffffffff, 0x00000100, 1098 0x3c250, 0xffffffff, 0x00000100, 1099 0x3c254, 0xffffffff, 0x00000100, 1100 0x3c258, 0xffffffff, 0x00000100, 1101 0x3c25c, 0xffffffff, 0x00000100, 1102 0x3c260, 0xffffffff, 0x00000100, 1103 0x3c27c, 0xffffffff, 0x00000100, 1104 0x3c278, 0xffffffff, 0x00000100, 1105 0x3c210, 0xffffffff, 0x06000100, 1106 0x3c290, 0xffffffff, 0x00000100, 1107 0x3c274, 0xffffffff, 0x00000100, 1108 0x3c2b4, 0xffffffff, 0x00000100, 1109 0x3c2b0, 0xffffffff, 0x00000100, 1110 0x3c270, 0xffffffff, 0x00000100, 1111 0x30800, 0xffffffff, 0xe0000000, 1112 0x3c020, 0xffffffff, 0x00010000, 1113 0x3c024, 0xffffffff, 0x00030002, 1114 0x3c028, 0xffffffff, 0x00040007, 1115 0x3c02c, 0xffffffff, 0x00060005, 1116 0x3c030, 0xffffffff, 0x00090008, 1117 0x3c034, 0xffffffff, 0x00010000, 1118 0x3c038, 0xffffffff, 0x00030002, 1119 0x3c03c, 0xffffffff, 0x00040007, 1120 0x3c040, 0xffffffff, 0x00060005, 1121 0x3c044, 0xffffffff, 0x00090008, 1122 0x3c048, 0xffffffff, 0x00010000, 1123 0x3c04c, 0xffffffff, 0x00030002, 1124 0x3c050, 0xffffffff, 0x00040007, 1125 0x3c054, 0xffffffff, 0x00060005, 1126 0x3c058, 0xffffffff, 0x00090008, 1127 0x3c05c, 0xffffffff, 0x00010000, 1128 0x3c060, 0xffffffff, 0x00030002, 1129 0x3c064, 0xffffffff, 0x00040007, 1130 0x3c068, 0xffffffff, 0x00060005, 1131 0x3c06c, 0xffffffff, 0x00090008, 1132 0x3c070, 0xffffffff, 0x00010000, 1133 0x3c074, 0xffffffff, 0x00030002, 1134 0x3c078, 0xffffffff, 0x00040007, 1135 0x3c07c, 0xffffffff, 0x00060005, 1136 0x3c080, 0xffffffff, 0x00090008, 1137 0x3c084, 0xffffffff, 0x00010000, 1138 0x3c088, 0xffffffff, 0x00030002, 1139 0x3c08c, 0xffffffff, 0x00040007, 1140 0x3c090, 0xffffffff, 0x00060005, 1141 0x3c094, 0xffffffff, 0x00090008, 1142 0x3c098, 0xffffffff, 0x00010000, 1143 0x3c09c, 0xffffffff, 0x00030002, 1144 0x3c0a0, 0xffffffff, 0x00040007, 1145 0x3c0a4, 0xffffffff, 0x00060005, 1146 0x3c0a8, 0xffffffff, 0x00090008, 1147 0x3c000, 0xffffffff, 0x96e00200, 1148 0x8708, 0xffffffff, 0x00900100, 1149 0xc424, 0xffffffff, 0x0020003f, 1150 0x38, 0xffffffff, 0x0140001c, 1151 0x3c, 0x000f0000, 0x000f0000, 1152 0x220, 0xffffffff, 0xC060000C, 1153 0x224, 0xc0000fff, 0x00000100, 1154 0xf90, 0xffffffff, 0x00000100, 1155 0xf98, 0x00000101, 0x00000000, 1156 0x20a8, 0xffffffff, 0x00000104, 1157 0x55e4, 0xff000fff, 0x00000100, 1158 0x30cc, 0xc0000fff, 0x00000104, 1159 0xc1e4, 0x00000001, 0x00000001, 1160 0xd00c, 0xff000ff0, 0x00000100, 1161 0xd80c, 0xff000ff0, 0x00000100 1162 }; 1163 1164 static const u32 spectre_golden_spm_registers[] = 1165 { 1166 0x30800, 0xe0ffffff, 0xe0000000 1167 }; 1168 1169 static const u32 spectre_golden_common_registers[] = 1170 { 1171 0xc770, 0xffffffff, 0x00000800, 1172 0xc774, 0xffffffff, 0x00000800, 1173 0xc798, 0xffffffff, 0x00007fbf, 1174 0xc79c, 0xffffffff, 0x00007faf 1175 }; 1176 1177 static const u32 spectre_golden_registers[] = 1178 { 1179 0x3c000, 0xffff1fff, 0x96940200, 1180 0x3c00c, 0xffff0001, 0xff000000, 1181 0x3c200, 0xfffc0fff, 0x00000100, 1182 0x6ed8, 0x00010101, 0x00010000, 1183 0x9834, 0xf00fffff, 0x00000400, 1184 0x9838, 0xfffffffc, 0x00020200, 1185 0x5bb0, 0x000000f0, 0x00000070, 1186 0x5bc0, 0xf0311fff, 0x80300000, 1187 0x98f8, 0x73773777, 0x12010001, 1188 0x9b7c, 0x00ff0000, 0x00fc0000, 1189 0x2f48, 0x73773777, 0x12010001, 1190 0x8a14, 0xf000003f, 0x00000007, 1191 0x8b24, 0xffffffff, 0x00ffffff, 1192 0x28350, 0x3f3f3fff, 0x00000082, 1193 0x28354, 0x0000003f, 0x00000000, 1194 0x3e78, 0x00000001, 0x00000002, 1195 0x913c, 0xffff03df, 0x00000004, 1196 0xc768, 0x00000008, 0x00000008, 1197 0x8c00, 0x000008ff, 0x00000800, 1198 0x9508, 0x00010000, 0x00010000, 1199 0xac0c, 0xffffffff, 0x54763210, 1200 0x214f8, 0x01ff01ff, 0x00000002, 1201 0x21498, 0x007ff800, 0x00200000, 1202 0x2015c, 0xffffffff, 0x00000f40, 1203 0x30934, 0xffffffff, 0x00000001 1204 }; 1205 1206 static const u32 spectre_mgcg_cgcg_init[] = 1207 { 1208 0xc420, 0xffffffff, 0xfffffffc, 1209 0x30800, 0xffffffff, 0xe0000000, 1210 0x3c2a0, 0xffffffff, 0x00000100, 1211 0x3c208, 0xffffffff, 0x00000100, 1212 0x3c2c0, 0xffffffff, 0x00000100, 1213 0x3c2c8, 0xffffffff, 0x00000100, 1214 0x3c2c4, 0xffffffff, 0x00000100, 1215 0x55e4, 0xffffffff, 0x00600100, 1216 0x3c280, 0xffffffff, 0x00000100, 1217 0x3c214, 0xffffffff, 0x06000100, 1218 0x3c220, 0xffffffff, 0x00000100, 1219 0x3c218, 0xffffffff, 0x06000100, 1220 0x3c204, 0xffffffff, 0x00000100, 1221 0x3c2e0, 0xffffffff, 0x00000100, 1222 0x3c224, 0xffffffff, 0x00000100, 1223 0x3c200, 0xffffffff, 0x00000100, 1224 0x3c230, 0xffffffff, 0x00000100, 1225 0x3c234, 0xffffffff, 0x00000100, 1226 0x3c250, 0xffffffff, 0x00000100, 1227 0x3c254, 0xffffffff, 0x00000100, 1228 0x3c258, 0xffffffff, 0x00000100, 1229 0x3c25c, 0xffffffff, 0x00000100, 1230 0x3c260, 0xffffffff, 0x00000100, 1231 0x3c27c, 0xffffffff, 0x00000100, 1232 0x3c278, 0xffffffff, 0x00000100, 1233 0x3c210, 0xffffffff, 0x06000100, 1234 0x3c290, 0xffffffff, 0x00000100, 1235 0x3c274, 0xffffffff, 0x00000100, 1236 0x3c2b4, 0xffffffff, 0x00000100, 1237 0x3c2b0, 0xffffffff, 0x00000100, 1238 0x3c270, 0xffffffff, 0x00000100, 1239 0x30800, 0xffffffff, 0xe0000000, 1240 0x3c020, 0xffffffff, 0x00010000, 1241 0x3c024, 0xffffffff, 0x00030002, 1242 0x3c028, 0xffffffff, 0x00040007, 1243 0x3c02c, 0xffffffff, 0x00060005, 1244 0x3c030, 0xffffffff, 0x00090008, 1245 0x3c034, 0xffffffff, 0x00010000, 1246 0x3c038, 0xffffffff, 0x00030002, 1247 0x3c03c, 0xffffffff, 0x00040007, 1248 0x3c040, 0xffffffff, 0x00060005, 1249 0x3c044, 0xffffffff, 0x00090008, 1250 0x3c048, 0xffffffff, 0x00010000, 1251 0x3c04c, 0xffffffff, 0x00030002, 1252 0x3c050, 0xffffffff, 0x00040007, 1253 0x3c054, 0xffffffff, 0x00060005, 1254 0x3c058, 0xffffffff, 0x00090008, 1255 0x3c05c, 0xffffffff, 0x00010000, 1256 0x3c060, 0xffffffff, 0x00030002, 1257 0x3c064, 0xffffffff, 0x00040007, 1258 0x3c068, 0xffffffff, 0x00060005, 1259 0x3c06c, 0xffffffff, 0x00090008, 1260 0x3c070, 0xffffffff, 0x00010000, 1261 0x3c074, 0xffffffff, 0x00030002, 1262 0x3c078, 0xffffffff, 0x00040007, 1263 0x3c07c, 0xffffffff, 0x00060005, 1264 0x3c080, 0xffffffff, 0x00090008, 1265 0x3c084, 0xffffffff, 0x00010000, 1266 0x3c088, 0xffffffff, 0x00030002, 1267 0x3c08c, 0xffffffff, 0x00040007, 1268 0x3c090, 0xffffffff, 0x00060005, 1269 0x3c094, 0xffffffff, 0x00090008, 1270 0x3c098, 0xffffffff, 0x00010000, 1271 0x3c09c, 0xffffffff, 0x00030002, 1272 0x3c0a0, 0xffffffff, 0x00040007, 1273 0x3c0a4, 0xffffffff, 0x00060005, 1274 0x3c0a8, 0xffffffff, 0x00090008, 1275 0x3c0ac, 0xffffffff, 0x00010000, 1276 0x3c0b0, 0xffffffff, 0x00030002, 1277 0x3c0b4, 0xffffffff, 0x00040007, 1278 0x3c0b8, 0xffffffff, 0x00060005, 1279 0x3c0bc, 0xffffffff, 0x00090008, 1280 0x3c000, 0xffffffff, 0x96e00200, 1281 0x8708, 0xffffffff, 0x00900100, 1282 0xc424, 0xffffffff, 0x0020003f, 1283 0x38, 0xffffffff, 0x0140001c, 1284 0x3c, 0x000f0000, 0x000f0000, 1285 0x220, 0xffffffff, 0xC060000C, 1286 0x224, 0xc0000fff, 0x00000100, 1287 0xf90, 0xffffffff, 0x00000100, 1288 0xf98, 0x00000101, 0x00000000, 1289 0x20a8, 0xffffffff, 0x00000104, 1290 0x55e4, 0xff000fff, 0x00000100, 1291 0x30cc, 0xc0000fff, 0x00000104, 1292 0xc1e4, 0x00000001, 0x00000001, 1293 0xd00c, 0xff000ff0, 0x00000100, 1294 0xd80c, 0xff000ff0, 0x00000100 1295 }; 1296 1297 static const u32 kalindi_golden_spm_registers[] = 1298 { 1299 0x30800, 0xe0ffffff, 0xe0000000 1300 }; 1301 1302 static const u32 kalindi_golden_common_registers[] = 1303 { 1304 0xc770, 0xffffffff, 0x00000800, 1305 0xc774, 0xffffffff, 0x00000800, 1306 0xc798, 0xffffffff, 0x00007fbf, 1307 0xc79c, 0xffffffff, 0x00007faf 1308 }; 1309 1310 static const u32 kalindi_golden_registers[] = 1311 { 1312 0x3c000, 0xffffdfff, 0x6e944040, 1313 0x55e4, 0xff607fff, 0xfc000100, 1314 0x3c220, 0xff000fff, 0x00000100, 1315 0x3c224, 0xff000fff, 0x00000100, 1316 0x3c200, 0xfffc0fff, 0x00000100, 1317 0x6ed8, 0x00010101, 0x00010000, 1318 0x9830, 0xffffffff, 0x00000000, 1319 0x9834, 0xf00fffff, 0x00000400, 1320 0x5bb0, 0x000000f0, 0x00000070, 1321 0x5bc0, 0xf0311fff, 0x80300000, 1322 0x98f8, 0x73773777, 0x12010001, 1323 0x98fc, 0xffffffff, 0x00000010, 1324 0x9b7c, 0x00ff0000, 0x00fc0000, 1325 0x8030, 0x00001f0f, 0x0000100a, 1326 0x2f48, 0x73773777, 0x12010001, 1327 0x2408, 0x000fffff, 0x000c007f, 1328 0x8a14, 0xf000003f, 0x00000007, 1329 0x8b24, 0x3fff3fff, 0x00ffcfff, 1330 0x30a04, 0x0000ff0f, 0x00000000, 1331 0x28a4c, 0x07ffffff, 0x06000000, 1332 0x4d8, 0x00000fff, 0x00000100, 1333 0x3e78, 0x00000001, 0x00000002, 1334 0xc768, 0x00000008, 0x00000008, 1335 0x8c00, 0x000000ff, 0x00000003, 1336 0x214f8, 0x01ff01ff, 0x00000002, 1337 0x21498, 0x007ff800, 0x00200000, 1338 0x2015c, 0xffffffff, 0x00000f40, 1339 0x88c4, 0x001f3ae3, 0x00000082, 1340 0x88d4, 0x0000001f, 0x00000010, 1341 0x30934, 0xffffffff, 0x00000000 1342 }; 1343 1344 static const u32 kalindi_mgcg_cgcg_init[] = 1345 { 1346 0xc420, 0xffffffff, 0xfffffffc, 1347 0x30800, 0xffffffff, 0xe0000000, 1348 0x3c2a0, 0xffffffff, 0x00000100, 1349 0x3c208, 0xffffffff, 0x00000100, 1350 0x3c2c0, 0xffffffff, 0x00000100, 1351 0x3c2c8, 0xffffffff, 0x00000100, 1352 0x3c2c4, 0xffffffff, 0x00000100, 1353 0x55e4, 0xffffffff, 0x00600100, 1354 0x3c280, 0xffffffff, 0x00000100, 1355 0x3c214, 0xffffffff, 0x06000100, 1356 0x3c220, 0xffffffff, 0x00000100, 1357 0x3c218, 0xffffffff, 0x06000100, 1358 0x3c204, 0xffffffff, 0x00000100, 1359 0x3c2e0, 0xffffffff, 0x00000100, 1360 0x3c224, 0xffffffff, 0x00000100, 1361 0x3c200, 0xffffffff, 0x00000100, 1362 0x3c230, 0xffffffff, 0x00000100, 1363 0x3c234, 0xffffffff, 0x00000100, 1364 0x3c250, 0xffffffff, 0x00000100, 1365 0x3c254, 0xffffffff, 0x00000100, 1366 0x3c258, 0xffffffff, 0x00000100, 1367 0x3c25c, 0xffffffff, 0x00000100, 1368 0x3c260, 0xffffffff, 0x00000100, 1369 0x3c27c, 0xffffffff, 0x00000100, 1370 0x3c278, 0xffffffff, 0x00000100, 1371 0x3c210, 0xffffffff, 0x06000100, 1372 0x3c290, 0xffffffff, 0x00000100, 1373 0x3c274, 0xffffffff, 0x00000100, 1374 0x3c2b4, 0xffffffff, 0x00000100, 1375 0x3c2b0, 0xffffffff, 0x00000100, 1376 0x3c270, 0xffffffff, 0x00000100, 1377 0x30800, 0xffffffff, 0xe0000000, 1378 0x3c020, 0xffffffff, 0x00010000, 1379 0x3c024, 0xffffffff, 0x00030002, 1380 0x3c028, 0xffffffff, 0x00040007, 1381 0x3c02c, 0xffffffff, 0x00060005, 1382 0x3c030, 0xffffffff, 0x00090008, 1383 0x3c034, 0xffffffff, 0x00010000, 1384 0x3c038, 0xffffffff, 0x00030002, 1385 0x3c03c, 0xffffffff, 0x00040007, 1386 0x3c040, 0xffffffff, 0x00060005, 1387 0x3c044, 0xffffffff, 0x00090008, 1388 0x3c000, 0xffffffff, 0x96e00200, 1389 0x8708, 0xffffffff, 0x00900100, 1390 0xc424, 0xffffffff, 0x0020003f, 1391 0x38, 0xffffffff, 0x0140001c, 1392 0x3c, 0x000f0000, 0x000f0000, 1393 0x220, 0xffffffff, 0xC060000C, 1394 0x224, 0xc0000fff, 0x00000100, 1395 0x20a8, 0xffffffff, 0x00000104, 1396 0x55e4, 0xff000fff, 0x00000100, 1397 0x30cc, 0xc0000fff, 0x00000104, 1398 0xc1e4, 0x00000001, 0x00000001, 1399 0xd00c, 0xff000ff0, 0x00000100, 1400 0xd80c, 0xff000ff0, 0x00000100 1401 }; 1402 1403 static const u32 hawaii_golden_spm_registers[] = 1404 { 1405 0x30800, 0xe0ffffff, 0xe0000000 1406 }; 1407 1408 static const u32 hawaii_golden_common_registers[] = 1409 { 1410 0x30800, 0xffffffff, 0xe0000000, 1411 0x28350, 0xffffffff, 0x3a00161a, 1412 0x28354, 0xffffffff, 0x0000002e, 1413 0x9a10, 0xffffffff, 0x00018208, 1414 0x98f8, 0xffffffff, 0x12011003 1415 }; 1416 1417 static const u32 hawaii_golden_registers[] = 1418 { 1419 0x3354, 0x00000333, 0x00000333, 1420 0x9a10, 0x00010000, 0x00058208, 1421 0x9830, 0xffffffff, 0x00000000, 1422 0x9834, 0xf00fffff, 0x00000400, 1423 0x9838, 0x0002021c, 0x00020200, 1424 0xc78, 0x00000080, 0x00000000, 1425 0x5bb0, 0x000000f0, 0x00000070, 1426 0x5bc0, 0xf0311fff, 0x80300000, 1427 0x350c, 0x00810000, 0x408af000, 1428 0x7030, 0x31000111, 0x00000011, 1429 0x2f48, 0x73773777, 0x12010001, 1430 0x2120, 0x0000007f, 0x0000001b, 1431 0x21dc, 0x00007fb6, 0x00002191, 1432 0x3628, 0x0000003f, 0x0000000a, 1433 0x362c, 0x0000003f, 0x0000000a, 1434 0x2ae4, 0x00073ffe, 0x000022a2, 1435 0x240c, 0x000007ff, 0x00000000, 1436 0x8bf0, 0x00002001, 0x00000001, 1437 0x8b24, 0xffffffff, 0x00ffffff, 1438 0x30a04, 0x0000ff0f, 0x00000000, 1439 0x28a4c, 0x07ffffff, 0x06000000, 1440 0x3e78, 0x00000001, 0x00000002, 1441 0xc768, 0x00000008, 0x00000008, 1442 0xc770, 0x00000f00, 0x00000800, 1443 0xc774, 0x00000f00, 0x00000800, 1444 0xc798, 0x00ffffff, 0x00ff7fbf, 1445 0xc79c, 0x00ffffff, 0x00ff7faf, 1446 0x8c00, 0x000000ff, 0x00000800, 1447 0xe40, 0x00001fff, 0x00001fff, 1448 0x9060, 0x0000007f, 0x00000020, 1449 0x9508, 0x00010000, 0x00010000, 1450 0xae00, 0x00100000, 0x000ff07c, 1451 0xac14, 0x000003ff, 0x0000000f, 1452 0xac10, 0xffffffff, 0x7564fdec, 1453 0xac0c, 0xffffffff, 0x3120b9a8, 1454 0xac08, 0x20000000, 0x0f9c0000 1455 }; 1456 1457 static const u32 hawaii_mgcg_cgcg_init[] = 1458 { 1459 0xc420, 0xffffffff, 0xfffffffd, 1460 0x30800, 0xffffffff, 0xe0000000, 1461 0x3c2a0, 0xffffffff, 0x00000100, 1462 0x3c208, 0xffffffff, 0x00000100, 1463 0x3c2c0, 0xffffffff, 0x00000100, 1464 0x3c2c8, 0xffffffff, 0x00000100, 1465 0x3c2c4, 0xffffffff, 0x00000100, 1466 0x55e4, 0xffffffff, 0x00200100, 1467 0x3c280, 0xffffffff, 0x00000100, 1468 0x3c214, 0xffffffff, 0x06000100, 1469 0x3c220, 0xffffffff, 0x00000100, 1470 0x3c218, 0xffffffff, 0x06000100, 1471 0x3c204, 0xffffffff, 0x00000100, 1472 0x3c2e0, 0xffffffff, 0x00000100, 1473 0x3c224, 0xffffffff, 0x00000100, 1474 0x3c200, 0xffffffff, 0x00000100, 1475 0x3c230, 0xffffffff, 0x00000100, 1476 0x3c234, 0xffffffff, 0x00000100, 1477 0x3c250, 0xffffffff, 0x00000100, 1478 0x3c254, 0xffffffff, 0x00000100, 1479 0x3c258, 0xffffffff, 0x00000100, 1480 0x3c25c, 0xffffffff, 0x00000100, 1481 0x3c260, 0xffffffff, 0x00000100, 1482 0x3c27c, 0xffffffff, 0x00000100, 1483 0x3c278, 0xffffffff, 0x00000100, 1484 0x3c210, 0xffffffff, 0x06000100, 1485 0x3c290, 0xffffffff, 0x00000100, 1486 0x3c274, 0xffffffff, 0x00000100, 1487 0x3c2b4, 0xffffffff, 0x00000100, 1488 0x3c2b0, 0xffffffff, 0x00000100, 1489 0x3c270, 0xffffffff, 0x00000100, 1490 0x30800, 0xffffffff, 0xe0000000, 1491 0x3c020, 0xffffffff, 0x00010000, 1492 0x3c024, 0xffffffff, 0x00030002, 1493 0x3c028, 0xffffffff, 0x00040007, 1494 0x3c02c, 0xffffffff, 0x00060005, 1495 0x3c030, 0xffffffff, 0x00090008, 1496 0x3c034, 0xffffffff, 0x00010000, 1497 0x3c038, 0xffffffff, 0x00030002, 1498 0x3c03c, 0xffffffff, 0x00040007, 1499 0x3c040, 0xffffffff, 0x00060005, 1500 0x3c044, 0xffffffff, 0x00090008, 1501 0x3c048, 0xffffffff, 0x00010000, 1502 0x3c04c, 0xffffffff, 0x00030002, 1503 0x3c050, 0xffffffff, 0x00040007, 1504 0x3c054, 0xffffffff, 0x00060005, 1505 0x3c058, 0xffffffff, 0x00090008, 1506 0x3c05c, 0xffffffff, 0x00010000, 1507 0x3c060, 0xffffffff, 0x00030002, 1508 0x3c064, 0xffffffff, 0x00040007, 1509 0x3c068, 0xffffffff, 0x00060005, 1510 0x3c06c, 0xffffffff, 0x00090008, 1511 0x3c070, 0xffffffff, 0x00010000, 1512 0x3c074, 0xffffffff, 0x00030002, 1513 0x3c078, 0xffffffff, 0x00040007, 1514 0x3c07c, 0xffffffff, 0x00060005, 1515 0x3c080, 0xffffffff, 0x00090008, 1516 0x3c084, 0xffffffff, 0x00010000, 1517 0x3c088, 0xffffffff, 0x00030002, 1518 0x3c08c, 0xffffffff, 0x00040007, 1519 0x3c090, 0xffffffff, 0x00060005, 1520 0x3c094, 0xffffffff, 0x00090008, 1521 0x3c098, 0xffffffff, 0x00010000, 1522 0x3c09c, 0xffffffff, 0x00030002, 1523 0x3c0a0, 0xffffffff, 0x00040007, 1524 0x3c0a4, 0xffffffff, 0x00060005, 1525 0x3c0a8, 0xffffffff, 0x00090008, 1526 0x3c0ac, 0xffffffff, 0x00010000, 1527 0x3c0b0, 0xffffffff, 0x00030002, 1528 0x3c0b4, 0xffffffff, 0x00040007, 1529 0x3c0b8, 0xffffffff, 0x00060005, 1530 0x3c0bc, 0xffffffff, 0x00090008, 1531 0x3c0c0, 0xffffffff, 0x00010000, 1532 0x3c0c4, 0xffffffff, 0x00030002, 1533 0x3c0c8, 0xffffffff, 0x00040007, 1534 0x3c0cc, 0xffffffff, 0x00060005, 1535 0x3c0d0, 0xffffffff, 0x00090008, 1536 0x3c0d4, 0xffffffff, 0x00010000, 1537 0x3c0d8, 0xffffffff, 0x00030002, 1538 0x3c0dc, 0xffffffff, 0x00040007, 1539 0x3c0e0, 0xffffffff, 0x00060005, 1540 0x3c0e4, 0xffffffff, 0x00090008, 1541 0x3c0e8, 0xffffffff, 0x00010000, 1542 0x3c0ec, 0xffffffff, 0x00030002, 1543 0x3c0f0, 0xffffffff, 0x00040007, 1544 0x3c0f4, 0xffffffff, 0x00060005, 1545 0x3c0f8, 0xffffffff, 0x00090008, 1546 0xc318, 0xffffffff, 0x00020200, 1547 0x3350, 0xffffffff, 0x00000200, 1548 0x15c0, 0xffffffff, 0x00000400, 1549 0x55e8, 0xffffffff, 0x00000000, 1550 0x2f50, 0xffffffff, 0x00000902, 1551 0x3c000, 0xffffffff, 0x96940200, 1552 0x8708, 0xffffffff, 0x00900100, 1553 0xc424, 0xffffffff, 0x0020003f, 1554 0x38, 0xffffffff, 0x0140001c, 1555 0x3c, 0x000f0000, 0x000f0000, 1556 0x220, 0xffffffff, 0xc060000c, 1557 0x224, 0xc0000fff, 0x00000100, 1558 0xf90, 0xffffffff, 0x00000100, 1559 0xf98, 0x00000101, 0x00000000, 1560 0x20a8, 0xffffffff, 0x00000104, 1561 0x55e4, 0xff000fff, 0x00000100, 1562 0x30cc, 0xc0000fff, 0x00000104, 1563 0xc1e4, 0x00000001, 0x00000001, 1564 0xd00c, 0xff000ff0, 0x00000100, 1565 0xd80c, 0xff000ff0, 0x00000100 1566 }; 1567 1568 static const u32 godavari_golden_registers[] = 1569 { 1570 0x55e4, 0xff607fff, 0xfc000100, 1571 0x6ed8, 0x00010101, 0x00010000, 1572 0x9830, 0xffffffff, 0x00000000, 1573 0x98302, 0xf00fffff, 0x00000400, 1574 0x6130, 0xffffffff, 0x00010000, 1575 0x5bb0, 0x000000f0, 0x00000070, 1576 0x5bc0, 0xf0311fff, 0x80300000, 1577 0x98f8, 0x73773777, 0x12010001, 1578 0x98fc, 0xffffffff, 0x00000010, 1579 0x8030, 0x00001f0f, 0x0000100a, 1580 0x2f48, 0x73773777, 0x12010001, 1581 0x2408, 0x000fffff, 0x000c007f, 1582 0x8a14, 0xf000003f, 0x00000007, 1583 0x8b24, 0xffffffff, 0x00ff0fff, 1584 0x30a04, 0x0000ff0f, 0x00000000, 1585 0x28a4c, 0x07ffffff, 0x06000000, 1586 0x4d8, 0x00000fff, 0x00000100, 1587 0xd014, 0x00010000, 0x00810001, 1588 0xd814, 0x00010000, 0x00810001, 1589 0x3e78, 0x00000001, 0x00000002, 1590 0xc768, 0x00000008, 0x00000008, 1591 0xc770, 0x00000f00, 0x00000800, 1592 0xc774, 0x00000f00, 0x00000800, 1593 0xc798, 0x00ffffff, 0x00ff7fbf, 1594 0xc79c, 0x00ffffff, 0x00ff7faf, 1595 0x8c00, 0x000000ff, 0x00000001, 1596 0x214f8, 0x01ff01ff, 0x00000002, 1597 0x21498, 0x007ff800, 0x00200000, 1598 0x2015c, 0xffffffff, 0x00000f40, 1599 0x88c4, 0x001f3ae3, 0x00000082, 1600 0x88d4, 0x0000001f, 0x00000010, 1601 0x30934, 0xffffffff, 0x00000000 1602 }; 1603 1604 1605 static void cik_init_golden_registers(struct radeon_device *rdev) 1606 { 1607 /* Some of the registers might be dependent on GRBM_GFX_INDEX */ 1608 mutex_lock(&rdev->grbm_idx_mutex); 1609 switch (rdev->family) { 1610 case CHIP_BONAIRE: 1611 radeon_program_register_sequence(rdev, 1612 bonaire_mgcg_cgcg_init, 1613 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init)); 1614 radeon_program_register_sequence(rdev, 1615 bonaire_golden_registers, 1616 (const u32)ARRAY_SIZE(bonaire_golden_registers)); 1617 radeon_program_register_sequence(rdev, 1618 bonaire_golden_common_registers, 1619 (const u32)ARRAY_SIZE(bonaire_golden_common_registers)); 1620 radeon_program_register_sequence(rdev, 1621 bonaire_golden_spm_registers, 1622 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers)); 1623 break; 1624 case CHIP_KABINI: 1625 radeon_program_register_sequence(rdev, 1626 kalindi_mgcg_cgcg_init, 1627 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init)); 1628 radeon_program_register_sequence(rdev, 1629 kalindi_golden_registers, 1630 (const u32)ARRAY_SIZE(kalindi_golden_registers)); 1631 radeon_program_register_sequence(rdev, 1632 kalindi_golden_common_registers, 1633 (const u32)ARRAY_SIZE(kalindi_golden_common_registers)); 1634 radeon_program_register_sequence(rdev, 1635 kalindi_golden_spm_registers, 1636 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers)); 1637 break; 1638 case CHIP_MULLINS: 1639 radeon_program_register_sequence(rdev, 1640 kalindi_mgcg_cgcg_init, 1641 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init)); 1642 radeon_program_register_sequence(rdev, 1643 godavari_golden_registers, 1644 (const u32)ARRAY_SIZE(godavari_golden_registers)); 1645 radeon_program_register_sequence(rdev, 1646 kalindi_golden_common_registers, 1647 (const u32)ARRAY_SIZE(kalindi_golden_common_registers)); 1648 radeon_program_register_sequence(rdev, 1649 kalindi_golden_spm_registers, 1650 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers)); 1651 break; 1652 case CHIP_KAVERI: 1653 radeon_program_register_sequence(rdev, 1654 spectre_mgcg_cgcg_init, 1655 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init)); 1656 radeon_program_register_sequence(rdev, 1657 spectre_golden_registers, 1658 (const u32)ARRAY_SIZE(spectre_golden_registers)); 1659 radeon_program_register_sequence(rdev, 1660 spectre_golden_common_registers, 1661 (const u32)ARRAY_SIZE(spectre_golden_common_registers)); 1662 radeon_program_register_sequence(rdev, 1663 spectre_golden_spm_registers, 1664 (const u32)ARRAY_SIZE(spectre_golden_spm_registers)); 1665 break; 1666 case CHIP_HAWAII: 1667 radeon_program_register_sequence(rdev, 1668 hawaii_mgcg_cgcg_init, 1669 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init)); 1670 radeon_program_register_sequence(rdev, 1671 hawaii_golden_registers, 1672 (const u32)ARRAY_SIZE(hawaii_golden_registers)); 1673 radeon_program_register_sequence(rdev, 1674 hawaii_golden_common_registers, 1675 (const u32)ARRAY_SIZE(hawaii_golden_common_registers)); 1676 radeon_program_register_sequence(rdev, 1677 hawaii_golden_spm_registers, 1678 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers)); 1679 break; 1680 default: 1681 break; 1682 } 1683 mutex_unlock(&rdev->grbm_idx_mutex); 1684 } 1685 1686 /** 1687 * cik_get_xclk - get the xclk 1688 * 1689 * @rdev: radeon_device pointer 1690 * 1691 * Returns the reference clock used by the gfx engine 1692 * (CIK). 1693 */ 1694 u32 cik_get_xclk(struct radeon_device *rdev) 1695 { 1696 u32 reference_clock = rdev->clock.spll.reference_freq; 1697 1698 if (rdev->flags & RADEON_IS_IGP) { 1699 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK) 1700 return reference_clock / 2; 1701 } else { 1702 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE) 1703 return reference_clock / 4; 1704 } 1705 return reference_clock; 1706 } 1707 1708 /** 1709 * cik_mm_rdoorbell - read a doorbell dword 1710 * 1711 * @rdev: radeon_device pointer 1712 * @index: doorbell index 1713 * 1714 * Returns the value in the doorbell aperture at the 1715 * requested doorbell index (CIK). 1716 */ 1717 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index) 1718 { 1719 if (index < rdev->doorbell.num_doorbells) { 1720 return readl(rdev->doorbell.ptr + index); 1721 } else { 1722 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 1723 return 0; 1724 } 1725 } 1726 1727 /** 1728 * cik_mm_wdoorbell - write a doorbell dword 1729 * 1730 * @rdev: radeon_device pointer 1731 * @index: doorbell index 1732 * @v: value to write 1733 * 1734 * Writes @v to the doorbell aperture at the 1735 * requested doorbell index (CIK). 1736 */ 1737 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v) 1738 { 1739 if (index < rdev->doorbell.num_doorbells) { 1740 writel(v, rdev->doorbell.ptr + index); 1741 } else { 1742 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 1743 } 1744 } 1745 1746 #define BONAIRE_IO_MC_REGS_SIZE 36 1747 1748 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] = 1749 { 1750 {0x00000070, 0x04400000}, 1751 {0x00000071, 0x80c01803}, 1752 {0x00000072, 0x00004004}, 1753 {0x00000073, 0x00000100}, 1754 {0x00000074, 0x00ff0000}, 1755 {0x00000075, 0x34000000}, 1756 {0x00000076, 0x08000014}, 1757 {0x00000077, 0x00cc08ec}, 1758 {0x00000078, 0x00000400}, 1759 {0x00000079, 0x00000000}, 1760 {0x0000007a, 0x04090000}, 1761 {0x0000007c, 0x00000000}, 1762 {0x0000007e, 0x4408a8e8}, 1763 {0x0000007f, 0x00000304}, 1764 {0x00000080, 0x00000000}, 1765 {0x00000082, 0x00000001}, 1766 {0x00000083, 0x00000002}, 1767 {0x00000084, 0xf3e4f400}, 1768 {0x00000085, 0x052024e3}, 1769 {0x00000087, 0x00000000}, 1770 {0x00000088, 0x01000000}, 1771 {0x0000008a, 0x1c0a0000}, 1772 {0x0000008b, 0xff010000}, 1773 {0x0000008d, 0xffffefff}, 1774 {0x0000008e, 0xfff3efff}, 1775 {0x0000008f, 0xfff3efbf}, 1776 {0x00000092, 0xf7ffffff}, 1777 {0x00000093, 0xffffff7f}, 1778 {0x00000095, 0x00101101}, 1779 {0x00000096, 0x00000fff}, 1780 {0x00000097, 0x00116fff}, 1781 {0x00000098, 0x60010000}, 1782 {0x00000099, 0x10010000}, 1783 {0x0000009a, 0x00006000}, 1784 {0x0000009b, 0x00001000}, 1785 {0x0000009f, 0x00b48000} 1786 }; 1787 1788 #define HAWAII_IO_MC_REGS_SIZE 22 1789 1790 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] = 1791 { 1792 {0x0000007d, 0x40000000}, 1793 {0x0000007e, 0x40180304}, 1794 {0x0000007f, 0x0000ff00}, 1795 {0x00000081, 0x00000000}, 1796 {0x00000083, 0x00000800}, 1797 {0x00000086, 0x00000000}, 1798 {0x00000087, 0x00000100}, 1799 {0x00000088, 0x00020100}, 1800 {0x00000089, 0x00000000}, 1801 {0x0000008b, 0x00040000}, 1802 {0x0000008c, 0x00000100}, 1803 {0x0000008e, 0xff010000}, 1804 {0x00000090, 0xffffefff}, 1805 {0x00000091, 0xfff3efff}, 1806 {0x00000092, 0xfff3efbf}, 1807 {0x00000093, 0xf7ffffff}, 1808 {0x00000094, 0xffffff7f}, 1809 {0x00000095, 0x00000fff}, 1810 {0x00000096, 0x00116fff}, 1811 {0x00000097, 0x60010000}, 1812 {0x00000098, 0x10010000}, 1813 {0x0000009f, 0x00c79000} 1814 }; 1815 1816 1817 /** 1818 * cik_srbm_select - select specific register instances 1819 * 1820 * @rdev: radeon_device pointer 1821 * @me: selected ME (micro engine) 1822 * @pipe: pipe 1823 * @queue: queue 1824 * @vmid: VMID 1825 * 1826 * Switches the currently active registers instances. Some 1827 * registers are instanced per VMID, others are instanced per 1828 * me/pipe/queue combination. 1829 */ 1830 static void cik_srbm_select(struct radeon_device *rdev, 1831 u32 me, u32 pipe, u32 queue, u32 vmid) 1832 { 1833 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) | 1834 MEID(me & 0x3) | 1835 VMID(vmid & 0xf) | 1836 QUEUEID(queue & 0x7)); 1837 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl); 1838 } 1839 1840 /* ucode loading */ 1841 /** 1842 * ci_mc_load_microcode - load MC ucode into the hw 1843 * 1844 * @rdev: radeon_device pointer 1845 * 1846 * Load the GDDR MC ucode into the hw (CIK). 1847 * Returns 0 on success, error on failure. 1848 */ 1849 int ci_mc_load_microcode(struct radeon_device *rdev) 1850 { 1851 const __be32 *fw_data = NULL; 1852 const __le32 *new_fw_data = NULL; 1853 u32 running, tmp; 1854 u32 *io_mc_regs = NULL; 1855 const __le32 *new_io_mc_regs = NULL; 1856 int i, regs_size, ucode_size; 1857 1858 if (!rdev->mc_fw) 1859 return -EINVAL; 1860 1861 if (rdev->new_fw) { 1862 const struct mc_firmware_header_v1_0 *hdr = 1863 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data; 1864 1865 radeon_ucode_print_mc_hdr(&hdr->header); 1866 1867 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); 1868 new_io_mc_regs = (const __le32 *) 1869 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); 1870 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1871 new_fw_data = (const __le32 *) 1872 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1873 } else { 1874 ucode_size = rdev->mc_fw->datasize / 4; 1875 1876 switch (rdev->family) { 1877 case CHIP_BONAIRE: 1878 io_mc_regs = (u32 *)&bonaire_io_mc_regs; 1879 regs_size = BONAIRE_IO_MC_REGS_SIZE; 1880 break; 1881 case CHIP_HAWAII: 1882 io_mc_regs = (u32 *)&hawaii_io_mc_regs; 1883 regs_size = HAWAII_IO_MC_REGS_SIZE; 1884 break; 1885 default: 1886 return -EINVAL; 1887 } 1888 fw_data = (const __be32 *)rdev->mc_fw->data; 1889 } 1890 1891 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK; 1892 1893 if (running == 0) { 1894 /* reset the engine and set to writable */ 1895 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 1896 WREG32(MC_SEQ_SUP_CNTL, 0x00000010); 1897 1898 /* load mc io regs */ 1899 for (i = 0; i < regs_size; i++) { 1900 if (rdev->new_fw) { 1901 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++)); 1902 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++)); 1903 } else { 1904 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]); 1905 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]); 1906 } 1907 } 1908 1909 tmp = RREG32(MC_SEQ_MISC0); 1910 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) { 1911 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5); 1912 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023); 1913 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9); 1914 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0); 1915 } 1916 1917 /* load the MC ucode */ 1918 for (i = 0; i < ucode_size; i++) { 1919 if (rdev->new_fw) 1920 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++)); 1921 else 1922 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++)); 1923 } 1924 1925 /* put the engine back into the active state */ 1926 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 1927 WREG32(MC_SEQ_SUP_CNTL, 0x00000004); 1928 WREG32(MC_SEQ_SUP_CNTL, 0x00000001); 1929 1930 /* wait for training to complete */ 1931 for (i = 0; i < rdev->usec_timeout; i++) { 1932 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0) 1933 break; 1934 udelay(1); 1935 } 1936 for (i = 0; i < rdev->usec_timeout; i++) { 1937 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1) 1938 break; 1939 udelay(1); 1940 } 1941 } 1942 1943 return 0; 1944 } 1945 1946 /** 1947 * cik_init_microcode - load ucode images from disk 1948 * 1949 * @rdev: radeon_device pointer 1950 * 1951 * Use the firmware interface to load the ucode images into 1952 * the driver (not loaded into hw). 1953 * Returns 0 on success, error on failure. 1954 */ 1955 static int cik_init_microcode(struct radeon_device *rdev) 1956 { 1957 const char *chip_name; 1958 const char *new_chip_name; 1959 size_t pfp_req_size, me_req_size, ce_req_size, 1960 mec_req_size, rlc_req_size, mc_req_size = 0, 1961 sdma_req_size, smc_req_size = 0, mc2_req_size = 0; 1962 char fw_name[30]; 1963 int new_fw = 0; 1964 int err; 1965 int num_fw; 1966 bool new_smc = false; 1967 1968 DRM_DEBUG("\n"); 1969 1970 switch (rdev->family) { 1971 case CHIP_BONAIRE: 1972 chip_name = "BONAIRE"; 1973 if ((rdev->pdev->revision == 0x80) || 1974 (rdev->pdev->revision == 0x81) || 1975 (rdev->pdev->device == 0x665f)) 1976 new_smc = true; 1977 new_chip_name = "bonaire"; 1978 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1979 me_req_size = CIK_ME_UCODE_SIZE * 4; 1980 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1981 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1982 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 1983 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4; 1984 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4; 1985 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 1986 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4); 1987 num_fw = 8; 1988 break; 1989 case CHIP_HAWAII: 1990 chip_name = "HAWAII"; 1991 if (rdev->pdev->revision == 0x80) 1992 new_smc = true; 1993 new_chip_name = "hawaii"; 1994 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1995 me_req_size = CIK_ME_UCODE_SIZE * 4; 1996 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1997 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1998 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 1999 mc_req_size = HAWAII_MC_UCODE_SIZE * 4; 2000 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4; 2001 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 2002 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4); 2003 num_fw = 8; 2004 break; 2005 case CHIP_KAVERI: 2006 chip_name = "KAVERI"; 2007 new_chip_name = "kaveri"; 2008 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 2009 me_req_size = CIK_ME_UCODE_SIZE * 4; 2010 ce_req_size = CIK_CE_UCODE_SIZE * 4; 2011 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 2012 rlc_req_size = KV_RLC_UCODE_SIZE * 4; 2013 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 2014 num_fw = 7; 2015 break; 2016 case CHIP_KABINI: 2017 chip_name = "KABINI"; 2018 new_chip_name = "kabini"; 2019 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 2020 me_req_size = CIK_ME_UCODE_SIZE * 4; 2021 ce_req_size = CIK_CE_UCODE_SIZE * 4; 2022 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 2023 rlc_req_size = KB_RLC_UCODE_SIZE * 4; 2024 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 2025 num_fw = 6; 2026 break; 2027 case CHIP_MULLINS: 2028 chip_name = "MULLINS"; 2029 new_chip_name = "mullins"; 2030 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 2031 me_req_size = CIK_ME_UCODE_SIZE * 4; 2032 ce_req_size = CIK_CE_UCODE_SIZE * 4; 2033 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 2034 rlc_req_size = ML_RLC_UCODE_SIZE * 4; 2035 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 2036 num_fw = 6; 2037 break; 2038 default: BUG(); 2039 } 2040 2041 DRM_INFO("Loading %s Microcode\n", new_chip_name); 2042 2043 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name); 2044 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev); 2045 if (err) { 2046 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name); 2047 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev); 2048 if (err) 2049 goto out; 2050 if (rdev->pfp_fw->datasize != pfp_req_size) { 2051 printk(KERN_ERR 2052 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 2053 rdev->pfp_fw->datasize, fw_name); 2054 err = -EINVAL; 2055 goto out; 2056 } 2057 } else { 2058 err = radeon_ucode_validate(rdev->pfp_fw); 2059 if (err) { 2060 printk(KERN_ERR 2061 "cik_fw: validation failed for firmware \"%s\"\n", 2062 fw_name); 2063 goto out; 2064 } else { 2065 new_fw++; 2066 } 2067 } 2068 2069 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name); 2070 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); 2071 if (err) { 2072 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name); 2073 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); 2074 if (err) 2075 goto out; 2076 if (rdev->me_fw->datasize != me_req_size) { 2077 printk(KERN_ERR 2078 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 2079 rdev->me_fw->datasize, fw_name); 2080 err = -EINVAL; 2081 } 2082 } else { 2083 err = radeon_ucode_validate(rdev->me_fw); 2084 if (err) { 2085 printk(KERN_ERR 2086 "cik_fw: validation failed for firmware \"%s\"\n", 2087 fw_name); 2088 goto out; 2089 } else { 2090 new_fw++; 2091 } 2092 } 2093 2094 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name); 2095 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev); 2096 if (err) { 2097 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name); 2098 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev); 2099 if (err) 2100 goto out; 2101 if (rdev->ce_fw->datasize != ce_req_size) { 2102 printk(KERN_ERR 2103 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 2104 rdev->ce_fw->datasize, fw_name); 2105 err = -EINVAL; 2106 } 2107 } else { 2108 err = radeon_ucode_validate(rdev->ce_fw); 2109 if (err) { 2110 printk(KERN_ERR 2111 "cik_fw: validation failed for firmware \"%s\"\n", 2112 fw_name); 2113 goto out; 2114 } else { 2115 new_fw++; 2116 } 2117 } 2118 2119 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name); 2120 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev); 2121 if (err) { 2122 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name); 2123 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev); 2124 if (err) 2125 goto out; 2126 if (rdev->mec_fw->datasize != mec_req_size) { 2127 printk(KERN_ERR 2128 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 2129 rdev->mec_fw->datasize, fw_name); 2130 err = -EINVAL; 2131 } 2132 } else { 2133 err = radeon_ucode_validate(rdev->mec_fw); 2134 if (err) { 2135 printk(KERN_ERR 2136 "cik_fw: validation failed for firmware \"%s\"\n", 2137 fw_name); 2138 goto out; 2139 } else { 2140 new_fw++; 2141 } 2142 } 2143 2144 if (rdev->family == CHIP_KAVERI) { 2145 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name); 2146 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev); 2147 if (err) { 2148 goto out; 2149 } else { 2150 err = radeon_ucode_validate(rdev->mec2_fw); 2151 if (err) { 2152 goto out; 2153 } else { 2154 new_fw++; 2155 } 2156 } 2157 } 2158 2159 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name); 2160 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev); 2161 if (err) { 2162 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name); 2163 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev); 2164 if (err) 2165 goto out; 2166 if (rdev->rlc_fw->datasize != rlc_req_size) { 2167 printk(KERN_ERR 2168 "cik_rlc: Bogus length %zu in firmware \"%s\"\n", 2169 rdev->rlc_fw->datasize, fw_name); 2170 err = -EINVAL; 2171 } 2172 } else { 2173 err = radeon_ucode_validate(rdev->rlc_fw); 2174 if (err) { 2175 printk(KERN_ERR 2176 "cik_fw: validation failed for firmware \"%s\"\n", 2177 fw_name); 2178 goto out; 2179 } else { 2180 new_fw++; 2181 } 2182 } 2183 2184 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name); 2185 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev); 2186 if (err) { 2187 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name); 2188 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev); 2189 if (err) 2190 goto out; 2191 if (rdev->sdma_fw->datasize != sdma_req_size) { 2192 printk(KERN_ERR 2193 "cik_sdma: Bogus length %zu in firmware \"%s\"\n", 2194 rdev->sdma_fw->datasize, fw_name); 2195 err = -EINVAL; 2196 } 2197 } else { 2198 err = radeon_ucode_validate(rdev->sdma_fw); 2199 if (err) { 2200 printk(KERN_ERR 2201 "cik_fw: validation failed for firmware \"%s\"\n", 2202 fw_name); 2203 goto out; 2204 } else { 2205 new_fw++; 2206 } 2207 } 2208 2209 /* No SMC, MC ucode on APUs */ 2210 if (!(rdev->flags & RADEON_IS_IGP)) { 2211 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name); 2212 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 2213 if (err) { 2214 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name); 2215 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 2216 if (err) { 2217 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name); 2218 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 2219 if (err) 2220 goto out; 2221 } 2222 if ((rdev->mc_fw->datasize != mc_req_size) && 2223 (rdev->mc_fw->datasize != mc2_req_size)){ 2224 printk(KERN_ERR 2225 "cik_mc: Bogus length %zu in firmware \"%s\"\n", 2226 rdev->mc_fw->datasize, fw_name); 2227 err = -EINVAL; 2228 } 2229 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize); 2230 } else { 2231 err = radeon_ucode_validate(rdev->mc_fw); 2232 if (err) { 2233 printk(KERN_ERR 2234 "cik_fw: validation failed for firmware \"%s\"\n", 2235 fw_name); 2236 goto out; 2237 } else { 2238 new_fw++; 2239 } 2240 } 2241 2242 if (new_smc) 2243 ksnprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_k_smc", new_chip_name); 2244 else 2245 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name); 2246 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); 2247 if (err) { 2248 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name); 2249 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); 2250 if (err) { 2251 printk(KERN_ERR 2252 "smc: error loading firmware \"%s\"\n", 2253 fw_name); 2254 release_firmware(rdev->smc_fw); 2255 rdev->smc_fw = NULL; 2256 err = 0; 2257 } else if (rdev->smc_fw->datasize != smc_req_size) { 2258 printk(KERN_ERR 2259 "cik_smc: Bogus length %zu in firmware \"%s\"\n", 2260 rdev->smc_fw->datasize, fw_name); 2261 err = -EINVAL; 2262 } 2263 } else { 2264 err = radeon_ucode_validate(rdev->smc_fw); 2265 if (err) { 2266 printk(KERN_ERR 2267 "cik_fw: validation failed for firmware \"%s\"\n", 2268 fw_name); 2269 goto out; 2270 } else { 2271 new_fw++; 2272 } 2273 } 2274 } 2275 2276 if (new_fw == 0) { 2277 rdev->new_fw = false; 2278 } else if (new_fw < num_fw) { 2279 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n"); 2280 err = -EINVAL; 2281 } else { 2282 rdev->new_fw = true; 2283 } 2284 2285 out: 2286 if (err) { 2287 if (err != -EINVAL) 2288 printk(KERN_ERR 2289 "cik_cp: Failed to load firmware \"%s\"\n", 2290 fw_name); 2291 release_firmware(rdev->pfp_fw); 2292 rdev->pfp_fw = NULL; 2293 release_firmware(rdev->me_fw); 2294 rdev->me_fw = NULL; 2295 release_firmware(rdev->ce_fw); 2296 rdev->ce_fw = NULL; 2297 release_firmware(rdev->mec_fw); 2298 rdev->mec_fw = NULL; 2299 release_firmware(rdev->mec2_fw); 2300 rdev->mec2_fw = NULL; 2301 release_firmware(rdev->rlc_fw); 2302 rdev->rlc_fw = NULL; 2303 release_firmware(rdev->sdma_fw); 2304 rdev->sdma_fw = NULL; 2305 release_firmware(rdev->mc_fw); 2306 rdev->mc_fw = NULL; 2307 release_firmware(rdev->smc_fw); 2308 rdev->smc_fw = NULL; 2309 } 2310 return err; 2311 } 2312 2313 /** 2314 * cik_fini_microcode - drop the firmwares image references 2315 * 2316 * @rdev: radeon_device pointer 2317 * 2318 * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references. 2319 * Called at driver shutdown. 2320 */ 2321 static void cik_fini_microcode(struct radeon_device *rdev) 2322 { 2323 release_firmware(rdev->pfp_fw); 2324 rdev->pfp_fw = NULL; 2325 release_firmware(rdev->me_fw); 2326 rdev->me_fw = NULL; 2327 release_firmware(rdev->ce_fw); 2328 rdev->ce_fw = NULL; 2329 release_firmware(rdev->mec_fw); 2330 rdev->mec_fw = NULL; 2331 release_firmware(rdev->mec2_fw); 2332 rdev->mec2_fw = NULL; 2333 release_firmware(rdev->rlc_fw); 2334 rdev->rlc_fw = NULL; 2335 release_firmware(rdev->sdma_fw); 2336 rdev->sdma_fw = NULL; 2337 release_firmware(rdev->mc_fw); 2338 rdev->mc_fw = NULL; 2339 release_firmware(rdev->smc_fw); 2340 rdev->smc_fw = NULL; 2341 } 2342 2343 /* 2344 * Core functions 2345 */ 2346 /** 2347 * cik_tiling_mode_table_init - init the hw tiling table 2348 * 2349 * @rdev: radeon_device pointer 2350 * 2351 * Starting with SI, the tiling setup is done globally in a 2352 * set of 32 tiling modes. Rather than selecting each set of 2353 * parameters per surface as on older asics, we just select 2354 * which index in the tiling table we want to use, and the 2355 * surface uses those parameters (CIK). 2356 */ 2357 static void cik_tiling_mode_table_init(struct radeon_device *rdev) 2358 { 2359 u32 *tile = rdev->config.cik.tile_mode_array; 2360 u32 *macrotile = rdev->config.cik.macrotile_mode_array; 2361 const u32 num_tile_mode_states = 2362 ARRAY_SIZE(rdev->config.cik.tile_mode_array); 2363 const u32 num_secondary_tile_mode_states = 2364 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array); 2365 u32 reg_offset, split_equal_to_row_size; 2366 u32 num_pipe_configs; 2367 u32 num_rbs = rdev->config.cik.max_backends_per_se * 2368 rdev->config.cik.max_shader_engines; 2369 2370 switch (rdev->config.cik.mem_row_size_in_kb) { 2371 case 1: 2372 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB; 2373 break; 2374 case 2: 2375 default: 2376 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB; 2377 break; 2378 case 4: 2379 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB; 2380 break; 2381 } 2382 2383 num_pipe_configs = rdev->config.cik.max_tile_pipes; 2384 if (num_pipe_configs > 8) 2385 num_pipe_configs = 16; 2386 2387 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2388 tile[reg_offset] = 0; 2389 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2390 macrotile[reg_offset] = 0; 2391 2392 switch(num_pipe_configs) { 2393 case 16: 2394 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2395 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2396 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2398 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2399 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2400 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2402 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2403 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2405 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2406 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2410 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2413 TILE_SPLIT(split_equal_to_row_size)); 2414 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2415 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2417 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2420 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2421 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2424 TILE_SPLIT(split_equal_to_row_size)); 2425 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2426 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2427 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2430 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2434 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2435 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) | 2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2438 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2442 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2444 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2445 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2449 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) | 2452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2453 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2457 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2458 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2459 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2460 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2464 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2466 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) | 2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2468 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2470 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2472 2473 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2476 NUM_BANKS(ADDR_SURF_16_BANK)); 2477 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2480 NUM_BANKS(ADDR_SURF_16_BANK)); 2481 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2484 NUM_BANKS(ADDR_SURF_16_BANK)); 2485 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2488 NUM_BANKS(ADDR_SURF_16_BANK)); 2489 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2492 NUM_BANKS(ADDR_SURF_8_BANK)); 2493 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2496 NUM_BANKS(ADDR_SURF_4_BANK)); 2497 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2500 NUM_BANKS(ADDR_SURF_2_BANK)); 2501 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2504 NUM_BANKS(ADDR_SURF_16_BANK)); 2505 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2508 NUM_BANKS(ADDR_SURF_16_BANK)); 2509 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2512 NUM_BANKS(ADDR_SURF_16_BANK)); 2513 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2516 NUM_BANKS(ADDR_SURF_8_BANK)); 2517 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2520 NUM_BANKS(ADDR_SURF_4_BANK)); 2521 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2524 NUM_BANKS(ADDR_SURF_2_BANK)); 2525 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2528 NUM_BANKS(ADDR_SURF_2_BANK)); 2529 2530 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2531 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]); 2532 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2533 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]); 2534 break; 2535 2536 case 8: 2537 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2538 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2539 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2540 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2541 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2542 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2543 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2544 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2545 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2548 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2549 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2552 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2553 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2554 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2556 TILE_SPLIT(split_equal_to_row_size)); 2557 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2559 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2560 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2561 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2563 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2564 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2565 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2567 TILE_SPLIT(split_equal_to_row_size)); 2568 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2569 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2570 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2571 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2572 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2573 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2574 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2577 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2578 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2579 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2581 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2582 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2585 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2586 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2588 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2590 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2592 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2594 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 2595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2596 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2598 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2600 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2601 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2602 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2603 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2604 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2607 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2608 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 2610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2611 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2612 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2614 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2615 2616 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2617 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2618 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2619 NUM_BANKS(ADDR_SURF_16_BANK)); 2620 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2623 NUM_BANKS(ADDR_SURF_16_BANK)); 2624 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2625 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2626 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2627 NUM_BANKS(ADDR_SURF_16_BANK)); 2628 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2629 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2630 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2631 NUM_BANKS(ADDR_SURF_16_BANK)); 2632 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2635 NUM_BANKS(ADDR_SURF_8_BANK)); 2636 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2637 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2638 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2639 NUM_BANKS(ADDR_SURF_4_BANK)); 2640 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2643 NUM_BANKS(ADDR_SURF_2_BANK)); 2644 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2647 NUM_BANKS(ADDR_SURF_16_BANK)); 2648 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2649 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2650 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2651 NUM_BANKS(ADDR_SURF_16_BANK)); 2652 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2655 NUM_BANKS(ADDR_SURF_16_BANK)); 2656 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2659 NUM_BANKS(ADDR_SURF_16_BANK)); 2660 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2663 NUM_BANKS(ADDR_SURF_8_BANK)); 2664 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2667 NUM_BANKS(ADDR_SURF_4_BANK)); 2668 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2671 NUM_BANKS(ADDR_SURF_2_BANK)); 2672 2673 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2674 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]); 2675 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2676 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]); 2677 break; 2678 2679 case 4: 2680 if (num_rbs == 4) { 2681 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2682 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2684 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2685 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2686 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2687 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2688 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2689 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2690 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2692 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2693 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2696 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2697 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2700 TILE_SPLIT(split_equal_to_row_size)); 2701 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2702 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2704 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2705 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2706 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2707 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2708 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2709 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2711 TILE_SPLIT(split_equal_to_row_size)); 2712 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2713 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2714 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2716 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2717 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2718 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2721 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2722 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2723 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2725 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2726 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2729 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2730 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2731 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2732 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2733 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2734 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2736 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2737 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2738 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2740 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2741 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2742 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2744 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2745 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2746 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2747 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2748 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2749 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2750 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2751 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2752 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2753 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2754 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2755 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2756 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2757 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2759 2760 } else if (num_rbs < 4) { 2761 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2762 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2763 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2764 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2765 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2766 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2767 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2768 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2769 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2770 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2771 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2772 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2773 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2775 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2776 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2777 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2778 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2779 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2780 TILE_SPLIT(split_equal_to_row_size)); 2781 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2782 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2784 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2786 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2788 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2789 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2790 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2791 TILE_SPLIT(split_equal_to_row_size)); 2792 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2793 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 2794 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2795 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2796 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2797 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2798 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2799 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2801 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2802 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2803 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2805 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2806 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2807 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2808 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2809 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2810 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2811 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2812 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2813 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2814 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2816 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2818 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2820 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2821 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2822 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2824 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2825 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2826 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2827 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2829 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2831 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2833 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2835 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2837 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2839 } 2840 2841 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2844 NUM_BANKS(ADDR_SURF_16_BANK)); 2845 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2846 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2847 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2848 NUM_BANKS(ADDR_SURF_16_BANK)); 2849 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2852 NUM_BANKS(ADDR_SURF_16_BANK)); 2853 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2854 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2855 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2856 NUM_BANKS(ADDR_SURF_16_BANK)); 2857 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2860 NUM_BANKS(ADDR_SURF_16_BANK)); 2861 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2864 NUM_BANKS(ADDR_SURF_8_BANK)); 2865 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2866 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2867 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2868 NUM_BANKS(ADDR_SURF_4_BANK)); 2869 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2872 NUM_BANKS(ADDR_SURF_16_BANK)); 2873 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2876 NUM_BANKS(ADDR_SURF_16_BANK)); 2877 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2878 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2879 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2880 NUM_BANKS(ADDR_SURF_16_BANK)); 2881 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2884 NUM_BANKS(ADDR_SURF_16_BANK)); 2885 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2888 NUM_BANKS(ADDR_SURF_16_BANK)); 2889 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2890 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2891 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2892 NUM_BANKS(ADDR_SURF_8_BANK)); 2893 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2894 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2895 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2896 NUM_BANKS(ADDR_SURF_4_BANK)); 2897 2898 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2899 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]); 2900 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2901 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]); 2902 break; 2903 2904 case 2: 2905 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2906 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2907 PIPE_CONFIG(ADDR_SURF_P2) | 2908 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2909 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2910 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2911 PIPE_CONFIG(ADDR_SURF_P2) | 2912 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2913 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2914 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2915 PIPE_CONFIG(ADDR_SURF_P2) | 2916 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2917 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2918 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2919 PIPE_CONFIG(ADDR_SURF_P2) | 2920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2921 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2922 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2923 PIPE_CONFIG(ADDR_SURF_P2) | 2924 TILE_SPLIT(split_equal_to_row_size)); 2925 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2926 PIPE_CONFIG(ADDR_SURF_P2) | 2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2928 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2930 PIPE_CONFIG(ADDR_SURF_P2) | 2931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2932 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2934 PIPE_CONFIG(ADDR_SURF_P2) | 2935 TILE_SPLIT(split_equal_to_row_size)); 2936 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2937 PIPE_CONFIG(ADDR_SURF_P2); 2938 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2939 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2940 PIPE_CONFIG(ADDR_SURF_P2)); 2941 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2942 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2943 PIPE_CONFIG(ADDR_SURF_P2) | 2944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2945 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2946 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2947 PIPE_CONFIG(ADDR_SURF_P2) | 2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2949 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2950 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2951 PIPE_CONFIG(ADDR_SURF_P2) | 2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2953 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2954 PIPE_CONFIG(ADDR_SURF_P2) | 2955 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2956 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2958 PIPE_CONFIG(ADDR_SURF_P2) | 2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2960 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2961 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2962 PIPE_CONFIG(ADDR_SURF_P2) | 2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2964 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2965 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2966 PIPE_CONFIG(ADDR_SURF_P2) | 2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2968 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2969 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2970 PIPE_CONFIG(ADDR_SURF_P2)); 2971 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2972 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2973 PIPE_CONFIG(ADDR_SURF_P2) | 2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2975 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2976 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2977 PIPE_CONFIG(ADDR_SURF_P2) | 2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2979 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2980 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2981 PIPE_CONFIG(ADDR_SURF_P2) | 2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2983 2984 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2987 NUM_BANKS(ADDR_SURF_16_BANK)); 2988 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2991 NUM_BANKS(ADDR_SURF_16_BANK)); 2992 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2995 NUM_BANKS(ADDR_SURF_16_BANK)); 2996 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2999 NUM_BANKS(ADDR_SURF_16_BANK)); 3000 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3003 NUM_BANKS(ADDR_SURF_16_BANK)); 3004 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3007 NUM_BANKS(ADDR_SURF_16_BANK)); 3008 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3011 NUM_BANKS(ADDR_SURF_8_BANK)); 3012 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3015 NUM_BANKS(ADDR_SURF_16_BANK)); 3016 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3017 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3018 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3019 NUM_BANKS(ADDR_SURF_16_BANK)); 3020 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3023 NUM_BANKS(ADDR_SURF_16_BANK)); 3024 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3027 NUM_BANKS(ADDR_SURF_16_BANK)); 3028 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3031 NUM_BANKS(ADDR_SURF_16_BANK)); 3032 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3035 NUM_BANKS(ADDR_SURF_16_BANK)); 3036 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3039 NUM_BANKS(ADDR_SURF_8_BANK)); 3040 3041 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3042 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]); 3043 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3044 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]); 3045 break; 3046 3047 default: 3048 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs); 3049 } 3050 } 3051 3052 /** 3053 * cik_select_se_sh - select which SE, SH to address 3054 * 3055 * @rdev: radeon_device pointer 3056 * @se_num: shader engine to address 3057 * @sh_num: sh block to address 3058 * 3059 * Select which SE, SH combinations to address. Certain 3060 * registers are instanced per SE or SH. 0xffffffff means 3061 * broadcast to all SEs or SHs (CIK). 3062 */ 3063 static void cik_select_se_sh(struct radeon_device *rdev, 3064 u32 se_num, u32 sh_num) 3065 { 3066 u32 data = INSTANCE_BROADCAST_WRITES; 3067 3068 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) 3069 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; 3070 else if (se_num == 0xffffffff) 3071 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); 3072 else if (sh_num == 0xffffffff) 3073 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); 3074 else 3075 data |= SH_INDEX(sh_num) | SE_INDEX(se_num); 3076 WREG32(GRBM_GFX_INDEX, data); 3077 } 3078 3079 /** 3080 * cik_create_bitmask - create a bitmask 3081 * 3082 * @bit_width: length of the mask 3083 * 3084 * create a variable length bit mask (CIK). 3085 * Returns the bitmask. 3086 */ 3087 static u32 cik_create_bitmask(u32 bit_width) 3088 { 3089 u32 i, mask = 0; 3090 3091 for (i = 0; i < bit_width; i++) { 3092 mask <<= 1; 3093 mask |= 1; 3094 } 3095 return mask; 3096 } 3097 3098 /** 3099 * cik_get_rb_disabled - computes the mask of disabled RBs 3100 * 3101 * @rdev: radeon_device pointer 3102 * @max_rb_num: max RBs (render backends) for the asic 3103 * @se_num: number of SEs (shader engines) for the asic 3104 * @sh_per_se: number of SH blocks per SE for the asic 3105 * 3106 * Calculates the bitmask of disabled RBs (CIK). 3107 * Returns the disabled RB bitmask. 3108 */ 3109 static u32 cik_get_rb_disabled(struct radeon_device *rdev, 3110 u32 max_rb_num_per_se, 3111 u32 sh_per_se) 3112 { 3113 u32 data, mask; 3114 3115 data = RREG32(CC_RB_BACKEND_DISABLE); 3116 if (data & 1) 3117 data &= BACKEND_DISABLE_MASK; 3118 else 3119 data = 0; 3120 data |= RREG32(GC_USER_RB_BACKEND_DISABLE); 3121 3122 data >>= BACKEND_DISABLE_SHIFT; 3123 3124 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se); 3125 3126 return data & mask; 3127 } 3128 3129 /** 3130 * cik_setup_rb - setup the RBs on the asic 3131 * 3132 * @rdev: radeon_device pointer 3133 * @se_num: number of SEs (shader engines) for the asic 3134 * @sh_per_se: number of SH blocks per SE for the asic 3135 * @max_rb_num: max RBs (render backends) for the asic 3136 * 3137 * Configures per-SE/SH RB registers (CIK). 3138 */ 3139 static void cik_setup_rb(struct radeon_device *rdev, 3140 u32 se_num, u32 sh_per_se, 3141 u32 max_rb_num_per_se) 3142 { 3143 int i, j; 3144 u32 data, mask; 3145 u32 disabled_rbs = 0; 3146 u32 enabled_rbs = 0; 3147 3148 mutex_lock(&rdev->grbm_idx_mutex); 3149 for (i = 0; i < se_num; i++) { 3150 for (j = 0; j < sh_per_se; j++) { 3151 cik_select_se_sh(rdev, i, j); 3152 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se); 3153 if (rdev->family == CHIP_HAWAII) 3154 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); 3155 else 3156 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 3157 } 3158 } 3159 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 3160 mutex_unlock(&rdev->grbm_idx_mutex); 3161 3162 mask = 1; 3163 for (i = 0; i < max_rb_num_per_se * se_num; i++) { 3164 if (!(disabled_rbs & mask)) 3165 enabled_rbs |= mask; 3166 mask <<= 1; 3167 } 3168 3169 rdev->config.cik.backend_enable_mask = enabled_rbs; 3170 3171 mutex_lock(&rdev->grbm_idx_mutex); 3172 for (i = 0; i < se_num; i++) { 3173 cik_select_se_sh(rdev, i, 0xffffffff); 3174 data = 0; 3175 for (j = 0; j < sh_per_se; j++) { 3176 switch (enabled_rbs & 3) { 3177 case 0: 3178 if (j == 0) 3179 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3); 3180 else 3181 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0); 3182 break; 3183 case 1: 3184 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); 3185 break; 3186 case 2: 3187 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); 3188 break; 3189 case 3: 3190 default: 3191 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); 3192 break; 3193 } 3194 enabled_rbs >>= 2; 3195 } 3196 WREG32(PA_SC_RASTER_CONFIG, data); 3197 } 3198 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 3199 mutex_unlock(&rdev->grbm_idx_mutex); 3200 } 3201 3202 /** 3203 * cik_gpu_init - setup the 3D engine 3204 * 3205 * @rdev: radeon_device pointer 3206 * 3207 * Configures the 3D engine and tiling configuration 3208 * registers so that the 3D engine is usable. 3209 */ 3210 static void cik_gpu_init(struct radeon_device *rdev) 3211 { 3212 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG); 3213 u32 mc_shared_chmap, mc_arb_ramcfg; 3214 u32 hdp_host_path_cntl; 3215 u32 tmp; 3216 int i, j; 3217 3218 switch (rdev->family) { 3219 case CHIP_BONAIRE: 3220 rdev->config.cik.max_shader_engines = 2; 3221 rdev->config.cik.max_tile_pipes = 4; 3222 rdev->config.cik.max_cu_per_sh = 7; 3223 rdev->config.cik.max_sh_per_se = 1; 3224 rdev->config.cik.max_backends_per_se = 2; 3225 rdev->config.cik.max_texture_channel_caches = 4; 3226 rdev->config.cik.max_gprs = 256; 3227 rdev->config.cik.max_gs_threads = 32; 3228 rdev->config.cik.max_hw_contexts = 8; 3229 3230 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 3231 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 3232 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 3233 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 3234 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 3235 break; 3236 case CHIP_HAWAII: 3237 rdev->config.cik.max_shader_engines = 4; 3238 rdev->config.cik.max_tile_pipes = 16; 3239 rdev->config.cik.max_cu_per_sh = 11; 3240 rdev->config.cik.max_sh_per_se = 1; 3241 rdev->config.cik.max_backends_per_se = 4; 3242 rdev->config.cik.max_texture_channel_caches = 16; 3243 rdev->config.cik.max_gprs = 256; 3244 rdev->config.cik.max_gs_threads = 32; 3245 rdev->config.cik.max_hw_contexts = 8; 3246 3247 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 3248 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 3249 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 3250 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 3251 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN; 3252 break; 3253 case CHIP_KAVERI: 3254 rdev->config.cik.max_shader_engines = 1; 3255 rdev->config.cik.max_tile_pipes = 4; 3256 rdev->config.cik.max_cu_per_sh = 8; 3257 rdev->config.cik.max_backends_per_se = 2; 3258 rdev->config.cik.max_sh_per_se = 1; 3259 rdev->config.cik.max_texture_channel_caches = 4; 3260 rdev->config.cik.max_gprs = 256; 3261 rdev->config.cik.max_gs_threads = 16; 3262 rdev->config.cik.max_hw_contexts = 8; 3263 3264 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 3265 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 3266 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 3267 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 3268 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 3269 break; 3270 case CHIP_KABINI: 3271 case CHIP_MULLINS: 3272 default: 3273 rdev->config.cik.max_shader_engines = 1; 3274 rdev->config.cik.max_tile_pipes = 2; 3275 rdev->config.cik.max_cu_per_sh = 2; 3276 rdev->config.cik.max_sh_per_se = 1; 3277 rdev->config.cik.max_backends_per_se = 1; 3278 rdev->config.cik.max_texture_channel_caches = 2; 3279 rdev->config.cik.max_gprs = 256; 3280 rdev->config.cik.max_gs_threads = 16; 3281 rdev->config.cik.max_hw_contexts = 8; 3282 3283 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20; 3284 rdev->config.cik.sc_prim_fifo_size_backend = 0x100; 3285 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30; 3286 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130; 3287 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; 3288 break; 3289 } 3290 3291 /* Initialize HDP */ 3292 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 3293 WREG32((0x2c14 + j), 0x00000000); 3294 WREG32((0x2c18 + j), 0x00000000); 3295 WREG32((0x2c1c + j), 0x00000000); 3296 WREG32((0x2c20 + j), 0x00000000); 3297 WREG32((0x2c24 + j), 0x00000000); 3298 } 3299 3300 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 3301 WREG32(SRBM_INT_CNTL, 0x1); 3302 WREG32(SRBM_INT_ACK, 0x1); 3303 3304 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); 3305 3306 mc_shared_chmap = RREG32(MC_SHARED_CHMAP); 3307 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 3308 3309 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes; 3310 rdev->config.cik.mem_max_burst_length_bytes = 256; 3311 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; 3312 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 3313 if (rdev->config.cik.mem_row_size_in_kb > 4) 3314 rdev->config.cik.mem_row_size_in_kb = 4; 3315 /* XXX use MC settings? */ 3316 rdev->config.cik.shader_engine_tile_size = 32; 3317 rdev->config.cik.num_gpus = 1; 3318 rdev->config.cik.multi_gpu_tile_size = 64; 3319 3320 /* fix up row size */ 3321 gb_addr_config &= ~ROW_SIZE_MASK; 3322 switch (rdev->config.cik.mem_row_size_in_kb) { 3323 case 1: 3324 default: 3325 gb_addr_config |= ROW_SIZE(0); 3326 break; 3327 case 2: 3328 gb_addr_config |= ROW_SIZE(1); 3329 break; 3330 case 4: 3331 gb_addr_config |= ROW_SIZE(2); 3332 break; 3333 } 3334 3335 /* setup tiling info dword. gb_addr_config is not adequate since it does 3336 * not have bank info, so create a custom tiling dword. 3337 * bits 3:0 num_pipes 3338 * bits 7:4 num_banks 3339 * bits 11:8 group_size 3340 * bits 15:12 row_size 3341 */ 3342 rdev->config.cik.tile_config = 0; 3343 switch (rdev->config.cik.num_tile_pipes) { 3344 case 1: 3345 rdev->config.cik.tile_config |= (0 << 0); 3346 break; 3347 case 2: 3348 rdev->config.cik.tile_config |= (1 << 0); 3349 break; 3350 case 4: 3351 rdev->config.cik.tile_config |= (2 << 0); 3352 break; 3353 case 8: 3354 default: 3355 /* XXX what about 12? */ 3356 rdev->config.cik.tile_config |= (3 << 0); 3357 break; 3358 } 3359 rdev->config.cik.tile_config |= 3360 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; 3361 rdev->config.cik.tile_config |= 3362 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; 3363 rdev->config.cik.tile_config |= 3364 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; 3365 3366 WREG32(GB_ADDR_CONFIG, gb_addr_config); 3367 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 3368 WREG32(DMIF_ADDR_CALC, gb_addr_config); 3369 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70); 3370 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70); 3371 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config); 3372 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); 3373 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); 3374 3375 cik_tiling_mode_table_init(rdev); 3376 3377 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines, 3378 rdev->config.cik.max_sh_per_se, 3379 rdev->config.cik.max_backends_per_se); 3380 3381 rdev->config.cik.active_cus = 0; 3382 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 3383 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 3384 rdev->config.cik.active_cus += 3385 hweight32(cik_get_cu_active_bitmap(rdev, i, j)); 3386 } 3387 } 3388 3389 /* set HW defaults for 3D engine */ 3390 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); 3391 3392 mutex_lock(&rdev->grbm_idx_mutex); 3393 /* 3394 * making sure that the following register writes will be broadcasted 3395 * to all the shaders 3396 */ 3397 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 3398 WREG32(SX_DEBUG_1, 0x20); 3399 3400 WREG32(TA_CNTL_AUX, 0x00010000); 3401 3402 tmp = RREG32(SPI_CONFIG_CNTL); 3403 tmp |= 0x03000000; 3404 WREG32(SPI_CONFIG_CNTL, tmp); 3405 3406 WREG32(SQ_CONFIG, 1); 3407 3408 WREG32(DB_DEBUG, 0); 3409 3410 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff; 3411 tmp |= 0x00000400; 3412 WREG32(DB_DEBUG2, tmp); 3413 3414 tmp = RREG32(DB_DEBUG3) & ~0x0002021c; 3415 tmp |= 0x00020200; 3416 WREG32(DB_DEBUG3, tmp); 3417 3418 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000; 3419 tmp |= 0x00018208; 3420 WREG32(CB_HW_CONTROL, tmp); 3421 3422 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); 3423 3424 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) | 3425 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) | 3426 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) | 3427 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size))); 3428 3429 WREG32(VGT_NUM_INSTANCES, 1); 3430 3431 WREG32(CP_PERFMON_CNTL, 0); 3432 3433 WREG32(SQ_CONFIG, 0); 3434 3435 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | 3436 FORCE_EOV_MAX_REZ_CNT(255))); 3437 3438 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | 3439 AUTO_INVLD_EN(ES_AND_GS_AUTO)); 3440 3441 WREG32(VGT_GS_VERTEX_REUSE, 16); 3442 WREG32(PA_SC_LINE_STIPPLE_STATE, 0); 3443 3444 tmp = RREG32(HDP_MISC_CNTL); 3445 tmp |= HDP_FLUSH_INVALIDATE_CACHE; 3446 WREG32(HDP_MISC_CNTL, tmp); 3447 3448 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); 3449 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 3450 3451 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); 3452 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER); 3453 mutex_unlock(&rdev->grbm_idx_mutex); 3454 3455 udelay(50); 3456 } 3457 3458 /* 3459 * GPU scratch registers helpers function. 3460 */ 3461 /** 3462 * cik_scratch_init - setup driver info for CP scratch regs 3463 * 3464 * @rdev: radeon_device pointer 3465 * 3466 * Set up the number and offset of the CP scratch registers. 3467 * NOTE: use of CP scratch registers is a legacy inferface and 3468 * is not used by default on newer asics (r6xx+). On newer asics, 3469 * memory buffers are used for fences rather than scratch regs. 3470 */ 3471 static void cik_scratch_init(struct radeon_device *rdev) 3472 { 3473 int i; 3474 3475 rdev->scratch.num_reg = 7; 3476 rdev->scratch.reg_base = SCRATCH_REG0; 3477 for (i = 0; i < rdev->scratch.num_reg; i++) { 3478 rdev->scratch.free[i] = true; 3479 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4); 3480 } 3481 } 3482 3483 /** 3484 * cik_ring_test - basic gfx ring test 3485 * 3486 * @rdev: radeon_device pointer 3487 * @ring: radeon_ring structure holding ring information 3488 * 3489 * Allocate a scratch register and write to it using the gfx ring (CIK). 3490 * Provides a basic gfx ring test to verify that the ring is working. 3491 * Used by cik_cp_gfx_resume(); 3492 * Returns 0 on success, error on failure. 3493 */ 3494 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) 3495 { 3496 uint32_t scratch; 3497 uint32_t tmp = 0; 3498 unsigned i; 3499 int r; 3500 3501 r = radeon_scratch_get(rdev, &scratch); 3502 if (r) { 3503 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r); 3504 return r; 3505 } 3506 WREG32(scratch, 0xCAFEDEAD); 3507 r = radeon_ring_lock(rdev, ring, 3); 3508 if (r) { 3509 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r); 3510 radeon_scratch_free(rdev, scratch); 3511 return r; 3512 } 3513 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 3514 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); 3515 radeon_ring_write(ring, 0xDEADBEEF); 3516 radeon_ring_unlock_commit(rdev, ring, false); 3517 3518 for (i = 0; i < rdev->usec_timeout; i++) { 3519 tmp = RREG32(scratch); 3520 if (tmp == 0xDEADBEEF) 3521 break; 3522 DRM_UDELAY(1); 3523 } 3524 if (i < rdev->usec_timeout) { 3525 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 3526 } else { 3527 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 3528 ring->idx, scratch, tmp); 3529 r = -EINVAL; 3530 } 3531 radeon_scratch_free(rdev, scratch); 3532 return r; 3533 } 3534 3535 /** 3536 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp 3537 * 3538 * @rdev: radeon_device pointer 3539 * @ridx: radeon ring index 3540 * 3541 * Emits an hdp flush on the cp. 3542 */ 3543 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev, 3544 int ridx) 3545 { 3546 struct radeon_ring *ring = &rdev->ring[ridx]; 3547 u32 ref_and_mask; 3548 3549 switch (ring->idx) { 3550 case CAYMAN_RING_TYPE_CP1_INDEX: 3551 case CAYMAN_RING_TYPE_CP2_INDEX: 3552 default: 3553 switch (ring->me) { 3554 case 0: 3555 ref_and_mask = CP2 << ring->pipe; 3556 break; 3557 case 1: 3558 ref_and_mask = CP6 << ring->pipe; 3559 break; 3560 default: 3561 return; 3562 } 3563 break; 3564 case RADEON_RING_TYPE_GFX_INDEX: 3565 ref_and_mask = CP0; 3566 break; 3567 } 3568 3569 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 3570 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 3571 WAIT_REG_MEM_FUNCTION(3) | /* == */ 3572 WAIT_REG_MEM_ENGINE(1))); /* pfp */ 3573 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2); 3574 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2); 3575 radeon_ring_write(ring, ref_and_mask); 3576 radeon_ring_write(ring, ref_and_mask); 3577 radeon_ring_write(ring, 0x20); /* poll interval */ 3578 } 3579 3580 /** 3581 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring 3582 * 3583 * @rdev: radeon_device pointer 3584 * @fence: radeon fence object 3585 * 3586 * Emits a fence sequnce number on the gfx ring and flushes 3587 * GPU caches. 3588 */ 3589 void cik_fence_gfx_ring_emit(struct radeon_device *rdev, 3590 struct radeon_fence *fence) 3591 { 3592 struct radeon_ring *ring = &rdev->ring[fence->ring]; 3593 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 3594 3595 /* Workaround for cache flush problems. First send a dummy EOP 3596 * event down the pipe with seq one below. 3597 */ 3598 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 3599 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3600 EOP_TC_ACTION_EN | 3601 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3602 EVENT_INDEX(5))); 3603 radeon_ring_write(ring, addr & 0xfffffffc); 3604 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 3605 DATA_SEL(1) | INT_SEL(0)); 3606 radeon_ring_write(ring, fence->seq - 1); 3607 radeon_ring_write(ring, 0); 3608 3609 /* Then send the real EOP event down the pipe. */ 3610 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 3611 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3612 EOP_TC_ACTION_EN | 3613 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3614 EVENT_INDEX(5))); 3615 radeon_ring_write(ring, addr & 0xfffffffc); 3616 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); 3617 radeon_ring_write(ring, fence->seq); 3618 radeon_ring_write(ring, 0); 3619 } 3620 3621 /** 3622 * cik_fence_compute_ring_emit - emit a fence on the compute ring 3623 * 3624 * @rdev: radeon_device pointer 3625 * @fence: radeon fence object 3626 * 3627 * Emits a fence sequnce number on the compute ring and flushes 3628 * GPU caches. 3629 */ 3630 void cik_fence_compute_ring_emit(struct radeon_device *rdev, 3631 struct radeon_fence *fence) 3632 { 3633 struct radeon_ring *ring = &rdev->ring[fence->ring]; 3634 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 3635 3636 /* RELEASE_MEM - flush caches, send int */ 3637 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 3638 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN | 3639 EOP_TC_ACTION_EN | 3640 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3641 EVENT_INDEX(5))); 3642 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2)); 3643 radeon_ring_write(ring, addr & 0xfffffffc); 3644 radeon_ring_write(ring, upper_32_bits(addr)); 3645 radeon_ring_write(ring, fence->seq); 3646 radeon_ring_write(ring, 0); 3647 } 3648 3649 /** 3650 * cik_semaphore_ring_emit - emit a semaphore on the CP ring 3651 * 3652 * @rdev: radeon_device pointer 3653 * @ring: radeon ring buffer object 3654 * @semaphore: radeon semaphore object 3655 * @emit_wait: Is this a sempahore wait? 3656 * 3657 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP 3658 * from running ahead of semaphore waits. 3659 */ 3660 bool cik_semaphore_ring_emit(struct radeon_device *rdev, 3661 struct radeon_ring *ring, 3662 struct radeon_semaphore *semaphore, 3663 bool emit_wait) 3664 { 3665 uint64_t addr = semaphore->gpu_addr; 3666 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; 3667 3668 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); 3669 radeon_ring_write(ring, lower_32_bits(addr)); 3670 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); 3671 3672 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) { 3673 /* Prevent the PFP from running ahead of the semaphore wait */ 3674 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 3675 radeon_ring_write(ring, 0x0); 3676 } 3677 3678 return true; 3679 } 3680 3681 /** 3682 * cik_copy_cpdma - copy pages using the CP DMA engine 3683 * 3684 * @rdev: radeon_device pointer 3685 * @src_offset: src GPU address 3686 * @dst_offset: dst GPU address 3687 * @num_gpu_pages: number of GPU pages to xfer 3688 * @resv: reservation object to sync to 3689 * 3690 * Copy GPU paging using the CP DMA engine (CIK+). 3691 * Used by the radeon ttm implementation to move pages if 3692 * registered as the asic copy callback. 3693 */ 3694 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, 3695 uint64_t src_offset, uint64_t dst_offset, 3696 unsigned num_gpu_pages, 3697 struct reservation_object *resv) 3698 { 3699 struct radeon_fence *fence; 3700 struct radeon_sync sync; 3701 int ring_index = rdev->asic->copy.blit_ring_index; 3702 struct radeon_ring *ring = &rdev->ring[ring_index]; 3703 u32 size_in_bytes, cur_size_in_bytes, control; 3704 int i, num_loops; 3705 int r = 0; 3706 3707 radeon_sync_create(&sync); 3708 3709 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); 3710 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); 3711 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18); 3712 if (r) { 3713 DRM_ERROR("radeon: moving bo (%d).\n", r); 3714 radeon_sync_free(rdev, &sync, NULL); 3715 return ERR_PTR(r); 3716 } 3717 3718 radeon_sync_resv(rdev, &sync, resv, false); 3719 radeon_sync_rings(rdev, &sync, ring->idx); 3720 3721 for (i = 0; i < num_loops; i++) { 3722 cur_size_in_bytes = size_in_bytes; 3723 if (cur_size_in_bytes > 0x1fffff) 3724 cur_size_in_bytes = 0x1fffff; 3725 size_in_bytes -= cur_size_in_bytes; 3726 control = 0; 3727 if (size_in_bytes == 0) 3728 control |= PACKET3_DMA_DATA_CP_SYNC; 3729 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3730 radeon_ring_write(ring, control); 3731 radeon_ring_write(ring, lower_32_bits(src_offset)); 3732 radeon_ring_write(ring, upper_32_bits(src_offset)); 3733 radeon_ring_write(ring, lower_32_bits(dst_offset)); 3734 radeon_ring_write(ring, upper_32_bits(dst_offset)); 3735 radeon_ring_write(ring, cur_size_in_bytes); 3736 src_offset += cur_size_in_bytes; 3737 dst_offset += cur_size_in_bytes; 3738 } 3739 3740 r = radeon_fence_emit(rdev, &fence, ring->idx); 3741 if (r) { 3742 radeon_ring_unlock_undo(rdev, ring); 3743 radeon_sync_free(rdev, &sync, NULL); 3744 return ERR_PTR(r); 3745 } 3746 3747 radeon_ring_unlock_commit(rdev, ring, false); 3748 radeon_sync_free(rdev, &sync, fence); 3749 3750 return fence; 3751 } 3752 3753 /* 3754 * IB stuff 3755 */ 3756 /** 3757 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring 3758 * 3759 * @rdev: radeon_device pointer 3760 * @ib: radeon indirect buffer object 3761 * 3762 * Emits a DE (drawing engine) or CE (constant engine) IB 3763 * on the gfx ring. IBs are usually generated by userspace 3764 * acceleration drivers and submitted to the kernel for 3765 * scheduling on the ring. This function schedules the IB 3766 * on the gfx ring for execution by the GPU. 3767 */ 3768 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 3769 { 3770 struct radeon_ring *ring = &rdev->ring[ib->ring]; 3771 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; 3772 u32 header, control = INDIRECT_BUFFER_VALID; 3773 3774 if (ib->is_const_ib) { 3775 /* set switch buffer packet before const IB */ 3776 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 3777 radeon_ring_write(ring, 0); 3778 3779 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 3780 } else { 3781 u32 next_rptr; 3782 if (ring->rptr_save_reg) { 3783 next_rptr = ring->wptr + 3 + 4; 3784 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 3785 radeon_ring_write(ring, ((ring->rptr_save_reg - 3786 PACKET3_SET_UCONFIG_REG_START) >> 2)); 3787 radeon_ring_write(ring, next_rptr); 3788 } else if (rdev->wb.enabled) { 3789 next_rptr = ring->wptr + 5 + 4; 3790 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3791 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1)); 3792 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 3793 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr)); 3794 radeon_ring_write(ring, next_rptr); 3795 } 3796 3797 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 3798 } 3799 3800 control |= ib->length_dw | (vm_id << 24); 3801 3802 radeon_ring_write(ring, header); 3803 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC)); 3804 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 3805 radeon_ring_write(ring, control); 3806 } 3807 3808 /** 3809 * cik_ib_test - basic gfx ring IB test 3810 * 3811 * @rdev: radeon_device pointer 3812 * @ring: radeon_ring structure holding ring information 3813 * 3814 * Allocate an IB and execute it on the gfx ring (CIK). 3815 * Provides a basic gfx ring test to verify that IBs are working. 3816 * Returns 0 on success, error on failure. 3817 */ 3818 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3819 { 3820 struct radeon_ib ib; 3821 uint32_t scratch; 3822 uint32_t tmp = 0; 3823 unsigned i; 3824 int r; 3825 3826 r = radeon_scratch_get(rdev, &scratch); 3827 if (r) { 3828 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 3829 return r; 3830 } 3831 WREG32(scratch, 0xCAFEDEAD); 3832 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 3833 if (r) { 3834 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 3835 radeon_scratch_free(rdev, scratch); 3836 return r; 3837 } 3838 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3839 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); 3840 ib.ptr[2] = 0xDEADBEEF; 3841 ib.length_dw = 3; 3842 r = radeon_ib_schedule(rdev, &ib, NULL, false); 3843 if (r) { 3844 radeon_scratch_free(rdev, scratch); 3845 radeon_ib_free(rdev, &ib); 3846 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3847 return r; 3848 } 3849 r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( 3850 RADEON_USEC_IB_TEST_TIMEOUT)); 3851 if (r < 0) { 3852 DRM_ERROR("radeon: fence wait failed (%d).\n", r); 3853 radeon_scratch_free(rdev, scratch); 3854 radeon_ib_free(rdev, &ib); 3855 return r; 3856 } else if (r == 0) { 3857 DRM_ERROR("radeon: fence wait timed out.\n"); 3858 radeon_scratch_free(rdev, scratch); 3859 radeon_ib_free(rdev, &ib); 3860 return -ETIMEDOUT; 3861 } 3862 r = 0; 3863 for (i = 0; i < rdev->usec_timeout; i++) { 3864 tmp = RREG32(scratch); 3865 if (tmp == 0xDEADBEEF) 3866 break; 3867 DRM_UDELAY(1); 3868 } 3869 if (i < rdev->usec_timeout) { 3870 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 3871 } else { 3872 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", 3873 scratch, tmp); 3874 r = -EINVAL; 3875 } 3876 radeon_scratch_free(rdev, scratch); 3877 radeon_ib_free(rdev, &ib); 3878 return r; 3879 } 3880 3881 /* 3882 * CP. 3883 * On CIK, gfx and compute now have independant command processors. 3884 * 3885 * GFX 3886 * Gfx consists of a single ring and can process both gfx jobs and 3887 * compute jobs. The gfx CP consists of three microengines (ME): 3888 * PFP - Pre-Fetch Parser 3889 * ME - Micro Engine 3890 * CE - Constant Engine 3891 * The PFP and ME make up what is considered the Drawing Engine (DE). 3892 * The CE is an asynchronous engine used for updating buffer desciptors 3893 * used by the DE so that they can be loaded into cache in parallel 3894 * while the DE is processing state update packets. 3895 * 3896 * Compute 3897 * The compute CP consists of two microengines (ME): 3898 * MEC1 - Compute MicroEngine 1 3899 * MEC2 - Compute MicroEngine 2 3900 * Each MEC supports 4 compute pipes and each pipe supports 8 queues. 3901 * The queues are exposed to userspace and are programmed directly 3902 * by the compute runtime. 3903 */ 3904 /** 3905 * cik_cp_gfx_enable - enable/disable the gfx CP MEs 3906 * 3907 * @rdev: radeon_device pointer 3908 * @enable: enable or disable the MEs 3909 * 3910 * Halts or unhalts the gfx MEs. 3911 */ 3912 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable) 3913 { 3914 if (enable) 3915 WREG32(CP_ME_CNTL, 0); 3916 else { 3917 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX) 3918 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 3919 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 3920 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 3921 } 3922 udelay(50); 3923 } 3924 3925 /** 3926 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode 3927 * 3928 * @rdev: radeon_device pointer 3929 * 3930 * Loads the gfx PFP, ME, and CE ucode. 3931 * Returns 0 for success, -EINVAL if the ucode is not available. 3932 */ 3933 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) 3934 { 3935 int i; 3936 3937 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw) 3938 return -EINVAL; 3939 3940 cik_cp_gfx_enable(rdev, false); 3941 3942 if (rdev->new_fw) { 3943 const struct gfx_firmware_header_v1_0 *pfp_hdr = 3944 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data; 3945 const struct gfx_firmware_header_v1_0 *ce_hdr = 3946 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data; 3947 const struct gfx_firmware_header_v1_0 *me_hdr = 3948 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data; 3949 const __le32 *fw_data; 3950 u32 fw_size; 3951 3952 radeon_ucode_print_gfx_hdr(&pfp_hdr->header); 3953 radeon_ucode_print_gfx_hdr(&ce_hdr->header); 3954 radeon_ucode_print_gfx_hdr(&me_hdr->header); 3955 3956 /* PFP */ 3957 fw_data = (const __le32 *) 3958 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3959 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3960 WREG32(CP_PFP_UCODE_ADDR, 0); 3961 for (i = 0; i < fw_size; i++) 3962 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3963 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version)); 3964 3965 /* CE */ 3966 fw_data = (const __le32 *) 3967 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3968 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3969 WREG32(CP_CE_UCODE_ADDR, 0); 3970 for (i = 0; i < fw_size; i++) 3971 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3972 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version)); 3973 3974 /* ME */ 3975 fw_data = (const __be32 *) 3976 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3977 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3978 WREG32(CP_ME_RAM_WADDR, 0); 3979 for (i = 0; i < fw_size; i++) 3980 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3981 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version)); 3982 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version)); 3983 } else { 3984 const __be32 *fw_data; 3985 3986 /* PFP */ 3987 fw_data = (const __be32 *)rdev->pfp_fw->data; 3988 WREG32(CP_PFP_UCODE_ADDR, 0); 3989 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++) 3990 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 3991 WREG32(CP_PFP_UCODE_ADDR, 0); 3992 3993 /* CE */ 3994 fw_data = (const __be32 *)rdev->ce_fw->data; 3995 WREG32(CP_CE_UCODE_ADDR, 0); 3996 for (i = 0; i < CIK_CE_UCODE_SIZE; i++) 3997 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++)); 3998 WREG32(CP_CE_UCODE_ADDR, 0); 3999 4000 /* ME */ 4001 fw_data = (const __be32 *)rdev->me_fw->data; 4002 WREG32(CP_ME_RAM_WADDR, 0); 4003 for (i = 0; i < CIK_ME_UCODE_SIZE; i++) 4004 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 4005 WREG32(CP_ME_RAM_WADDR, 0); 4006 } 4007 4008 return 0; 4009 } 4010 4011 /** 4012 * cik_cp_gfx_start - start the gfx ring 4013 * 4014 * @rdev: radeon_device pointer 4015 * 4016 * Enables the ring and loads the clear state context and other 4017 * packets required to init the ring. 4018 * Returns 0 for success, error for failure. 4019 */ 4020 static int cik_cp_gfx_start(struct radeon_device *rdev) 4021 { 4022 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 4023 int r, i; 4024 4025 /* init the CP */ 4026 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1); 4027 WREG32(CP_ENDIAN_SWAP, 0); 4028 WREG32(CP_DEVICE_ID, 1); 4029 4030 cik_cp_gfx_enable(rdev, true); 4031 4032 r = radeon_ring_lock(rdev, ring, cik_default_size + 17); 4033 if (r) { 4034 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); 4035 return r; 4036 } 4037 4038 /* init the CE partitions. CE only used for gfx on CIK */ 4039 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4040 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4041 radeon_ring_write(ring, 0x8000); 4042 radeon_ring_write(ring, 0x8000); 4043 4044 /* setup clear context state */ 4045 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4046 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4047 4048 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4049 radeon_ring_write(ring, 0x80000000); 4050 radeon_ring_write(ring, 0x80000000); 4051 4052 for (i = 0; i < cik_default_size; i++) 4053 radeon_ring_write(ring, cik_default_state[i]); 4054 4055 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4056 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4057 4058 /* set clear context state */ 4059 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4060 radeon_ring_write(ring, 0); 4061 4062 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4063 radeon_ring_write(ring, 0x00000316); 4064 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 4065 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 4066 4067 radeon_ring_unlock_commit(rdev, ring, false); 4068 4069 return 0; 4070 } 4071 4072 /** 4073 * cik_cp_gfx_fini - stop the gfx ring 4074 * 4075 * @rdev: radeon_device pointer 4076 * 4077 * Stop the gfx ring and tear down the driver ring 4078 * info. 4079 */ 4080 static void cik_cp_gfx_fini(struct radeon_device *rdev) 4081 { 4082 cik_cp_gfx_enable(rdev, false); 4083 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 4084 } 4085 4086 /** 4087 * cik_cp_gfx_resume - setup the gfx ring buffer registers 4088 * 4089 * @rdev: radeon_device pointer 4090 * 4091 * Program the location and size of the gfx ring buffer 4092 * and test it to make sure it's working. 4093 * Returns 0 for success, error for failure. 4094 */ 4095 static int cik_cp_gfx_resume(struct radeon_device *rdev) 4096 { 4097 struct radeon_ring *ring; 4098 u32 tmp; 4099 u32 rb_bufsz; 4100 u64 rb_addr; 4101 int r; 4102 4103 WREG32(CP_SEM_WAIT_TIMER, 0x0); 4104 if (rdev->family != CHIP_HAWAII) 4105 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); 4106 4107 /* Set the write pointer delay */ 4108 WREG32(CP_RB_WPTR_DELAY, 0); 4109 4110 /* set the RB to use vmid 0 */ 4111 WREG32(CP_RB_VMID, 0); 4112 4113 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); 4114 4115 /* ring 0 - compute and gfx */ 4116 /* Set ring buffer size */ 4117 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 4118 rb_bufsz = order_base_2(ring->ring_size / 8); 4119 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; 4120 #ifdef __BIG_ENDIAN 4121 tmp |= BUF_SWAP_32BIT; 4122 #endif 4123 WREG32(CP_RB0_CNTL, tmp); 4124 4125 /* Initialize the ring buffer's read and write pointers */ 4126 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); 4127 ring->wptr = 0; 4128 WREG32(CP_RB0_WPTR, ring->wptr); 4129 4130 /* set the wb address wether it's enabled or not */ 4131 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); 4132 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); 4133 4134 /* scratch register shadowing is no longer supported */ 4135 WREG32(SCRATCH_UMSK, 0); 4136 4137 if (!rdev->wb.enabled) 4138 tmp |= RB_NO_UPDATE; 4139 4140 mdelay(1); 4141 WREG32(CP_RB0_CNTL, tmp); 4142 4143 rb_addr = ring->gpu_addr >> 8; 4144 WREG32(CP_RB0_BASE, rb_addr); 4145 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4146 4147 /* start the ring */ 4148 cik_cp_gfx_start(rdev); 4149 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; 4150 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 4151 if (r) { 4152 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 4153 return r; 4154 } 4155 4156 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX) 4157 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 4158 4159 return 0; 4160 } 4161 4162 u32 cik_gfx_get_rptr(struct radeon_device *rdev, 4163 struct radeon_ring *ring) 4164 { 4165 u32 rptr; 4166 4167 if (rdev->wb.enabled) 4168 rptr = rdev->wb.wb[ring->rptr_offs/4]; 4169 else 4170 rptr = RREG32(CP_RB0_RPTR); 4171 4172 return rptr; 4173 } 4174 4175 u32 cik_gfx_get_wptr(struct radeon_device *rdev, 4176 struct radeon_ring *ring) 4177 { 4178 return RREG32(CP_RB0_WPTR); 4179 } 4180 4181 void cik_gfx_set_wptr(struct radeon_device *rdev, 4182 struct radeon_ring *ring) 4183 { 4184 WREG32(CP_RB0_WPTR, ring->wptr); 4185 (void)RREG32(CP_RB0_WPTR); 4186 } 4187 4188 u32 cik_compute_get_rptr(struct radeon_device *rdev, 4189 struct radeon_ring *ring) 4190 { 4191 u32 rptr; 4192 4193 if (rdev->wb.enabled) { 4194 rptr = rdev->wb.wb[ring->rptr_offs/4]; 4195 } else { 4196 mutex_lock(&rdev->srbm_mutex); 4197 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 4198 rptr = RREG32(CP_HQD_PQ_RPTR); 4199 cik_srbm_select(rdev, 0, 0, 0, 0); 4200 mutex_unlock(&rdev->srbm_mutex); 4201 } 4202 4203 return rptr; 4204 } 4205 4206 u32 cik_compute_get_wptr(struct radeon_device *rdev, 4207 struct radeon_ring *ring) 4208 { 4209 u32 wptr; 4210 4211 if (rdev->wb.enabled) { 4212 /* XXX check if swapping is necessary on BE */ 4213 wptr = rdev->wb.wb[ring->wptr_offs/4]; 4214 } else { 4215 mutex_lock(&rdev->srbm_mutex); 4216 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 4217 wptr = RREG32(CP_HQD_PQ_WPTR); 4218 cik_srbm_select(rdev, 0, 0, 0, 0); 4219 mutex_unlock(&rdev->srbm_mutex); 4220 } 4221 4222 return wptr; 4223 } 4224 4225 void cik_compute_set_wptr(struct radeon_device *rdev, 4226 struct radeon_ring *ring) 4227 { 4228 /* XXX check if swapping is necessary on BE */ 4229 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr; 4230 WDOORBELL32(ring->doorbell_index, ring->wptr); 4231 } 4232 4233 static void cik_compute_stop(struct radeon_device *rdev, 4234 struct radeon_ring *ring) 4235 { 4236 u32 j, tmp; 4237 4238 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); 4239 /* Disable wptr polling. */ 4240 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 4241 tmp &= ~WPTR_POLL_EN; 4242 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 4243 /* Disable HQD. */ 4244 if (RREG32(CP_HQD_ACTIVE) & 1) { 4245 WREG32(CP_HQD_DEQUEUE_REQUEST, 1); 4246 for (j = 0; j < rdev->usec_timeout; j++) { 4247 if (!(RREG32(CP_HQD_ACTIVE) & 1)) 4248 break; 4249 udelay(1); 4250 } 4251 WREG32(CP_HQD_DEQUEUE_REQUEST, 0); 4252 WREG32(CP_HQD_PQ_RPTR, 0); 4253 WREG32(CP_HQD_PQ_WPTR, 0); 4254 } 4255 cik_srbm_select(rdev, 0, 0, 0, 0); 4256 } 4257 4258 /** 4259 * cik_cp_compute_enable - enable/disable the compute CP MEs 4260 * 4261 * @rdev: radeon_device pointer 4262 * @enable: enable or disable the MEs 4263 * 4264 * Halts or unhalts the compute MEs. 4265 */ 4266 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) 4267 { 4268 if (enable) 4269 WREG32(CP_MEC_CNTL, 0); 4270 else { 4271 /* 4272 * To make hibernation reliable we need to clear compute ring 4273 * configuration before halting the compute ring. 4274 */ 4275 mutex_lock(&rdev->srbm_mutex); 4276 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); 4277 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); 4278 mutex_unlock(&rdev->srbm_mutex); 4279 4280 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); 4281 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; 4282 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; 4283 } 4284 udelay(50); 4285 } 4286 4287 /** 4288 * cik_cp_compute_load_microcode - load the compute CP ME ucode 4289 * 4290 * @rdev: radeon_device pointer 4291 * 4292 * Loads the compute MEC1&2 ucode. 4293 * Returns 0 for success, -EINVAL if the ucode is not available. 4294 */ 4295 static int cik_cp_compute_load_microcode(struct radeon_device *rdev) 4296 { 4297 int i; 4298 4299 if (!rdev->mec_fw) 4300 return -EINVAL; 4301 4302 cik_cp_compute_enable(rdev, false); 4303 4304 if (rdev->new_fw) { 4305 const struct gfx_firmware_header_v1_0 *mec_hdr = 4306 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data; 4307 const __le32 *fw_data; 4308 u32 fw_size; 4309 4310 radeon_ucode_print_gfx_hdr(&mec_hdr->header); 4311 4312 /* MEC1 */ 4313 fw_data = (const __le32 *) 4314 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4315 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4316 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 4317 for (i = 0; i < fw_size; i++) 4318 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++)); 4319 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version)); 4320 4321 /* MEC2 */ 4322 if (rdev->family == CHIP_KAVERI) { 4323 const struct gfx_firmware_header_v1_0 *mec2_hdr = 4324 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data; 4325 4326 fw_data = (const __le32 *) 4327 (rdev->mec2_fw->data + 4328 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4329 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4330 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 4331 for (i = 0; i < fw_size; i++) 4332 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++)); 4333 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version)); 4334 } 4335 } else { 4336 const __be32 *fw_data; 4337 4338 /* MEC1 */ 4339 fw_data = (const __be32 *)rdev->mec_fw->data; 4340 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 4341 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 4342 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++)); 4343 WREG32(CP_MEC_ME1_UCODE_ADDR, 0); 4344 4345 if (rdev->family == CHIP_KAVERI) { 4346 /* MEC2 */ 4347 fw_data = (const __be32 *)rdev->mec_fw->data; 4348 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 4349 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++) 4350 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++)); 4351 WREG32(CP_MEC_ME2_UCODE_ADDR, 0); 4352 } 4353 } 4354 4355 return 0; 4356 } 4357 4358 /** 4359 * cik_cp_compute_start - start the compute queues 4360 * 4361 * @rdev: radeon_device pointer 4362 * 4363 * Enable the compute queues. 4364 * Returns 0 for success, error for failure. 4365 */ 4366 static int cik_cp_compute_start(struct radeon_device *rdev) 4367 { 4368 cik_cp_compute_enable(rdev, true); 4369 4370 return 0; 4371 } 4372 4373 /** 4374 * cik_cp_compute_fini - stop the compute queues 4375 * 4376 * @rdev: radeon_device pointer 4377 * 4378 * Stop the compute queues and tear down the driver queue 4379 * info. 4380 */ 4381 static void cik_cp_compute_fini(struct radeon_device *rdev) 4382 { 4383 int i, idx, r; 4384 4385 cik_cp_compute_enable(rdev, false); 4386 4387 for (i = 0; i < 2; i++) { 4388 if (i == 0) 4389 idx = CAYMAN_RING_TYPE_CP1_INDEX; 4390 else 4391 idx = CAYMAN_RING_TYPE_CP2_INDEX; 4392 4393 if (rdev->ring[idx].mqd_obj) { 4394 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 4395 if (unlikely(r != 0)) 4396 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r); 4397 4398 radeon_bo_unpin(rdev->ring[idx].mqd_obj); 4399 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 4400 4401 radeon_bo_unref(&rdev->ring[idx].mqd_obj); 4402 rdev->ring[idx].mqd_obj = NULL; 4403 } 4404 } 4405 } 4406 4407 static void cik_mec_fini(struct radeon_device *rdev) 4408 { 4409 int r; 4410 4411 if (rdev->mec.hpd_eop_obj) { 4412 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); 4413 if (unlikely(r != 0)) 4414 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r); 4415 radeon_bo_unpin(rdev->mec.hpd_eop_obj); 4416 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 4417 4418 radeon_bo_unref(&rdev->mec.hpd_eop_obj); 4419 rdev->mec.hpd_eop_obj = NULL; 4420 } 4421 } 4422 4423 #define MEC_HPD_SIZE 2048 4424 4425 static int cik_mec_init(struct radeon_device *rdev) 4426 { 4427 int r; 4428 u32 *hpd; 4429 4430 /* 4431 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total 4432 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total 4433 * Nonetheless, we assign only 1 pipe because all other pipes will 4434 * be handled by KFD 4435 */ 4436 rdev->mec.num_mec = 1; 4437 rdev->mec.num_pipe = 1; 4438 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8; 4439 4440 if (rdev->mec.hpd_eop_obj == NULL) { 4441 r = radeon_bo_create(rdev, 4442 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, 4443 PAGE_SIZE, true, 4444 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, 4445 &rdev->mec.hpd_eop_obj); 4446 if (r) { 4447 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); 4448 return r; 4449 } 4450 } 4451 4452 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false); 4453 if (unlikely(r != 0)) { 4454 cik_mec_fini(rdev); 4455 return r; 4456 } 4457 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT, 4458 &rdev->mec.hpd_eop_gpu_addr); 4459 if (r) { 4460 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r); 4461 cik_mec_fini(rdev); 4462 return r; 4463 } 4464 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd); 4465 if (r) { 4466 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r); 4467 cik_mec_fini(rdev); 4468 return r; 4469 } 4470 4471 /* clear memory. Not sure if this is required or not */ 4472 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2); 4473 4474 radeon_bo_kunmap(rdev->mec.hpd_eop_obj); 4475 radeon_bo_unreserve(rdev->mec.hpd_eop_obj); 4476 4477 return 0; 4478 } 4479 4480 struct hqd_registers 4481 { 4482 u32 cp_mqd_base_addr; 4483 u32 cp_mqd_base_addr_hi; 4484 u32 cp_hqd_active; 4485 u32 cp_hqd_vmid; 4486 u32 cp_hqd_persistent_state; 4487 u32 cp_hqd_pipe_priority; 4488 u32 cp_hqd_queue_priority; 4489 u32 cp_hqd_quantum; 4490 u32 cp_hqd_pq_base; 4491 u32 cp_hqd_pq_base_hi; 4492 u32 cp_hqd_pq_rptr; 4493 u32 cp_hqd_pq_rptr_report_addr; 4494 u32 cp_hqd_pq_rptr_report_addr_hi; 4495 u32 cp_hqd_pq_wptr_poll_addr; 4496 u32 cp_hqd_pq_wptr_poll_addr_hi; 4497 u32 cp_hqd_pq_doorbell_control; 4498 u32 cp_hqd_pq_wptr; 4499 u32 cp_hqd_pq_control; 4500 u32 cp_hqd_ib_base_addr; 4501 u32 cp_hqd_ib_base_addr_hi; 4502 u32 cp_hqd_ib_rptr; 4503 u32 cp_hqd_ib_control; 4504 u32 cp_hqd_iq_timer; 4505 u32 cp_hqd_iq_rptr; 4506 u32 cp_hqd_dequeue_request; 4507 u32 cp_hqd_dma_offload; 4508 u32 cp_hqd_sema_cmd; 4509 u32 cp_hqd_msg_type; 4510 u32 cp_hqd_atomic0_preop_lo; 4511 u32 cp_hqd_atomic0_preop_hi; 4512 u32 cp_hqd_atomic1_preop_lo; 4513 u32 cp_hqd_atomic1_preop_hi; 4514 u32 cp_hqd_hq_scheduler0; 4515 u32 cp_hqd_hq_scheduler1; 4516 u32 cp_mqd_control; 4517 }; 4518 4519 struct bonaire_mqd 4520 { 4521 u32 header; 4522 u32 dispatch_initiator; 4523 u32 dimensions[3]; 4524 u32 start_idx[3]; 4525 u32 num_threads[3]; 4526 u32 pipeline_stat_enable; 4527 u32 perf_counter_enable; 4528 u32 pgm[2]; 4529 u32 tba[2]; 4530 u32 tma[2]; 4531 u32 pgm_rsrc[2]; 4532 u32 vmid; 4533 u32 resource_limits; 4534 u32 static_thread_mgmt01[2]; 4535 u32 tmp_ring_size; 4536 u32 static_thread_mgmt23[2]; 4537 u32 restart[3]; 4538 u32 thread_trace_enable; 4539 u32 reserved1; 4540 u32 user_data[16]; 4541 u32 vgtcs_invoke_count[2]; 4542 struct hqd_registers queue_state; 4543 u32 dequeue_cntr; 4544 u32 interrupt_queue[64]; 4545 }; 4546 4547 /** 4548 * cik_cp_compute_resume - setup the compute queue registers 4549 * 4550 * @rdev: radeon_device pointer 4551 * 4552 * Program the compute queues and test them to make sure they 4553 * are working. 4554 * Returns 0 for success, error for failure. 4555 */ 4556 static int cik_cp_compute_resume(struct radeon_device *rdev) 4557 { 4558 int r, i, j, idx; 4559 u32 tmp; 4560 bool use_doorbell = true; 4561 u64 hqd_gpu_addr; 4562 u64 mqd_gpu_addr; 4563 u64 eop_gpu_addr; 4564 u64 wb_gpu_addr; 4565 u32 *buf; 4566 struct bonaire_mqd *mqd; 4567 4568 r = cik_cp_compute_start(rdev); 4569 if (r) 4570 return r; 4571 4572 /* fix up chicken bits */ 4573 tmp = RREG32(CP_CPF_DEBUG); 4574 tmp |= (1 << 23); 4575 WREG32(CP_CPF_DEBUG, tmp); 4576 4577 /* init the pipes */ 4578 mutex_lock(&rdev->srbm_mutex); 4579 4580 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr; 4581 4582 cik_srbm_select(rdev, 0, 0, 0, 0); 4583 4584 /* write the EOP addr */ 4585 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 4586 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 4587 4588 /* set the VMID assigned */ 4589 WREG32(CP_HPD_EOP_VMID, 0); 4590 4591 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4592 tmp = RREG32(CP_HPD_EOP_CONTROL); 4593 tmp &= ~EOP_SIZE_MASK; 4594 tmp |= order_base_2(MEC_HPD_SIZE / 8); 4595 WREG32(CP_HPD_EOP_CONTROL, tmp); 4596 4597 mutex_unlock(&rdev->srbm_mutex); 4598 4599 /* init the queues. Just two for now. */ 4600 for (i = 0; i < 2; i++) { 4601 if (i == 0) 4602 idx = CAYMAN_RING_TYPE_CP1_INDEX; 4603 else 4604 idx = CAYMAN_RING_TYPE_CP2_INDEX; 4605 4606 if (rdev->ring[idx].mqd_obj == NULL) { 4607 r = radeon_bo_create(rdev, 4608 sizeof(struct bonaire_mqd), 4609 PAGE_SIZE, true, 4610 RADEON_GEM_DOMAIN_GTT, 0, NULL, 4611 NULL, &rdev->ring[idx].mqd_obj); 4612 if (r) { 4613 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 4614 return r; 4615 } 4616 } 4617 4618 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 4619 if (unlikely(r != 0)) { 4620 cik_cp_compute_fini(rdev); 4621 return r; 4622 } 4623 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 4624 &mqd_gpu_addr); 4625 if (r) { 4626 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 4627 cik_cp_compute_fini(rdev); 4628 return r; 4629 } 4630 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 4631 if (r) { 4632 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 4633 cik_cp_compute_fini(rdev); 4634 return r; 4635 } 4636 4637 /* init the mqd struct */ 4638 memset(buf, 0, sizeof(struct bonaire_mqd)); 4639 4640 mqd = (struct bonaire_mqd *)buf; 4641 mqd->header = 0xC0310800; 4642 mqd->static_thread_mgmt01[0] = 0xffffffff; 4643 mqd->static_thread_mgmt01[1] = 0xffffffff; 4644 mqd->static_thread_mgmt23[0] = 0xffffffff; 4645 mqd->static_thread_mgmt23[1] = 0xffffffff; 4646 4647 mutex_lock(&rdev->srbm_mutex); 4648 cik_srbm_select(rdev, rdev->ring[idx].me, 4649 rdev->ring[idx].pipe, 4650 rdev->ring[idx].queue, 0); 4651 4652 /* disable wptr polling */ 4653 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 4654 tmp &= ~WPTR_POLL_EN; 4655 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 4656 4657 /* enable doorbell? */ 4658 mqd->queue_state.cp_hqd_pq_doorbell_control = 4659 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 4660 if (use_doorbell) 4661 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 4662 else 4663 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN; 4664 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 4665 mqd->queue_state.cp_hqd_pq_doorbell_control); 4666 4667 /* disable the queue if it's active */ 4668 mqd->queue_state.cp_hqd_dequeue_request = 0; 4669 mqd->queue_state.cp_hqd_pq_rptr = 0; 4670 mqd->queue_state.cp_hqd_pq_wptr= 0; 4671 if (RREG32(CP_HQD_ACTIVE) & 1) { 4672 WREG32(CP_HQD_DEQUEUE_REQUEST, 1); 4673 for (j = 0; j < rdev->usec_timeout; j++) { 4674 if (!(RREG32(CP_HQD_ACTIVE) & 1)) 4675 break; 4676 udelay(1); 4677 } 4678 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); 4679 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); 4680 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 4681 } 4682 4683 /* set the pointer to the MQD */ 4684 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; 4685 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 4686 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); 4687 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); 4688 /* set MQD vmid to 0 */ 4689 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL); 4690 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK; 4691 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); 4692 4693 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4694 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8; 4695 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; 4696 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4697 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); 4698 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); 4699 4700 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4701 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL); 4702 mqd->queue_state.cp_hqd_pq_control &= 4703 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK); 4704 4705 mqd->queue_state.cp_hqd_pq_control |= 4706 order_base_2(rdev->ring[idx].ring_size / 8); 4707 mqd->queue_state.cp_hqd_pq_control |= 4708 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8); 4709 #ifdef __BIG_ENDIAN 4710 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT; 4711 #endif 4712 mqd->queue_state.cp_hqd_pq_control &= 4713 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE); 4714 mqd->queue_state.cp_hqd_pq_control |= 4715 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */ 4716 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); 4717 4718 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */ 4719 if (i == 0) 4720 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET; 4721 else 4722 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET; 4723 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 4724 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4725 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); 4726 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI, 4727 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); 4728 4729 /* set the wb address wether it's enabled or not */ 4730 if (i == 0) 4731 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET; 4732 else 4733 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET; 4734 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; 4735 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = 4736 upper_32_bits(wb_gpu_addr) & 0xffff; 4737 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 4738 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 4739 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4740 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 4741 4742 /* enable the doorbell if requested */ 4743 if (use_doorbell) { 4744 mqd->queue_state.cp_hqd_pq_doorbell_control = 4745 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 4746 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 4747 mqd->queue_state.cp_hqd_pq_doorbell_control |= 4748 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index); 4749 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 4750 mqd->queue_state.cp_hqd_pq_doorbell_control &= 4751 ~(DOORBELL_SOURCE | DOORBELL_HIT); 4752 4753 } else { 4754 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 4755 } 4756 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 4757 mqd->queue_state.cp_hqd_pq_doorbell_control); 4758 4759 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4760 rdev->ring[idx].wptr = 0; 4761 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 4762 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 4763 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR); 4764 4765 /* set the vmid for the queue */ 4766 mqd->queue_state.cp_hqd_vmid = 0; 4767 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 4768 4769 /* activate the queue */ 4770 mqd->queue_state.cp_hqd_active = 1; 4771 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 4772 4773 cik_srbm_select(rdev, 0, 0, 0, 0); 4774 mutex_unlock(&rdev->srbm_mutex); 4775 4776 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 4777 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 4778 4779 rdev->ring[idx].ready = true; 4780 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 4781 if (r) 4782 rdev->ring[idx].ready = false; 4783 } 4784 4785 return 0; 4786 } 4787 4788 static void cik_cp_enable(struct radeon_device *rdev, bool enable) 4789 { 4790 cik_cp_gfx_enable(rdev, enable); 4791 cik_cp_compute_enable(rdev, enable); 4792 } 4793 4794 static int cik_cp_load_microcode(struct radeon_device *rdev) 4795 { 4796 int r; 4797 4798 r = cik_cp_gfx_load_microcode(rdev); 4799 if (r) 4800 return r; 4801 r = cik_cp_compute_load_microcode(rdev); 4802 if (r) 4803 return r; 4804 4805 return 0; 4806 } 4807 4808 static void cik_cp_fini(struct radeon_device *rdev) 4809 { 4810 cik_cp_gfx_fini(rdev); 4811 cik_cp_compute_fini(rdev); 4812 } 4813 4814 static int cik_cp_resume(struct radeon_device *rdev) 4815 { 4816 int r; 4817 4818 cik_enable_gui_idle_interrupt(rdev, false); 4819 4820 r = cik_cp_load_microcode(rdev); 4821 if (r) 4822 return r; 4823 4824 r = cik_cp_gfx_resume(rdev); 4825 if (r) 4826 return r; 4827 r = cik_cp_compute_resume(rdev); 4828 if (r) 4829 return r; 4830 4831 cik_enable_gui_idle_interrupt(rdev, true); 4832 4833 return 0; 4834 } 4835 4836 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 4837 { 4838 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 4839 RREG32(GRBM_STATUS)); 4840 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 4841 RREG32(GRBM_STATUS2)); 4842 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 4843 RREG32(GRBM_STATUS_SE0)); 4844 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 4845 RREG32(GRBM_STATUS_SE1)); 4846 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 4847 RREG32(GRBM_STATUS_SE2)); 4848 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 4849 RREG32(GRBM_STATUS_SE3)); 4850 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 4851 RREG32(SRBM_STATUS)); 4852 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n", 4853 RREG32(SRBM_STATUS2)); 4854 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n", 4855 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); 4856 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", 4857 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); 4858 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); 4859 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", 4860 RREG32(CP_STALLED_STAT1)); 4861 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", 4862 RREG32(CP_STALLED_STAT2)); 4863 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", 4864 RREG32(CP_STALLED_STAT3)); 4865 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", 4866 RREG32(CP_CPF_BUSY_STAT)); 4867 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", 4868 RREG32(CP_CPF_STALLED_STAT1)); 4869 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); 4870 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); 4871 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", 4872 RREG32(CP_CPC_STALLED_STAT1)); 4873 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); 4874 } 4875 4876 /** 4877 * cik_gpu_check_soft_reset - check which blocks are busy 4878 * 4879 * @rdev: radeon_device pointer 4880 * 4881 * Check which blocks are busy and return the relevant reset 4882 * mask to be used by cik_gpu_soft_reset(). 4883 * Returns a mask of the blocks to be reset. 4884 */ 4885 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) 4886 { 4887 u32 reset_mask = 0; 4888 u32 tmp; 4889 4890 /* GRBM_STATUS */ 4891 tmp = RREG32(GRBM_STATUS); 4892 if (tmp & (PA_BUSY | SC_BUSY | 4893 BCI_BUSY | SX_BUSY | 4894 TA_BUSY | VGT_BUSY | 4895 DB_BUSY | CB_BUSY | 4896 GDS_BUSY | SPI_BUSY | 4897 IA_BUSY | IA_BUSY_NO_DMA)) 4898 reset_mask |= RADEON_RESET_GFX; 4899 4900 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY)) 4901 reset_mask |= RADEON_RESET_CP; 4902 4903 /* GRBM_STATUS2 */ 4904 tmp = RREG32(GRBM_STATUS2); 4905 if (tmp & RLC_BUSY) 4906 reset_mask |= RADEON_RESET_RLC; 4907 4908 /* SDMA0_STATUS_REG */ 4909 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); 4910 if (!(tmp & SDMA_IDLE)) 4911 reset_mask |= RADEON_RESET_DMA; 4912 4913 /* SDMA1_STATUS_REG */ 4914 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); 4915 if (!(tmp & SDMA_IDLE)) 4916 reset_mask |= RADEON_RESET_DMA1; 4917 4918 /* SRBM_STATUS2 */ 4919 tmp = RREG32(SRBM_STATUS2); 4920 if (tmp & SDMA_BUSY) 4921 reset_mask |= RADEON_RESET_DMA; 4922 4923 if (tmp & SDMA1_BUSY) 4924 reset_mask |= RADEON_RESET_DMA1; 4925 4926 /* SRBM_STATUS */ 4927 tmp = RREG32(SRBM_STATUS); 4928 4929 if (tmp & IH_BUSY) 4930 reset_mask |= RADEON_RESET_IH; 4931 4932 if (tmp & SEM_BUSY) 4933 reset_mask |= RADEON_RESET_SEM; 4934 4935 if (tmp & GRBM_RQ_PENDING) 4936 reset_mask |= RADEON_RESET_GRBM; 4937 4938 if (tmp & VMC_BUSY) 4939 reset_mask |= RADEON_RESET_VMC; 4940 4941 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY | 4942 MCC_BUSY | MCD_BUSY)) 4943 reset_mask |= RADEON_RESET_MC; 4944 4945 if (evergreen_is_display_hung(rdev)) 4946 reset_mask |= RADEON_RESET_DISPLAY; 4947 4948 /* Skip MC reset as it's mostly likely not hung, just busy */ 4949 if (reset_mask & RADEON_RESET_MC) { 4950 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); 4951 reset_mask &= ~RADEON_RESET_MC; 4952 } 4953 4954 return reset_mask; 4955 } 4956 4957 /** 4958 * cik_gpu_soft_reset - soft reset GPU 4959 * 4960 * @rdev: radeon_device pointer 4961 * @reset_mask: mask of which blocks to reset 4962 * 4963 * Soft reset the blocks specified in @reset_mask. 4964 */ 4965 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask) 4966 { 4967 struct evergreen_mc_save save; 4968 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4969 u32 tmp; 4970 4971 if (reset_mask == 0) 4972 return; 4973 4974 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask); 4975 4976 cik_print_gpu_status_regs(rdev); 4977 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 4978 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); 4979 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 4980 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); 4981 4982 /* disable CG/PG */ 4983 cik_fini_pg(rdev); 4984 cik_fini_cg(rdev); 4985 4986 /* stop the rlc */ 4987 cik_rlc_stop(rdev); 4988 4989 /* Disable GFX parsing/prefetching */ 4990 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); 4991 4992 /* Disable MEC parsing/prefetching */ 4993 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT); 4994 4995 if (reset_mask & RADEON_RESET_DMA) { 4996 /* sdma0 */ 4997 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET); 4998 tmp |= SDMA_HALT; 4999 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp); 5000 } 5001 if (reset_mask & RADEON_RESET_DMA1) { 5002 /* sdma1 */ 5003 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET); 5004 tmp |= SDMA_HALT; 5005 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp); 5006 } 5007 5008 evergreen_mc_stop(rdev, &save); 5009 if (evergreen_mc_wait_for_idle(rdev)) { 5010 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 5011 } 5012 5013 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) 5014 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX; 5015 5016 if (reset_mask & RADEON_RESET_CP) { 5017 grbm_soft_reset |= SOFT_RESET_CP; 5018 5019 srbm_soft_reset |= SOFT_RESET_GRBM; 5020 } 5021 5022 if (reset_mask & RADEON_RESET_DMA) 5023 srbm_soft_reset |= SOFT_RESET_SDMA; 5024 5025 if (reset_mask & RADEON_RESET_DMA1) 5026 srbm_soft_reset |= SOFT_RESET_SDMA1; 5027 5028 if (reset_mask & RADEON_RESET_DISPLAY) 5029 srbm_soft_reset |= SOFT_RESET_DC; 5030 5031 if (reset_mask & RADEON_RESET_RLC) 5032 grbm_soft_reset |= SOFT_RESET_RLC; 5033 5034 if (reset_mask & RADEON_RESET_SEM) 5035 srbm_soft_reset |= SOFT_RESET_SEM; 5036 5037 if (reset_mask & RADEON_RESET_IH) 5038 srbm_soft_reset |= SOFT_RESET_IH; 5039 5040 if (reset_mask & RADEON_RESET_GRBM) 5041 srbm_soft_reset |= SOFT_RESET_GRBM; 5042 5043 if (reset_mask & RADEON_RESET_VMC) 5044 srbm_soft_reset |= SOFT_RESET_VMC; 5045 5046 if (!(rdev->flags & RADEON_IS_IGP)) { 5047 if (reset_mask & RADEON_RESET_MC) 5048 srbm_soft_reset |= SOFT_RESET_MC; 5049 } 5050 5051 if (grbm_soft_reset) { 5052 tmp = RREG32(GRBM_SOFT_RESET); 5053 tmp |= grbm_soft_reset; 5054 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5055 WREG32(GRBM_SOFT_RESET, tmp); 5056 tmp = RREG32(GRBM_SOFT_RESET); 5057 5058 udelay(50); 5059 5060 tmp &= ~grbm_soft_reset; 5061 WREG32(GRBM_SOFT_RESET, tmp); 5062 tmp = RREG32(GRBM_SOFT_RESET); 5063 } 5064 5065 if (srbm_soft_reset) { 5066 tmp = RREG32(SRBM_SOFT_RESET); 5067 tmp |= srbm_soft_reset; 5068 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5069 WREG32(SRBM_SOFT_RESET, tmp); 5070 tmp = RREG32(SRBM_SOFT_RESET); 5071 5072 udelay(50); 5073 5074 tmp &= ~srbm_soft_reset; 5075 WREG32(SRBM_SOFT_RESET, tmp); 5076 tmp = RREG32(SRBM_SOFT_RESET); 5077 } 5078 5079 /* Wait a little for things to settle down */ 5080 udelay(50); 5081 5082 evergreen_mc_resume(rdev, &save); 5083 udelay(50); 5084 5085 cik_print_gpu_status_regs(rdev); 5086 } 5087 5088 struct kv_reset_save_regs { 5089 u32 gmcon_reng_execute; 5090 u32 gmcon_misc; 5091 u32 gmcon_misc3; 5092 }; 5093 5094 static void kv_save_regs_for_reset(struct radeon_device *rdev, 5095 struct kv_reset_save_regs *save) 5096 { 5097 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE); 5098 save->gmcon_misc = RREG32(GMCON_MISC); 5099 save->gmcon_misc3 = RREG32(GMCON_MISC3); 5100 5101 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP); 5102 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE | 5103 STCTRL_STUTTER_EN)); 5104 } 5105 5106 static void kv_restore_regs_for_reset(struct radeon_device *rdev, 5107 struct kv_reset_save_regs *save) 5108 { 5109 int i; 5110 5111 WREG32(GMCON_PGFSM_WRITE, 0); 5112 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff); 5113 5114 for (i = 0; i < 5; i++) 5115 WREG32(GMCON_PGFSM_WRITE, 0); 5116 5117 WREG32(GMCON_PGFSM_WRITE, 0); 5118 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff); 5119 5120 for (i = 0; i < 5; i++) 5121 WREG32(GMCON_PGFSM_WRITE, 0); 5122 5123 WREG32(GMCON_PGFSM_WRITE, 0x210000); 5124 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff); 5125 5126 for (i = 0; i < 5; i++) 5127 WREG32(GMCON_PGFSM_WRITE, 0); 5128 5129 WREG32(GMCON_PGFSM_WRITE, 0x21003); 5130 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff); 5131 5132 for (i = 0; i < 5; i++) 5133 WREG32(GMCON_PGFSM_WRITE, 0); 5134 5135 WREG32(GMCON_PGFSM_WRITE, 0x2b00); 5136 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff); 5137 5138 for (i = 0; i < 5; i++) 5139 WREG32(GMCON_PGFSM_WRITE, 0); 5140 5141 WREG32(GMCON_PGFSM_WRITE, 0); 5142 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff); 5143 5144 for (i = 0; i < 5; i++) 5145 WREG32(GMCON_PGFSM_WRITE, 0); 5146 5147 WREG32(GMCON_PGFSM_WRITE, 0x420000); 5148 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff); 5149 5150 for (i = 0; i < 5; i++) 5151 WREG32(GMCON_PGFSM_WRITE, 0); 5152 5153 WREG32(GMCON_PGFSM_WRITE, 0x120202); 5154 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff); 5155 5156 for (i = 0; i < 5; i++) 5157 WREG32(GMCON_PGFSM_WRITE, 0); 5158 5159 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36); 5160 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff); 5161 5162 for (i = 0; i < 5; i++) 5163 WREG32(GMCON_PGFSM_WRITE, 0); 5164 5165 WREG32(GMCON_PGFSM_WRITE, 0x373f3e); 5166 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff); 5167 5168 for (i = 0; i < 5; i++) 5169 WREG32(GMCON_PGFSM_WRITE, 0); 5170 5171 WREG32(GMCON_PGFSM_WRITE, 0x3e1332); 5172 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff); 5173 5174 WREG32(GMCON_MISC3, save->gmcon_misc3); 5175 WREG32(GMCON_MISC, save->gmcon_misc); 5176 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute); 5177 } 5178 5179 static void cik_gpu_pci_config_reset(struct radeon_device *rdev) 5180 { 5181 struct evergreen_mc_save save; 5182 struct kv_reset_save_regs kv_save = { 0 }; 5183 u32 tmp, i; 5184 5185 dev_info(rdev->dev, "GPU pci config reset\n"); 5186 5187 /* disable dpm? */ 5188 5189 /* disable cg/pg */ 5190 cik_fini_pg(rdev); 5191 cik_fini_cg(rdev); 5192 5193 /* Disable GFX parsing/prefetching */ 5194 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT); 5195 5196 /* Disable MEC parsing/prefetching */ 5197 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT); 5198 5199 /* sdma0 */ 5200 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET); 5201 tmp |= SDMA_HALT; 5202 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp); 5203 /* sdma1 */ 5204 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET); 5205 tmp |= SDMA_HALT; 5206 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp); 5207 /* XXX other engines? */ 5208 5209 /* halt the rlc, disable cp internal ints */ 5210 cik_rlc_stop(rdev); 5211 5212 udelay(50); 5213 5214 /* disable mem access */ 5215 evergreen_mc_stop(rdev, &save); 5216 if (evergreen_mc_wait_for_idle(rdev)) { 5217 dev_warn(rdev->dev, "Wait for MC idle timed out !\n"); 5218 } 5219 5220 if (rdev->flags & RADEON_IS_IGP) 5221 kv_save_regs_for_reset(rdev, &kv_save); 5222 5223 /* disable BM */ 5224 pci_clear_master(rdev->pdev); 5225 /* reset */ 5226 radeon_pci_config_reset(rdev); 5227 5228 udelay(100); 5229 5230 /* wait for asic to come out of reset */ 5231 for (i = 0; i < rdev->usec_timeout; i++) { 5232 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff) 5233 break; 5234 udelay(1); 5235 } 5236 5237 /* does asic init need to be run first??? */ 5238 if (rdev->flags & RADEON_IS_IGP) 5239 kv_restore_regs_for_reset(rdev, &kv_save); 5240 } 5241 5242 /** 5243 * cik_asic_reset - soft reset GPU 5244 * 5245 * @rdev: radeon_device pointer 5246 * @hard: force hard reset 5247 * 5248 * Look up which blocks are hung and attempt 5249 * to reset them. 5250 * Returns 0 for success. 5251 */ 5252 int cik_asic_reset(struct radeon_device *rdev, bool hard) 5253 { 5254 u32 reset_mask; 5255 5256 if (hard) { 5257 cik_gpu_pci_config_reset(rdev); 5258 return 0; 5259 } 5260 5261 reset_mask = cik_gpu_check_soft_reset(rdev); 5262 5263 if (reset_mask) 5264 r600_set_bios_scratch_engine_hung(rdev, true); 5265 5266 /* try soft reset */ 5267 cik_gpu_soft_reset(rdev, reset_mask); 5268 5269 reset_mask = cik_gpu_check_soft_reset(rdev); 5270 5271 /* try pci config reset */ 5272 if (reset_mask && radeon_hard_reset) 5273 cik_gpu_pci_config_reset(rdev); 5274 5275 reset_mask = cik_gpu_check_soft_reset(rdev); 5276 5277 if (!reset_mask) 5278 r600_set_bios_scratch_engine_hung(rdev, false); 5279 5280 return 0; 5281 } 5282 5283 /** 5284 * cik_gfx_is_lockup - check if the 3D engine is locked up 5285 * 5286 * @rdev: radeon_device pointer 5287 * @ring: radeon_ring structure holding ring information 5288 * 5289 * Check if the 3D engine is locked up (CIK). 5290 * Returns true if the engine is locked, false if not. 5291 */ 5292 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 5293 { 5294 u32 reset_mask = cik_gpu_check_soft_reset(rdev); 5295 5296 if (!(reset_mask & (RADEON_RESET_GFX | 5297 RADEON_RESET_COMPUTE | 5298 RADEON_RESET_CP))) { 5299 radeon_ring_lockup_update(rdev, ring); 5300 return false; 5301 } 5302 return radeon_ring_test_lockup(rdev, ring); 5303 } 5304 5305 /* MC */ 5306 /** 5307 * cik_mc_program - program the GPU memory controller 5308 * 5309 * @rdev: radeon_device pointer 5310 * 5311 * Set the location of vram, gart, and AGP in the GPU's 5312 * physical address space (CIK). 5313 */ 5314 static void cik_mc_program(struct radeon_device *rdev) 5315 { 5316 struct evergreen_mc_save save; 5317 u32 tmp; 5318 int i, j; 5319 5320 /* Initialize HDP */ 5321 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 5322 WREG32((0x2c14 + j), 0x00000000); 5323 WREG32((0x2c18 + j), 0x00000000); 5324 WREG32((0x2c1c + j), 0x00000000); 5325 WREG32((0x2c20 + j), 0x00000000); 5326 WREG32((0x2c24 + j), 0x00000000); 5327 } 5328 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); 5329 5330 evergreen_mc_stop(rdev, &save); 5331 if (radeon_mc_wait_for_idle(rdev)) { 5332 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 5333 } 5334 /* Lockout access through VGA aperture*/ 5335 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); 5336 /* Update configuration */ 5337 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 5338 rdev->mc.vram_start >> 12); 5339 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 5340 rdev->mc.vram_end >> 12); 5341 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 5342 rdev->vram_scratch.gpu_addr >> 12); 5343 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; 5344 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); 5345 WREG32(MC_VM_FB_LOCATION, tmp); 5346 /* XXX double check these! */ 5347 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); 5348 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); 5349 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); 5350 WREG32(MC_VM_AGP_BASE, 0); 5351 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); 5352 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); 5353 if (radeon_mc_wait_for_idle(rdev)) { 5354 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 5355 } 5356 evergreen_mc_resume(rdev, &save); 5357 /* we need to own VRAM, so turn off the VGA renderer here 5358 * to stop it overwriting our objects */ 5359 rv515_vga_render_disable(rdev); 5360 } 5361 5362 /** 5363 * cik_mc_init - initialize the memory controller driver params 5364 * 5365 * @rdev: radeon_device pointer 5366 * 5367 * Look up the amount of vram, vram width, and decide how to place 5368 * vram and gart within the GPU's physical address space (CIK). 5369 * Returns 0 for success. 5370 */ 5371 static int cik_mc_init(struct radeon_device *rdev) 5372 { 5373 u32 tmp; 5374 int chansize, numchan; 5375 5376 /* Get VRAM informations */ 5377 rdev->mc.vram_is_ddr = true; 5378 tmp = RREG32(MC_ARB_RAMCFG); 5379 if (tmp & CHANSIZE_MASK) { 5380 chansize = 64; 5381 } else { 5382 chansize = 32; 5383 } 5384 tmp = RREG32(MC_SHARED_CHMAP); 5385 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 5386 case 0: 5387 default: 5388 numchan = 1; 5389 break; 5390 case 1: 5391 numchan = 2; 5392 break; 5393 case 2: 5394 numchan = 4; 5395 break; 5396 case 3: 5397 numchan = 8; 5398 break; 5399 case 4: 5400 numchan = 3; 5401 break; 5402 case 5: 5403 numchan = 6; 5404 break; 5405 case 6: 5406 numchan = 10; 5407 break; 5408 case 7: 5409 numchan = 12; 5410 break; 5411 case 8: 5412 numchan = 16; 5413 break; 5414 } 5415 rdev->mc.vram_width = numchan * chansize; 5416 /* Could aper size report 0 ? */ 5417 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 5418 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 5419 /* size in MB on si */ 5420 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; 5421 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; 5422 rdev->mc.visible_vram_size = rdev->mc.aper_size; 5423 si_vram_gtt_location(rdev, &rdev->mc); 5424 radeon_update_bandwidth_info(rdev); 5425 5426 return 0; 5427 } 5428 5429 /* 5430 * GART 5431 * VMID 0 is the physical GPU addresses as used by the kernel. 5432 * VMIDs 1-15 are used for userspace clients and are handled 5433 * by the radeon vm/hsa code. 5434 */ 5435 /** 5436 * cik_pcie_gart_tlb_flush - gart tlb flush callback 5437 * 5438 * @rdev: radeon_device pointer 5439 * 5440 * Flush the TLB for the VMID 0 page table (CIK). 5441 */ 5442 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) 5443 { 5444 /* flush hdp cache */ 5445 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); 5446 5447 /* bits 0-15 are the VM contexts0-15 */ 5448 WREG32(VM_INVALIDATE_REQUEST, 0x1); 5449 } 5450 5451 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev) 5452 { 5453 int i; 5454 uint32_t sh_mem_bases, sh_mem_config; 5455 5456 sh_mem_bases = 0x6000 | 0x6000 << 16; 5457 sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); 5458 sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); 5459 5460 mutex_lock(&rdev->srbm_mutex); 5461 for (i = 8; i < 16; i++) { 5462 cik_srbm_select(rdev, 0, 0, 0, i); 5463 /* CP and shaders */ 5464 WREG32(SH_MEM_CONFIG, sh_mem_config); 5465 WREG32(SH_MEM_APE1_BASE, 1); 5466 WREG32(SH_MEM_APE1_LIMIT, 0); 5467 WREG32(SH_MEM_BASES, sh_mem_bases); 5468 } 5469 cik_srbm_select(rdev, 0, 0, 0, 0); 5470 mutex_unlock(&rdev->srbm_mutex); 5471 } 5472 5473 /** 5474 * cik_pcie_gart_enable - gart enable 5475 * 5476 * @rdev: radeon_device pointer 5477 * 5478 * This sets up the TLBs, programs the page tables for VMID0, 5479 * sets up the hw for VMIDs 1-15 which are allocated on 5480 * demand, and sets up the global locations for the LDS, GDS, 5481 * and GPUVM for FSA64 clients (CIK). 5482 * Returns 0 for success, errors for failure. 5483 */ 5484 static int cik_pcie_gart_enable(struct radeon_device *rdev) 5485 { 5486 int r, i; 5487 5488 if (rdev->gart.robj == NULL) { 5489 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 5490 return -EINVAL; 5491 } 5492 r = radeon_gart_table_vram_pin(rdev); 5493 if (r) 5494 return r; 5495 /* Setup TLB control */ 5496 WREG32(MC_VM_MX_L1_TLB_CNTL, 5497 (0xA << 7) | 5498 ENABLE_L1_TLB | 5499 ENABLE_L1_FRAGMENT_PROCESSING | 5500 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 5501 ENABLE_ADVANCED_DRIVER_MODEL | 5502 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 5503 /* Setup L2 cache */ 5504 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 5505 ENABLE_L2_FRAGMENT_PROCESSING | 5506 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 5507 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 5508 EFFECTIVE_L2_QUEUE_SIZE(7) | 5509 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 5510 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 5511 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 5512 BANK_SELECT(4) | 5513 L2_CACHE_BIGK_FRAGMENT_SIZE(4)); 5514 /* setup context0 */ 5515 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 5516 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 5517 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 5518 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 5519 (u32)(rdev->dummy_page.addr >> 12)); 5520 WREG32(VM_CONTEXT0_CNTL2, 0); 5521 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 5522 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); 5523 5524 WREG32(0x15D4, 0); 5525 WREG32(0x15D8, 0); 5526 WREG32(0x15DC, 0); 5527 5528 /* restore context1-15 */ 5529 /* set vm size, must be a multiple of 4 */ 5530 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); 5531 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1); 5532 for (i = 1; i < 16; i++) { 5533 if (i < 8) 5534 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), 5535 rdev->vm_manager.saved_table_addr[i]); 5536 else 5537 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), 5538 rdev->vm_manager.saved_table_addr[i]); 5539 } 5540 5541 /* enable context1-15 */ 5542 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 5543 (u32)(rdev->dummy_page.addr >> 12)); 5544 WREG32(VM_CONTEXT1_CNTL2, 4); 5545 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 5546 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) | 5547 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 5548 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | 5549 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 5550 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | 5551 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | 5552 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | 5553 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | 5554 VALID_PROTECTION_FAULT_ENABLE_DEFAULT | 5555 READ_PROTECTION_FAULT_ENABLE_INTERRUPT | 5556 READ_PROTECTION_FAULT_ENABLE_DEFAULT | 5557 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | 5558 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); 5559 5560 if (rdev->family == CHIP_KAVERI) { 5561 u32 tmp = RREG32(CHUB_CONTROL); 5562 tmp &= ~BYPASS_VM; 5563 WREG32(CHUB_CONTROL, tmp); 5564 } 5565 5566 /* XXX SH_MEM regs */ 5567 /* where to put LDS, scratch, GPUVM in FSA64 space */ 5568 mutex_lock(&rdev->srbm_mutex); 5569 for (i = 0; i < 16; i++) { 5570 cik_srbm_select(rdev, 0, 0, 0, i); 5571 /* CP and shaders */ 5572 WREG32(SH_MEM_CONFIG, 0); 5573 WREG32(SH_MEM_APE1_BASE, 1); 5574 WREG32(SH_MEM_APE1_LIMIT, 0); 5575 WREG32(SH_MEM_BASES, 0); 5576 /* SDMA GFX */ 5577 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); 5578 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); 5579 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); 5580 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); 5581 /* XXX SDMA RLC - todo */ 5582 } 5583 cik_srbm_select(rdev, 0, 0, 0, 0); 5584 mutex_unlock(&rdev->srbm_mutex); 5585 5586 cik_pcie_init_compute_vmid(rdev); 5587 5588 cik_pcie_gart_tlb_flush(rdev); 5589 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 5590 (unsigned)(rdev->mc.gtt_size >> 20), 5591 (unsigned long long)rdev->gart.table_addr); 5592 rdev->gart.ready = true; 5593 return 0; 5594 } 5595 5596 /** 5597 * cik_pcie_gart_disable - gart disable 5598 * 5599 * @rdev: radeon_device pointer 5600 * 5601 * This disables all VM page table (CIK). 5602 */ 5603 static void cik_pcie_gart_disable(struct radeon_device *rdev) 5604 { 5605 unsigned i; 5606 5607 for (i = 1; i < 16; ++i) { 5608 uint32_t reg; 5609 if (i < 8) 5610 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2); 5611 else 5612 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2); 5613 rdev->vm_manager.saved_table_addr[i] = RREG32(reg); 5614 } 5615 5616 /* Disable all tables */ 5617 WREG32(VM_CONTEXT0_CNTL, 0); 5618 WREG32(VM_CONTEXT1_CNTL, 0); 5619 /* Setup TLB control */ 5620 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | 5621 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 5622 /* Setup L2 cache */ 5623 WREG32(VM_L2_CNTL, 5624 ENABLE_L2_FRAGMENT_PROCESSING | 5625 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 5626 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 5627 EFFECTIVE_L2_QUEUE_SIZE(7) | 5628 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 5629 WREG32(VM_L2_CNTL2, 0); 5630 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 5631 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 5632 radeon_gart_table_vram_unpin(rdev); 5633 } 5634 5635 /** 5636 * cik_pcie_gart_fini - vm fini callback 5637 * 5638 * @rdev: radeon_device pointer 5639 * 5640 * Tears down the driver GART/VM setup (CIK). 5641 */ 5642 static void cik_pcie_gart_fini(struct radeon_device *rdev) 5643 { 5644 cik_pcie_gart_disable(rdev); 5645 radeon_gart_table_vram_free(rdev); 5646 radeon_gart_fini(rdev); 5647 } 5648 5649 /* vm parser */ 5650 /** 5651 * cik_ib_parse - vm ib_parse callback 5652 * 5653 * @rdev: radeon_device pointer 5654 * @ib: indirect buffer pointer 5655 * 5656 * CIK uses hw IB checking so this is a nop (CIK). 5657 */ 5658 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 5659 { 5660 return 0; 5661 } 5662 5663 /* 5664 * vm 5665 * VMID 0 is the physical GPU addresses as used by the kernel. 5666 * VMIDs 1-15 are used for userspace clients and are handled 5667 * by the radeon vm/hsa code. 5668 */ 5669 /** 5670 * cik_vm_init - cik vm init callback 5671 * 5672 * @rdev: radeon_device pointer 5673 * 5674 * Inits cik specific vm parameters (number of VMs, base of vram for 5675 * VMIDs 1-15) (CIK). 5676 * Returns 0 for success. 5677 */ 5678 int cik_vm_init(struct radeon_device *rdev) 5679 { 5680 /* 5681 * number of VMs 5682 * VMID 0 is reserved for System 5683 * radeon graphics/compute will use VMIDs 1-7 5684 * amdkfd will use VMIDs 8-15 5685 */ 5686 rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; 5687 /* base offset of vram pages */ 5688 if (rdev->flags & RADEON_IS_IGP) { 5689 u64 tmp = RREG32(MC_VM_FB_OFFSET); 5690 tmp <<= 22; 5691 rdev->vm_manager.vram_base_offset = tmp; 5692 } else 5693 rdev->vm_manager.vram_base_offset = 0; 5694 5695 return 0; 5696 } 5697 5698 /** 5699 * cik_vm_fini - cik vm fini callback 5700 * 5701 * @rdev: radeon_device pointer 5702 * 5703 * Tear down any asic specific VM setup (CIK). 5704 */ 5705 void cik_vm_fini(struct radeon_device *rdev) 5706 { 5707 } 5708 5709 /** 5710 * cik_vm_decode_fault - print human readable fault info 5711 * 5712 * @rdev: radeon_device pointer 5713 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 5714 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value 5715 * 5716 * Print human readable fault information (CIK). 5717 */ 5718 static void cik_vm_decode_fault(struct radeon_device *rdev, 5719 u32 status, u32 addr, u32 mc_client) 5720 { 5721 u32 mc_id; 5722 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; 5723 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; 5724 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 5725 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 5726 5727 if (rdev->family == CHIP_HAWAII) 5728 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 5729 else 5730 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 5731 5732 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 5733 protections, vmid, addr, 5734 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", 5735 block, mc_client, mc_id); 5736 } 5737 5738 /** 5739 * cik_vm_flush - cik vm flush using the CP 5740 * 5741 * @rdev: radeon_device pointer 5742 * 5743 * Update the page table base and flush the VM TLB 5744 * using the CP (CIK). 5745 */ 5746 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, 5747 unsigned vm_id, uint64_t pd_addr) 5748 { 5749 int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX); 5750 5751 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5752 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5753 WRITE_DATA_DST_SEL(0))); 5754 if (vm_id < 8) { 5755 radeon_ring_write(ring, 5756 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); 5757 } else { 5758 radeon_ring_write(ring, 5759 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); 5760 } 5761 radeon_ring_write(ring, 0); 5762 radeon_ring_write(ring, pd_addr >> 12); 5763 5764 /* update SH_MEM_* regs */ 5765 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5766 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5767 WRITE_DATA_DST_SEL(0))); 5768 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 5769 radeon_ring_write(ring, 0); 5770 radeon_ring_write(ring, VMID(vm_id)); 5771 5772 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); 5773 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5774 WRITE_DATA_DST_SEL(0))); 5775 radeon_ring_write(ring, SH_MEM_BASES >> 2); 5776 radeon_ring_write(ring, 0); 5777 5778 radeon_ring_write(ring, 0); /* SH_MEM_BASES */ 5779 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ 5780 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ 5781 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ 5782 5783 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5784 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5785 WRITE_DATA_DST_SEL(0))); 5786 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 5787 radeon_ring_write(ring, 0); 5788 radeon_ring_write(ring, VMID(0)); 5789 5790 /* HDP flush */ 5791 cik_hdp_flush_cp_ring_emit(rdev, ring->idx); 5792 5793 /* bits 0-15 are the VM contexts0-15 */ 5794 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5795 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5796 WRITE_DATA_DST_SEL(0))); 5797 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); 5798 radeon_ring_write(ring, 0); 5799 radeon_ring_write(ring, 1 << vm_id); 5800 5801 /* wait for the invalidate to complete */ 5802 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5803 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 5804 WAIT_REG_MEM_FUNCTION(0) | /* always */ 5805 WAIT_REG_MEM_ENGINE(0))); /* me */ 5806 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); 5807 radeon_ring_write(ring, 0); 5808 radeon_ring_write(ring, 0); /* ref */ 5809 radeon_ring_write(ring, 0); /* mask */ 5810 radeon_ring_write(ring, 0x20); /* poll interval */ 5811 5812 /* compute doesn't have PFP */ 5813 if (usepfp) { 5814 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5815 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5816 radeon_ring_write(ring, 0x0); 5817 } 5818 } 5819 5820 /* 5821 * RLC 5822 * The RLC is a multi-purpose microengine that handles a 5823 * variety of functions, the most important of which is 5824 * the interrupt controller. 5825 */ 5826 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 5827 bool enable) 5828 { 5829 u32 tmp = RREG32(CP_INT_CNTL_RING0); 5830 5831 if (enable) 5832 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 5833 else 5834 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 5835 WREG32(CP_INT_CNTL_RING0, tmp); 5836 } 5837 5838 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) 5839 { 5840 u32 tmp; 5841 5842 tmp = RREG32(RLC_LB_CNTL); 5843 if (enable) 5844 tmp |= LOAD_BALANCE_ENABLE; 5845 else 5846 tmp &= ~LOAD_BALANCE_ENABLE; 5847 WREG32(RLC_LB_CNTL, tmp); 5848 } 5849 5850 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) 5851 { 5852 u32 i, j, k; 5853 u32 mask; 5854 5855 mutex_lock(&rdev->grbm_idx_mutex); 5856 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 5857 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 5858 cik_select_se_sh(rdev, i, j); 5859 for (k = 0; k < rdev->usec_timeout; k++) { 5860 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) 5861 break; 5862 udelay(1); 5863 } 5864 } 5865 } 5866 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5867 mutex_unlock(&rdev->grbm_idx_mutex); 5868 5869 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; 5870 for (k = 0; k < rdev->usec_timeout; k++) { 5871 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 5872 break; 5873 udelay(1); 5874 } 5875 } 5876 5877 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc) 5878 { 5879 u32 tmp; 5880 5881 tmp = RREG32(RLC_CNTL); 5882 if (tmp != rlc) 5883 WREG32(RLC_CNTL, rlc); 5884 } 5885 5886 static u32 cik_halt_rlc(struct radeon_device *rdev) 5887 { 5888 u32 data, orig; 5889 5890 orig = data = RREG32(RLC_CNTL); 5891 5892 if (data & RLC_ENABLE) { 5893 u32 i; 5894 5895 data &= ~RLC_ENABLE; 5896 WREG32(RLC_CNTL, data); 5897 5898 for (i = 0; i < rdev->usec_timeout; i++) { 5899 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) 5900 break; 5901 udelay(1); 5902 } 5903 5904 cik_wait_for_rlc_serdes(rdev); 5905 } 5906 5907 return orig; 5908 } 5909 5910 void cik_enter_rlc_safe_mode(struct radeon_device *rdev) 5911 { 5912 u32 tmp, i, mask; 5913 5914 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE); 5915 WREG32(RLC_GPR_REG2, tmp); 5916 5917 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; 5918 for (i = 0; i < rdev->usec_timeout; i++) { 5919 if ((RREG32(RLC_GPM_STAT) & mask) == mask) 5920 break; 5921 udelay(1); 5922 } 5923 5924 for (i = 0; i < rdev->usec_timeout; i++) { 5925 if ((RREG32(RLC_GPR_REG2) & REQ) == 0) 5926 break; 5927 udelay(1); 5928 } 5929 } 5930 5931 void cik_exit_rlc_safe_mode(struct radeon_device *rdev) 5932 { 5933 u32 tmp; 5934 5935 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); 5936 WREG32(RLC_GPR_REG2, tmp); 5937 } 5938 5939 /** 5940 * cik_rlc_stop - stop the RLC ME 5941 * 5942 * @rdev: radeon_device pointer 5943 * 5944 * Halt the RLC ME (MicroEngine) (CIK). 5945 */ 5946 static void cik_rlc_stop(struct radeon_device *rdev) 5947 { 5948 WREG32(RLC_CNTL, 0); 5949 5950 cik_enable_gui_idle_interrupt(rdev, false); 5951 5952 cik_wait_for_rlc_serdes(rdev); 5953 } 5954 5955 /** 5956 * cik_rlc_start - start the RLC ME 5957 * 5958 * @rdev: radeon_device pointer 5959 * 5960 * Unhalt the RLC ME (MicroEngine) (CIK). 5961 */ 5962 static void cik_rlc_start(struct radeon_device *rdev) 5963 { 5964 WREG32(RLC_CNTL, RLC_ENABLE); 5965 5966 cik_enable_gui_idle_interrupt(rdev, true); 5967 5968 udelay(50); 5969 } 5970 5971 /** 5972 * cik_rlc_resume - setup the RLC hw 5973 * 5974 * @rdev: radeon_device pointer 5975 * 5976 * Initialize the RLC registers, load the ucode, 5977 * and start the RLC (CIK). 5978 * Returns 0 for success, -EINVAL if the ucode is not available. 5979 */ 5980 static int cik_rlc_resume(struct radeon_device *rdev) 5981 { 5982 u32 i, size, tmp; 5983 5984 if (!rdev->rlc_fw) 5985 return -EINVAL; 5986 5987 cik_rlc_stop(rdev); 5988 5989 /* disable CG */ 5990 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; 5991 WREG32(RLC_CGCG_CGLS_CTRL, tmp); 5992 5993 si_rlc_reset(rdev); 5994 5995 cik_init_pg(rdev); 5996 5997 cik_init_cg(rdev); 5998 5999 WREG32(RLC_LB_CNTR_INIT, 0); 6000 WREG32(RLC_LB_CNTR_MAX, 0x00008000); 6001 6002 mutex_lock(&rdev->grbm_idx_mutex); 6003 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 6004 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); 6005 WREG32(RLC_LB_PARAMS, 0x00600408); 6006 WREG32(RLC_LB_CNTL, 0x80000004); 6007 mutex_unlock(&rdev->grbm_idx_mutex); 6008 6009 WREG32(RLC_MC_CNTL, 0); 6010 WREG32(RLC_UCODE_CNTL, 0); 6011 6012 if (rdev->new_fw) { 6013 const struct rlc_firmware_header_v1_0 *hdr = 6014 (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data; 6015 const __le32 *fw_data = (const __le32 *) 6016 (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 6017 6018 radeon_ucode_print_rlc_hdr(&hdr->header); 6019 6020 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 6021 WREG32(RLC_GPM_UCODE_ADDR, 0); 6022 for (i = 0; i < size; i++) 6023 WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 6024 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version)); 6025 } else { 6026 const __be32 *fw_data; 6027 6028 switch (rdev->family) { 6029 case CHIP_BONAIRE: 6030 case CHIP_HAWAII: 6031 default: 6032 size = BONAIRE_RLC_UCODE_SIZE; 6033 break; 6034 case CHIP_KAVERI: 6035 size = KV_RLC_UCODE_SIZE; 6036 break; 6037 case CHIP_KABINI: 6038 size = KB_RLC_UCODE_SIZE; 6039 break; 6040 case CHIP_MULLINS: 6041 size = ML_RLC_UCODE_SIZE; 6042 break; 6043 } 6044 6045 fw_data = (const __be32 *)rdev->rlc_fw->data; 6046 WREG32(RLC_GPM_UCODE_ADDR, 0); 6047 for (i = 0; i < size; i++) 6048 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); 6049 WREG32(RLC_GPM_UCODE_ADDR, 0); 6050 } 6051 6052 /* XXX - find out what chips support lbpw */ 6053 cik_enable_lbpw(rdev, false); 6054 6055 if (rdev->family == CHIP_BONAIRE) 6056 WREG32(RLC_DRIVER_DMA_STATUS, 0); 6057 6058 cik_rlc_start(rdev); 6059 6060 return 0; 6061 } 6062 6063 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) 6064 { 6065 u32 data, orig, tmp, tmp2; 6066 6067 orig = data = RREG32(RLC_CGCG_CGLS_CTRL); 6068 6069 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) { 6070 cik_enable_gui_idle_interrupt(rdev, true); 6071 6072 tmp = cik_halt_rlc(rdev); 6073 6074 mutex_lock(&rdev->grbm_idx_mutex); 6075 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 6076 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 6077 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 6078 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; 6079 WREG32(RLC_SERDES_WR_CTRL, tmp2); 6080 mutex_unlock(&rdev->grbm_idx_mutex); 6081 6082 cik_update_rlc(rdev, tmp); 6083 6084 data |= CGCG_EN | CGLS_EN; 6085 } else { 6086 cik_enable_gui_idle_interrupt(rdev, false); 6087 6088 RREG32(CB_CGTT_SCLK_CTRL); 6089 RREG32(CB_CGTT_SCLK_CTRL); 6090 RREG32(CB_CGTT_SCLK_CTRL); 6091 RREG32(CB_CGTT_SCLK_CTRL); 6092 6093 data &= ~(CGCG_EN | CGLS_EN); 6094 } 6095 6096 if (orig != data) 6097 WREG32(RLC_CGCG_CGLS_CTRL, data); 6098 6099 } 6100 6101 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) 6102 { 6103 u32 data, orig, tmp = 0; 6104 6105 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) { 6106 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) { 6107 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) { 6108 orig = data = RREG32(CP_MEM_SLP_CNTL); 6109 data |= CP_MEM_LS_EN; 6110 if (orig != data) 6111 WREG32(CP_MEM_SLP_CNTL, data); 6112 } 6113 } 6114 6115 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 6116 data |= 0x00000001; 6117 data &= 0xfffffffd; 6118 if (orig != data) 6119 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 6120 6121 tmp = cik_halt_rlc(rdev); 6122 6123 mutex_lock(&rdev->grbm_idx_mutex); 6124 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 6125 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 6126 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 6127 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; 6128 WREG32(RLC_SERDES_WR_CTRL, data); 6129 mutex_unlock(&rdev->grbm_idx_mutex); 6130 6131 cik_update_rlc(rdev, tmp); 6132 6133 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) { 6134 orig = data = RREG32(CGTS_SM_CTRL_REG); 6135 data &= ~SM_MODE_MASK; 6136 data |= SM_MODE(0x2); 6137 data |= SM_MODE_ENABLE; 6138 data &= ~CGTS_OVERRIDE; 6139 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) && 6140 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS)) 6141 data &= ~CGTS_LS_OVERRIDE; 6142 data &= ~ON_MONITOR_ADD_MASK; 6143 data |= ON_MONITOR_ADD_EN; 6144 data |= ON_MONITOR_ADD(0x96); 6145 if (orig != data) 6146 WREG32(CGTS_SM_CTRL_REG, data); 6147 } 6148 } else { 6149 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); 6150 data |= 0x00000003; 6151 if (orig != data) 6152 WREG32(RLC_CGTT_MGCG_OVERRIDE, data); 6153 6154 data = RREG32(RLC_MEM_SLP_CNTL); 6155 if (data & RLC_MEM_LS_EN) { 6156 data &= ~RLC_MEM_LS_EN; 6157 WREG32(RLC_MEM_SLP_CNTL, data); 6158 } 6159 6160 data = RREG32(CP_MEM_SLP_CNTL); 6161 if (data & CP_MEM_LS_EN) { 6162 data &= ~CP_MEM_LS_EN; 6163 WREG32(CP_MEM_SLP_CNTL, data); 6164 } 6165 6166 orig = data = RREG32(CGTS_SM_CTRL_REG); 6167 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE; 6168 if (orig != data) 6169 WREG32(CGTS_SM_CTRL_REG, data); 6170 6171 tmp = cik_halt_rlc(rdev); 6172 6173 mutex_lock(&rdev->grbm_idx_mutex); 6174 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 6175 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 6176 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 6177 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; 6178 WREG32(RLC_SERDES_WR_CTRL, data); 6179 mutex_unlock(&rdev->grbm_idx_mutex); 6180 6181 cik_update_rlc(rdev, tmp); 6182 } 6183 } 6184 6185 static const u32 mc_cg_registers[] = 6186 { 6187 MC_HUB_MISC_HUB_CG, 6188 MC_HUB_MISC_SIP_CG, 6189 MC_HUB_MISC_VM_CG, 6190 MC_XPB_CLK_GAT, 6191 ATC_MISC_CG, 6192 MC_CITF_MISC_WR_CG, 6193 MC_CITF_MISC_RD_CG, 6194 MC_CITF_MISC_VM_CG, 6195 VM_L2_CG, 6196 }; 6197 6198 static void cik_enable_mc_ls(struct radeon_device *rdev, 6199 bool enable) 6200 { 6201 int i; 6202 u32 orig, data; 6203 6204 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 6205 orig = data = RREG32(mc_cg_registers[i]); 6206 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS)) 6207 data |= MC_LS_ENABLE; 6208 else 6209 data &= ~MC_LS_ENABLE; 6210 if (data != orig) 6211 WREG32(mc_cg_registers[i], data); 6212 } 6213 } 6214 6215 static void cik_enable_mc_mgcg(struct radeon_device *rdev, 6216 bool enable) 6217 { 6218 int i; 6219 u32 orig, data; 6220 6221 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { 6222 orig = data = RREG32(mc_cg_registers[i]); 6223 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG)) 6224 data |= MC_CG_ENABLE; 6225 else 6226 data &= ~MC_CG_ENABLE; 6227 if (data != orig) 6228 WREG32(mc_cg_registers[i], data); 6229 } 6230 } 6231 6232 static void cik_enable_sdma_mgcg(struct radeon_device *rdev, 6233 bool enable) 6234 { 6235 u32 orig, data; 6236 6237 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) { 6238 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100); 6239 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100); 6240 } else { 6241 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET); 6242 data |= 0xff000000; 6243 if (data != orig) 6244 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data); 6245 6246 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET); 6247 data |= 0xff000000; 6248 if (data != orig) 6249 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data); 6250 } 6251 } 6252 6253 static void cik_enable_sdma_mgls(struct radeon_device *rdev, 6254 bool enable) 6255 { 6256 u32 orig, data; 6257 6258 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) { 6259 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 6260 data |= 0x100; 6261 if (orig != data) 6262 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 6263 6264 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 6265 data |= 0x100; 6266 if (orig != data) 6267 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 6268 } else { 6269 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET); 6270 data &= ~0x100; 6271 if (orig != data) 6272 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data); 6273 6274 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET); 6275 data &= ~0x100; 6276 if (orig != data) 6277 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data); 6278 } 6279 } 6280 6281 static void cik_enable_uvd_mgcg(struct radeon_device *rdev, 6282 bool enable) 6283 { 6284 u32 orig, data; 6285 6286 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) { 6287 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 6288 data = 0xfff; 6289 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 6290 6291 orig = data = RREG32(UVD_CGC_CTRL); 6292 data |= DCM; 6293 if (orig != data) 6294 WREG32(UVD_CGC_CTRL, data); 6295 } else { 6296 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); 6297 data &= ~0xfff; 6298 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data); 6299 6300 orig = data = RREG32(UVD_CGC_CTRL); 6301 data &= ~DCM; 6302 if (orig != data) 6303 WREG32(UVD_CGC_CTRL, data); 6304 } 6305 } 6306 6307 static void cik_enable_bif_mgls(struct radeon_device *rdev, 6308 bool enable) 6309 { 6310 u32 orig, data; 6311 6312 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); 6313 6314 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS)) 6315 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | 6316 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN; 6317 else 6318 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN | 6319 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN); 6320 6321 if (orig != data) 6322 WREG32_PCIE_PORT(PCIE_CNTL2, data); 6323 } 6324 6325 static void cik_enable_hdp_mgcg(struct radeon_device *rdev, 6326 bool enable) 6327 { 6328 u32 orig, data; 6329 6330 orig = data = RREG32(HDP_HOST_PATH_CNTL); 6331 6332 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG)) 6333 data &= ~CLOCK_GATING_DIS; 6334 else 6335 data |= CLOCK_GATING_DIS; 6336 6337 if (orig != data) 6338 WREG32(HDP_HOST_PATH_CNTL, data); 6339 } 6340 6341 static void cik_enable_hdp_ls(struct radeon_device *rdev, 6342 bool enable) 6343 { 6344 u32 orig, data; 6345 6346 orig = data = RREG32(HDP_MEM_POWER_LS); 6347 6348 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS)) 6349 data |= HDP_LS_ENABLE; 6350 else 6351 data &= ~HDP_LS_ENABLE; 6352 6353 if (orig != data) 6354 WREG32(HDP_MEM_POWER_LS, data); 6355 } 6356 6357 void cik_update_cg(struct radeon_device *rdev, 6358 u32 block, bool enable) 6359 { 6360 6361 if (block & RADEON_CG_BLOCK_GFX) { 6362 cik_enable_gui_idle_interrupt(rdev, false); 6363 /* order matters! */ 6364 if (enable) { 6365 cik_enable_mgcg(rdev, true); 6366 cik_enable_cgcg(rdev, true); 6367 } else { 6368 cik_enable_cgcg(rdev, false); 6369 cik_enable_mgcg(rdev, false); 6370 } 6371 cik_enable_gui_idle_interrupt(rdev, true); 6372 } 6373 6374 if (block & RADEON_CG_BLOCK_MC) { 6375 if (!(rdev->flags & RADEON_IS_IGP)) { 6376 cik_enable_mc_mgcg(rdev, enable); 6377 cik_enable_mc_ls(rdev, enable); 6378 } 6379 } 6380 6381 if (block & RADEON_CG_BLOCK_SDMA) { 6382 cik_enable_sdma_mgcg(rdev, enable); 6383 cik_enable_sdma_mgls(rdev, enable); 6384 } 6385 6386 if (block & RADEON_CG_BLOCK_BIF) { 6387 cik_enable_bif_mgls(rdev, enable); 6388 } 6389 6390 if (block & RADEON_CG_BLOCK_UVD) { 6391 if (rdev->has_uvd) 6392 cik_enable_uvd_mgcg(rdev, enable); 6393 } 6394 6395 if (block & RADEON_CG_BLOCK_HDP) { 6396 cik_enable_hdp_mgcg(rdev, enable); 6397 cik_enable_hdp_ls(rdev, enable); 6398 } 6399 6400 if (block & RADEON_CG_BLOCK_VCE) { 6401 vce_v2_0_enable_mgcg(rdev, enable); 6402 } 6403 } 6404 6405 static void cik_init_cg(struct radeon_device *rdev) 6406 { 6407 6408 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true); 6409 6410 if (rdev->has_uvd) 6411 si_init_uvd_internal_cg(rdev); 6412 6413 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 6414 RADEON_CG_BLOCK_SDMA | 6415 RADEON_CG_BLOCK_BIF | 6416 RADEON_CG_BLOCK_UVD | 6417 RADEON_CG_BLOCK_HDP), true); 6418 } 6419 6420 static void cik_fini_cg(struct radeon_device *rdev) 6421 { 6422 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC | 6423 RADEON_CG_BLOCK_SDMA | 6424 RADEON_CG_BLOCK_BIF | 6425 RADEON_CG_BLOCK_UVD | 6426 RADEON_CG_BLOCK_HDP), false); 6427 6428 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); 6429 } 6430 6431 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, 6432 bool enable) 6433 { 6434 u32 data, orig; 6435 6436 orig = data = RREG32(RLC_PG_CNTL); 6437 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 6438 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 6439 else 6440 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 6441 if (orig != data) 6442 WREG32(RLC_PG_CNTL, data); 6443 } 6444 6445 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, 6446 bool enable) 6447 { 6448 u32 data, orig; 6449 6450 orig = data = RREG32(RLC_PG_CNTL); 6451 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 6452 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 6453 else 6454 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 6455 if (orig != data) 6456 WREG32(RLC_PG_CNTL, data); 6457 } 6458 6459 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) 6460 { 6461 u32 data, orig; 6462 6463 orig = data = RREG32(RLC_PG_CNTL); 6464 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) 6465 data &= ~DISABLE_CP_PG; 6466 else 6467 data |= DISABLE_CP_PG; 6468 if (orig != data) 6469 WREG32(RLC_PG_CNTL, data); 6470 } 6471 6472 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) 6473 { 6474 u32 data, orig; 6475 6476 orig = data = RREG32(RLC_PG_CNTL); 6477 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS)) 6478 data &= ~DISABLE_GDS_PG; 6479 else 6480 data |= DISABLE_GDS_PG; 6481 if (orig != data) 6482 WREG32(RLC_PG_CNTL, data); 6483 } 6484 6485 #define CP_ME_TABLE_SIZE 96 6486 #define CP_ME_TABLE_OFFSET 2048 6487 #define CP_MEC_TABLE_OFFSET 4096 6488 6489 void cik_init_cp_pg_table(struct radeon_device *rdev) 6490 { 6491 volatile u32 *dst_ptr; 6492 int me, i, max_me = 4; 6493 u32 bo_offset = 0; 6494 u32 table_offset, table_size; 6495 6496 if (rdev->family == CHIP_KAVERI) 6497 max_me = 5; 6498 6499 if (rdev->rlc.cp_table_ptr == NULL) 6500 return; 6501 6502 /* write the cp table buffer */ 6503 dst_ptr = rdev->rlc.cp_table_ptr; 6504 for (me = 0; me < max_me; me++) { 6505 if (rdev->new_fw) { 6506 const __le32 *fw_data; 6507 const struct gfx_firmware_header_v1_0 *hdr; 6508 6509 if (me == 0) { 6510 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data; 6511 fw_data = (const __le32 *) 6512 (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 6513 table_offset = le32_to_cpu(hdr->jt_offset); 6514 table_size = le32_to_cpu(hdr->jt_size); 6515 } else if (me == 1) { 6516 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data; 6517 fw_data = (const __le32 *) 6518 (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 6519 table_offset = le32_to_cpu(hdr->jt_offset); 6520 table_size = le32_to_cpu(hdr->jt_size); 6521 } else if (me == 2) { 6522 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data; 6523 fw_data = (const __le32 *) 6524 (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 6525 table_offset = le32_to_cpu(hdr->jt_offset); 6526 table_size = le32_to_cpu(hdr->jt_size); 6527 } else if (me == 3) { 6528 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data; 6529 fw_data = (const __le32 *) 6530 (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 6531 table_offset = le32_to_cpu(hdr->jt_offset); 6532 table_size = le32_to_cpu(hdr->jt_size); 6533 } else { 6534 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data; 6535 fw_data = (const __le32 *) 6536 (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 6537 table_offset = le32_to_cpu(hdr->jt_offset); 6538 table_size = le32_to_cpu(hdr->jt_size); 6539 } 6540 6541 for (i = 0; i < table_size; i ++) { 6542 dst_ptr[bo_offset + i] = 6543 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 6544 } 6545 bo_offset += table_size; 6546 } else { 6547 const __be32 *fw_data; 6548 table_size = CP_ME_TABLE_SIZE; 6549 6550 if (me == 0) { 6551 fw_data = (const __be32 *)rdev->ce_fw->data; 6552 table_offset = CP_ME_TABLE_OFFSET; 6553 } else if (me == 1) { 6554 fw_data = (const __be32 *)rdev->pfp_fw->data; 6555 table_offset = CP_ME_TABLE_OFFSET; 6556 } else if (me == 2) { 6557 fw_data = (const __be32 *)rdev->me_fw->data; 6558 table_offset = CP_ME_TABLE_OFFSET; 6559 } else { 6560 fw_data = (const __be32 *)rdev->mec_fw->data; 6561 table_offset = CP_MEC_TABLE_OFFSET; 6562 } 6563 6564 for (i = 0; i < table_size; i ++) { 6565 dst_ptr[bo_offset + i] = 6566 cpu_to_le32(be32_to_cpu(fw_data[table_offset + i])); 6567 } 6568 bo_offset += table_size; 6569 } 6570 } 6571 } 6572 6573 static void cik_enable_gfx_cgpg(struct radeon_device *rdev, 6574 bool enable) 6575 { 6576 u32 data, orig; 6577 6578 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { 6579 orig = data = RREG32(RLC_PG_CNTL); 6580 data |= GFX_PG_ENABLE; 6581 if (orig != data) 6582 WREG32(RLC_PG_CNTL, data); 6583 6584 orig = data = RREG32(RLC_AUTO_PG_CTRL); 6585 data |= AUTO_PG_EN; 6586 if (orig != data) 6587 WREG32(RLC_AUTO_PG_CTRL, data); 6588 } else { 6589 orig = data = RREG32(RLC_PG_CNTL); 6590 data &= ~GFX_PG_ENABLE; 6591 if (orig != data) 6592 WREG32(RLC_PG_CNTL, data); 6593 6594 orig = data = RREG32(RLC_AUTO_PG_CTRL); 6595 data &= ~AUTO_PG_EN; 6596 if (orig != data) 6597 WREG32(RLC_AUTO_PG_CTRL, data); 6598 6599 data = RREG32(DB_RENDER_CONTROL); 6600 } 6601 } 6602 6603 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) 6604 { 6605 u32 mask = 0, tmp, tmp1; 6606 int i; 6607 6608 mutex_lock(&rdev->grbm_idx_mutex); 6609 cik_select_se_sh(rdev, se, sh); 6610 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); 6611 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); 6612 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 6613 mutex_unlock(&rdev->grbm_idx_mutex); 6614 6615 tmp &= 0xffff0000; 6616 6617 tmp |= tmp1; 6618 tmp >>= 16; 6619 6620 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) { 6621 mask <<= 1; 6622 mask |= 1; 6623 } 6624 6625 return (~tmp) & mask; 6626 } 6627 6628 static void cik_init_ao_cu_mask(struct radeon_device *rdev) 6629 { 6630 u32 i, j, k, active_cu_number = 0; 6631 u32 mask, counter, cu_bitmap; 6632 u32 tmp = 0; 6633 6634 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 6635 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 6636 mask = 1; 6637 cu_bitmap = 0; 6638 counter = 0; 6639 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) { 6640 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { 6641 if (counter < 2) 6642 cu_bitmap |= mask; 6643 counter ++; 6644 } 6645 mask <<= 1; 6646 } 6647 6648 active_cu_number += counter; 6649 tmp |= (cu_bitmap << (i * 16 + j * 8)); 6650 } 6651 } 6652 6653 WREG32(RLC_PG_AO_CU_MASK, tmp); 6654 6655 tmp = RREG32(RLC_MAX_PG_CU); 6656 tmp &= ~MAX_PU_CU_MASK; 6657 tmp |= MAX_PU_CU(active_cu_number); 6658 WREG32(RLC_MAX_PG_CU, tmp); 6659 } 6660 6661 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev, 6662 bool enable) 6663 { 6664 u32 data, orig; 6665 6666 orig = data = RREG32(RLC_PG_CNTL); 6667 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) 6668 data |= STATIC_PER_CU_PG_ENABLE; 6669 else 6670 data &= ~STATIC_PER_CU_PG_ENABLE; 6671 if (orig != data) 6672 WREG32(RLC_PG_CNTL, data); 6673 } 6674 6675 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, 6676 bool enable) 6677 { 6678 u32 data, orig; 6679 6680 orig = data = RREG32(RLC_PG_CNTL); 6681 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) 6682 data |= DYN_PER_CU_PG_ENABLE; 6683 else 6684 data &= ~DYN_PER_CU_PG_ENABLE; 6685 if (orig != data) 6686 WREG32(RLC_PG_CNTL, data); 6687 } 6688 6689 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 6690 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D 6691 6692 static void cik_init_gfx_cgpg(struct radeon_device *rdev) 6693 { 6694 u32 data, orig; 6695 u32 i; 6696 6697 if (rdev->rlc.cs_data) { 6698 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 6699 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); 6700 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); 6701 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); 6702 } else { 6703 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 6704 for (i = 0; i < 3; i++) 6705 WREG32(RLC_GPM_SCRATCH_DATA, 0); 6706 } 6707 if (rdev->rlc.reg_list) { 6708 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); 6709 for (i = 0; i < rdev->rlc.reg_list_size; i++) 6710 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); 6711 } 6712 6713 orig = data = RREG32(RLC_PG_CNTL); 6714 data |= GFX_PG_SRC; 6715 if (orig != data) 6716 WREG32(RLC_PG_CNTL, data); 6717 6718 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); 6719 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); 6720 6721 data = RREG32(CP_RB_WPTR_POLL_CNTL); 6722 data &= ~IDLE_POLL_COUNT_MASK; 6723 data |= IDLE_POLL_COUNT(0x60); 6724 WREG32(CP_RB_WPTR_POLL_CNTL, data); 6725 6726 data = 0x10101010; 6727 WREG32(RLC_PG_DELAY, data); 6728 6729 data = RREG32(RLC_PG_DELAY_2); 6730 data &= ~0xff; 6731 data |= 0x3; 6732 WREG32(RLC_PG_DELAY_2, data); 6733 6734 data = RREG32(RLC_AUTO_PG_CTRL); 6735 data &= ~GRBM_REG_SGIT_MASK; 6736 data |= GRBM_REG_SGIT(0x700); 6737 WREG32(RLC_AUTO_PG_CTRL, data); 6738 6739 } 6740 6741 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) 6742 { 6743 cik_enable_gfx_cgpg(rdev, enable); 6744 cik_enable_gfx_static_mgpg(rdev, enable); 6745 cik_enable_gfx_dynamic_mgpg(rdev, enable); 6746 } 6747 6748 u32 cik_get_csb_size(struct radeon_device *rdev) 6749 { 6750 u32 count = 0; 6751 const struct cs_section_def *sect = NULL; 6752 const struct cs_extent_def *ext = NULL; 6753 6754 if (rdev->rlc.cs_data == NULL) 6755 return 0; 6756 6757 /* begin clear state */ 6758 count += 2; 6759 /* context control state */ 6760 count += 3; 6761 6762 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 6763 for (ext = sect->section; ext->extent != NULL; ++ext) { 6764 if (sect->id == SECT_CONTEXT) 6765 count += 2 + ext->reg_count; 6766 else 6767 return 0; 6768 } 6769 } 6770 /* pa_sc_raster_config/pa_sc_raster_config1 */ 6771 count += 4; 6772 /* end clear state */ 6773 count += 2; 6774 /* clear state */ 6775 count += 2; 6776 6777 return count; 6778 } 6779 6780 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) 6781 { 6782 u32 count = 0, i; 6783 const struct cs_section_def *sect = NULL; 6784 const struct cs_extent_def *ext = NULL; 6785 6786 if (rdev->rlc.cs_data == NULL) 6787 return; 6788 if (buffer == NULL) 6789 return; 6790 6791 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 6792 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 6793 6794 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6795 buffer[count++] = cpu_to_le32(0x80000000); 6796 buffer[count++] = cpu_to_le32(0x80000000); 6797 6798 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 6799 for (ext = sect->section; ext->extent != NULL; ++ext) { 6800 if (sect->id == SECT_CONTEXT) { 6801 buffer[count++] = 6802 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 6803 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); 6804 for (i = 0; i < ext->reg_count; i++) 6805 buffer[count++] = cpu_to_le32(ext->extent[i]); 6806 } else { 6807 return; 6808 } 6809 } 6810 } 6811 6812 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 6813 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 6814 switch (rdev->family) { 6815 case CHIP_BONAIRE: 6816 buffer[count++] = cpu_to_le32(0x16000012); 6817 buffer[count++] = cpu_to_le32(0x00000000); 6818 break; 6819 case CHIP_KAVERI: 6820 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ 6821 buffer[count++] = cpu_to_le32(0x00000000); 6822 break; 6823 case CHIP_KABINI: 6824 case CHIP_MULLINS: 6825 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ 6826 buffer[count++] = cpu_to_le32(0x00000000); 6827 break; 6828 case CHIP_HAWAII: 6829 buffer[count++] = cpu_to_le32(0x3a00161a); 6830 buffer[count++] = cpu_to_le32(0x0000002e); 6831 break; 6832 default: 6833 buffer[count++] = cpu_to_le32(0x00000000); 6834 buffer[count++] = cpu_to_le32(0x00000000); 6835 break; 6836 } 6837 6838 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 6839 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 6840 6841 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 6842 buffer[count++] = cpu_to_le32(0); 6843 } 6844 6845 static void cik_init_pg(struct radeon_device *rdev) 6846 { 6847 if (rdev->pg_flags) { 6848 cik_enable_sck_slowdown_on_pu(rdev, true); 6849 cik_enable_sck_slowdown_on_pd(rdev, true); 6850 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 6851 cik_init_gfx_cgpg(rdev); 6852 cik_enable_cp_pg(rdev, true); 6853 cik_enable_gds_pg(rdev, true); 6854 } 6855 cik_init_ao_cu_mask(rdev); 6856 cik_update_gfx_pg(rdev, true); 6857 } 6858 } 6859 6860 static void cik_fini_pg(struct radeon_device *rdev) 6861 { 6862 if (rdev->pg_flags) { 6863 cik_update_gfx_pg(rdev, false); 6864 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 6865 cik_enable_cp_pg(rdev, false); 6866 cik_enable_gds_pg(rdev, false); 6867 } 6868 } 6869 } 6870 6871 /* 6872 * Interrupts 6873 * Starting with r6xx, interrupts are handled via a ring buffer. 6874 * Ring buffers are areas of GPU accessible memory that the GPU 6875 * writes interrupt vectors into and the host reads vectors out of. 6876 * There is a rptr (read pointer) that determines where the 6877 * host is currently reading, and a wptr (write pointer) 6878 * which determines where the GPU has written. When the 6879 * pointers are equal, the ring is idle. When the GPU 6880 * writes vectors to the ring buffer, it increments the 6881 * wptr. When there is an interrupt, the host then starts 6882 * fetching commands and processing them until the pointers are 6883 * equal again at which point it updates the rptr. 6884 */ 6885 6886 /** 6887 * cik_enable_interrupts - Enable the interrupt ring buffer 6888 * 6889 * @rdev: radeon_device pointer 6890 * 6891 * Enable the interrupt ring buffer (CIK). 6892 */ 6893 static void cik_enable_interrupts(struct radeon_device *rdev) 6894 { 6895 u32 ih_cntl = RREG32(IH_CNTL); 6896 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 6897 6898 ih_cntl |= ENABLE_INTR; 6899 ih_rb_cntl |= IH_RB_ENABLE; 6900 WREG32(IH_CNTL, ih_cntl); 6901 WREG32(IH_RB_CNTL, ih_rb_cntl); 6902 rdev->ih.enabled = true; 6903 } 6904 6905 /** 6906 * cik_disable_interrupts - Disable the interrupt ring buffer 6907 * 6908 * @rdev: radeon_device pointer 6909 * 6910 * Disable the interrupt ring buffer (CIK). 6911 */ 6912 static void cik_disable_interrupts(struct radeon_device *rdev) 6913 { 6914 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 6915 u32 ih_cntl = RREG32(IH_CNTL); 6916 6917 ih_rb_cntl &= ~IH_RB_ENABLE; 6918 ih_cntl &= ~ENABLE_INTR; 6919 WREG32(IH_RB_CNTL, ih_rb_cntl); 6920 WREG32(IH_CNTL, ih_cntl); 6921 /* set rptr, wptr to 0 */ 6922 WREG32(IH_RB_RPTR, 0); 6923 WREG32(IH_RB_WPTR, 0); 6924 rdev->ih.enabled = false; 6925 rdev->ih.rptr = 0; 6926 } 6927 6928 /** 6929 * cik_disable_interrupt_state - Disable all interrupt sources 6930 * 6931 * @rdev: radeon_device pointer 6932 * 6933 * Clear all interrupt enable bits used by the driver (CIK). 6934 */ 6935 static void cik_disable_interrupt_state(struct radeon_device *rdev) 6936 { 6937 u32 tmp; 6938 6939 /* gfx ring */ 6940 tmp = RREG32(CP_INT_CNTL_RING0) & 6941 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 6942 WREG32(CP_INT_CNTL_RING0, tmp); 6943 /* sdma */ 6944 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 6945 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); 6946 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 6947 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp); 6948 /* compute queues */ 6949 WREG32(CP_ME1_PIPE0_INT_CNTL, 0); 6950 WREG32(CP_ME1_PIPE1_INT_CNTL, 0); 6951 WREG32(CP_ME1_PIPE2_INT_CNTL, 0); 6952 WREG32(CP_ME1_PIPE3_INT_CNTL, 0); 6953 WREG32(CP_ME2_PIPE0_INT_CNTL, 0); 6954 WREG32(CP_ME2_PIPE1_INT_CNTL, 0); 6955 WREG32(CP_ME2_PIPE2_INT_CNTL, 0); 6956 WREG32(CP_ME2_PIPE3_INT_CNTL, 0); 6957 /* grbm */ 6958 WREG32(GRBM_INT_CNTL, 0); 6959 /* SRBM */ 6960 WREG32(SRBM_INT_CNTL, 0); 6961 /* vline/vblank, etc. */ 6962 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 6963 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 6964 if (rdev->num_crtc >= 4) { 6965 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); 6966 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); 6967 } 6968 if (rdev->num_crtc >= 6) { 6969 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); 6970 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); 6971 } 6972 /* pflip */ 6973 if (rdev->num_crtc >= 2) { 6974 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 6975 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 6976 } 6977 if (rdev->num_crtc >= 4) { 6978 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); 6979 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); 6980 } 6981 if (rdev->num_crtc >= 6) { 6982 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); 6983 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); 6984 } 6985 6986 /* dac hotplug */ 6987 WREG32(DAC_AUTODETECT_INT_CONTROL, 0); 6988 6989 /* digital hotplug */ 6990 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6991 WREG32(DC_HPD1_INT_CONTROL, tmp); 6992 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6993 WREG32(DC_HPD2_INT_CONTROL, tmp); 6994 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6995 WREG32(DC_HPD3_INT_CONTROL, tmp); 6996 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6997 WREG32(DC_HPD4_INT_CONTROL, tmp); 6998 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6999 WREG32(DC_HPD5_INT_CONTROL, tmp); 7000 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY; 7001 WREG32(DC_HPD6_INT_CONTROL, tmp); 7002 7003 } 7004 7005 /** 7006 * cik_irq_init - init and enable the interrupt ring 7007 * 7008 * @rdev: radeon_device pointer 7009 * 7010 * Allocate a ring buffer for the interrupt controller, 7011 * enable the RLC, disable interrupts, enable the IH 7012 * ring buffer and enable it (CIK). 7013 * Called at device load and reume. 7014 * Returns 0 for success, errors for failure. 7015 */ 7016 static int cik_irq_init(struct radeon_device *rdev) 7017 { 7018 int ret = 0; 7019 int rb_bufsz; 7020 u32 interrupt_cntl, ih_cntl, ih_rb_cntl; 7021 7022 /* allocate ring */ 7023 ret = r600_ih_ring_alloc(rdev); 7024 if (ret) 7025 return ret; 7026 7027 /* disable irqs */ 7028 cik_disable_interrupts(rdev); 7029 7030 /* init rlc */ 7031 ret = cik_rlc_resume(rdev); 7032 if (ret) { 7033 r600_ih_ring_fini(rdev); 7034 return ret; 7035 } 7036 7037 /* setup interrupt control */ 7038 /* XXX this should actually be a bus address, not an MC address. same on older asics */ 7039 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); 7040 interrupt_cntl = RREG32(INTERRUPT_CNTL); 7041 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi 7042 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN 7043 */ 7044 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; 7045 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ 7046 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; 7047 WREG32(INTERRUPT_CNTL, interrupt_cntl); 7048 7049 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); 7050 rb_bufsz = order_base_2(rdev->ih.ring_size / 4); 7051 7052 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | 7053 IH_WPTR_OVERFLOW_CLEAR | 7054 (rb_bufsz << 1)); 7055 7056 if (rdev->wb.enabled) 7057 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; 7058 7059 /* set the writeback address whether it's enabled or not */ 7060 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); 7061 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); 7062 7063 WREG32(IH_RB_CNTL, ih_rb_cntl); 7064 7065 /* set rptr, wptr to 0 */ 7066 WREG32(IH_RB_RPTR, 0); 7067 WREG32(IH_RB_WPTR, 0); 7068 7069 /* Default settings for IH_CNTL (disabled at first) */ 7070 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0); 7071 /* RPTR_REARM only works if msi's are enabled */ 7072 if (rdev->msi_enabled) 7073 ih_cntl |= RPTR_REARM; 7074 WREG32(IH_CNTL, ih_cntl); 7075 7076 /* force the active interrupt state to all disabled */ 7077 cik_disable_interrupt_state(rdev); 7078 7079 pci_set_master(rdev->pdev); 7080 7081 /* enable irqs */ 7082 cik_enable_interrupts(rdev); 7083 7084 return ret; 7085 } 7086 7087 /** 7088 * cik_irq_set - enable/disable interrupt sources 7089 * 7090 * @rdev: radeon_device pointer 7091 * 7092 * Enable interrupt sources on the GPU (vblanks, hpd, 7093 * etc.) (CIK). 7094 * Returns 0 for success, errors for failure. 7095 */ 7096 int cik_irq_set(struct radeon_device *rdev) 7097 { 7098 u32 cp_int_cntl; 7099 u32 cp_m1p0; 7100 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; 7101 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; 7102 u32 grbm_int_cntl = 0; 7103 u32 dma_cntl, dma_cntl1; 7104 7105 if (!rdev->irq.installed) { 7106 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 7107 return -EINVAL; 7108 } 7109 /* don't enable anything if the ih is disabled */ 7110 if (!rdev->ih.enabled) { 7111 cik_disable_interrupts(rdev); 7112 /* force the active interrupt state to all disabled */ 7113 cik_disable_interrupt_state(rdev); 7114 return 0; 7115 } 7116 7117 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & 7118 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 7119 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; 7120 7121 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN); 7122 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN); 7123 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN); 7124 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN); 7125 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN); 7126 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN); 7127 7128 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 7129 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 7130 7131 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 7132 7133 /* enable CP interrupts on all rings */ 7134 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 7135 DRM_DEBUG("cik_irq_set: sw int gfx\n"); 7136 cp_int_cntl |= TIME_STAMP_INT_ENABLE; 7137 } 7138 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { 7139 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7140 DRM_DEBUG("si_irq_set: sw int cp1\n"); 7141 if (ring->me == 1) { 7142 switch (ring->pipe) { 7143 case 0: 7144 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 7145 break; 7146 default: 7147 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 7148 break; 7149 } 7150 } else { 7151 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me); 7152 } 7153 } 7154 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { 7155 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7156 DRM_DEBUG("si_irq_set: sw int cp2\n"); 7157 if (ring->me == 1) { 7158 switch (ring->pipe) { 7159 case 0: 7160 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 7161 break; 7162 default: 7163 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 7164 break; 7165 } 7166 } else { 7167 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me); 7168 } 7169 } 7170 7171 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { 7172 DRM_DEBUG("cik_irq_set: sw int dma\n"); 7173 dma_cntl |= TRAP_ENABLE; 7174 } 7175 7176 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { 7177 DRM_DEBUG("cik_irq_set: sw int dma1\n"); 7178 dma_cntl1 |= TRAP_ENABLE; 7179 } 7180 7181 if (rdev->irq.crtc_vblank_int[0] || 7182 atomic_read(&rdev->irq.pflip[0])) { 7183 DRM_DEBUG("cik_irq_set: vblank 0\n"); 7184 crtc1 |= VBLANK_INTERRUPT_MASK; 7185 } 7186 if (rdev->irq.crtc_vblank_int[1] || 7187 atomic_read(&rdev->irq.pflip[1])) { 7188 DRM_DEBUG("cik_irq_set: vblank 1\n"); 7189 crtc2 |= VBLANK_INTERRUPT_MASK; 7190 } 7191 if (rdev->irq.crtc_vblank_int[2] || 7192 atomic_read(&rdev->irq.pflip[2])) { 7193 DRM_DEBUG("cik_irq_set: vblank 2\n"); 7194 crtc3 |= VBLANK_INTERRUPT_MASK; 7195 } 7196 if (rdev->irq.crtc_vblank_int[3] || 7197 atomic_read(&rdev->irq.pflip[3])) { 7198 DRM_DEBUG("cik_irq_set: vblank 3\n"); 7199 crtc4 |= VBLANK_INTERRUPT_MASK; 7200 } 7201 if (rdev->irq.crtc_vblank_int[4] || 7202 atomic_read(&rdev->irq.pflip[4])) { 7203 DRM_DEBUG("cik_irq_set: vblank 4\n"); 7204 crtc5 |= VBLANK_INTERRUPT_MASK; 7205 } 7206 if (rdev->irq.crtc_vblank_int[5] || 7207 atomic_read(&rdev->irq.pflip[5])) { 7208 DRM_DEBUG("cik_irq_set: vblank 5\n"); 7209 crtc6 |= VBLANK_INTERRUPT_MASK; 7210 } 7211 if (rdev->irq.hpd[0]) { 7212 DRM_DEBUG("cik_irq_set: hpd 1\n"); 7213 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN; 7214 } 7215 if (rdev->irq.hpd[1]) { 7216 DRM_DEBUG("cik_irq_set: hpd 2\n"); 7217 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN; 7218 } 7219 if (rdev->irq.hpd[2]) { 7220 DRM_DEBUG("cik_irq_set: hpd 3\n"); 7221 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN; 7222 } 7223 if (rdev->irq.hpd[3]) { 7224 DRM_DEBUG("cik_irq_set: hpd 4\n"); 7225 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN; 7226 } 7227 if (rdev->irq.hpd[4]) { 7228 DRM_DEBUG("cik_irq_set: hpd 5\n"); 7229 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN; 7230 } 7231 if (rdev->irq.hpd[5]) { 7232 DRM_DEBUG("cik_irq_set: hpd 6\n"); 7233 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN; 7234 } 7235 7236 WREG32(CP_INT_CNTL_RING0, cp_int_cntl); 7237 7238 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); 7239 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1); 7240 7241 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); 7242 7243 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 7244 7245 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 7246 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); 7247 if (rdev->num_crtc >= 4) { 7248 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); 7249 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); 7250 } 7251 if (rdev->num_crtc >= 6) { 7252 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); 7253 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); 7254 } 7255 7256 if (rdev->num_crtc >= 2) { 7257 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 7258 GRPH_PFLIP_INT_MASK); 7259 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 7260 GRPH_PFLIP_INT_MASK); 7261 } 7262 if (rdev->num_crtc >= 4) { 7263 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 7264 GRPH_PFLIP_INT_MASK); 7265 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 7266 GRPH_PFLIP_INT_MASK); 7267 } 7268 if (rdev->num_crtc >= 6) { 7269 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 7270 GRPH_PFLIP_INT_MASK); 7271 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 7272 GRPH_PFLIP_INT_MASK); 7273 } 7274 7275 WREG32(DC_HPD1_INT_CONTROL, hpd1); 7276 WREG32(DC_HPD2_INT_CONTROL, hpd2); 7277 WREG32(DC_HPD3_INT_CONTROL, hpd3); 7278 WREG32(DC_HPD4_INT_CONTROL, hpd4); 7279 WREG32(DC_HPD5_INT_CONTROL, hpd5); 7280 WREG32(DC_HPD6_INT_CONTROL, hpd6); 7281 7282 /* posting read */ 7283 RREG32(SRBM_STATUS); 7284 7285 return 0; 7286 } 7287 7288 /** 7289 * cik_irq_ack - ack interrupt sources 7290 * 7291 * @rdev: radeon_device pointer 7292 * 7293 * Ack interrupt sources on the GPU (vblanks, hpd, 7294 * etc.) (CIK). Certain interrupts sources are sw 7295 * generated and do not require an explicit ack. 7296 */ 7297 static inline void cik_irq_ack(struct radeon_device *rdev) 7298 { 7299 u32 tmp; 7300 7301 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS); 7302 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); 7303 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); 7304 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); 7305 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); 7306 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); 7307 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); 7308 7309 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS + 7310 EVERGREEN_CRTC0_REGISTER_OFFSET); 7311 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS + 7312 EVERGREEN_CRTC1_REGISTER_OFFSET); 7313 if (rdev->num_crtc >= 4) { 7314 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS + 7315 EVERGREEN_CRTC2_REGISTER_OFFSET); 7316 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS + 7317 EVERGREEN_CRTC3_REGISTER_OFFSET); 7318 } 7319 if (rdev->num_crtc >= 6) { 7320 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS + 7321 EVERGREEN_CRTC4_REGISTER_OFFSET); 7322 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS + 7323 EVERGREEN_CRTC5_REGISTER_OFFSET); 7324 } 7325 7326 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED) 7327 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, 7328 GRPH_PFLIP_INT_CLEAR); 7329 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED) 7330 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, 7331 GRPH_PFLIP_INT_CLEAR); 7332 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) 7333 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); 7334 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) 7335 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); 7336 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) 7337 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); 7338 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) 7339 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); 7340 7341 if (rdev->num_crtc >= 4) { 7342 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED) 7343 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, 7344 GRPH_PFLIP_INT_CLEAR); 7345 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED) 7346 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, 7347 GRPH_PFLIP_INT_CLEAR); 7348 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) 7349 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); 7350 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) 7351 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); 7352 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) 7353 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); 7354 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) 7355 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); 7356 } 7357 7358 if (rdev->num_crtc >= 6) { 7359 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED) 7360 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, 7361 GRPH_PFLIP_INT_CLEAR); 7362 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED) 7363 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, 7364 GRPH_PFLIP_INT_CLEAR); 7365 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) 7366 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); 7367 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) 7368 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); 7369 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) 7370 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); 7371 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) 7372 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); 7373 } 7374 7375 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 7376 tmp = RREG32(DC_HPD1_INT_CONTROL); 7377 tmp |= DC_HPDx_INT_ACK; 7378 WREG32(DC_HPD1_INT_CONTROL, tmp); 7379 } 7380 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 7381 tmp = RREG32(DC_HPD2_INT_CONTROL); 7382 tmp |= DC_HPDx_INT_ACK; 7383 WREG32(DC_HPD2_INT_CONTROL, tmp); 7384 } 7385 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 7386 tmp = RREG32(DC_HPD3_INT_CONTROL); 7387 tmp |= DC_HPDx_INT_ACK; 7388 WREG32(DC_HPD3_INT_CONTROL, tmp); 7389 } 7390 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 7391 tmp = RREG32(DC_HPD4_INT_CONTROL); 7392 tmp |= DC_HPDx_INT_ACK; 7393 WREG32(DC_HPD4_INT_CONTROL, tmp); 7394 } 7395 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 7396 tmp = RREG32(DC_HPD5_INT_CONTROL); 7397 tmp |= DC_HPDx_INT_ACK; 7398 WREG32(DC_HPD5_INT_CONTROL, tmp); 7399 } 7400 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 7401 tmp = RREG32(DC_HPD6_INT_CONTROL); 7402 tmp |= DC_HPDx_INT_ACK; 7403 WREG32(DC_HPD6_INT_CONTROL, tmp); 7404 } 7405 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { 7406 tmp = RREG32(DC_HPD1_INT_CONTROL); 7407 tmp |= DC_HPDx_RX_INT_ACK; 7408 WREG32(DC_HPD1_INT_CONTROL, tmp); 7409 } 7410 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { 7411 tmp = RREG32(DC_HPD2_INT_CONTROL); 7412 tmp |= DC_HPDx_RX_INT_ACK; 7413 WREG32(DC_HPD2_INT_CONTROL, tmp); 7414 } 7415 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { 7416 tmp = RREG32(DC_HPD3_INT_CONTROL); 7417 tmp |= DC_HPDx_RX_INT_ACK; 7418 WREG32(DC_HPD3_INT_CONTROL, tmp); 7419 } 7420 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { 7421 tmp = RREG32(DC_HPD4_INT_CONTROL); 7422 tmp |= DC_HPDx_RX_INT_ACK; 7423 WREG32(DC_HPD4_INT_CONTROL, tmp); 7424 } 7425 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { 7426 tmp = RREG32(DC_HPD5_INT_CONTROL); 7427 tmp |= DC_HPDx_RX_INT_ACK; 7428 WREG32(DC_HPD5_INT_CONTROL, tmp); 7429 } 7430 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { 7431 tmp = RREG32(DC_HPD6_INT_CONTROL); 7432 tmp |= DC_HPDx_RX_INT_ACK; 7433 WREG32(DC_HPD6_INT_CONTROL, tmp); 7434 } 7435 } 7436 7437 /** 7438 * cik_irq_disable - disable interrupts 7439 * 7440 * @rdev: radeon_device pointer 7441 * 7442 * Disable interrupts on the hw (CIK). 7443 */ 7444 static void cik_irq_disable(struct radeon_device *rdev) 7445 { 7446 cik_disable_interrupts(rdev); 7447 /* Wait and acknowledge irq */ 7448 mdelay(1); 7449 cik_irq_ack(rdev); 7450 cik_disable_interrupt_state(rdev); 7451 } 7452 7453 /** 7454 * cik_irq_disable - disable interrupts for suspend 7455 * 7456 * @rdev: radeon_device pointer 7457 * 7458 * Disable interrupts and stop the RLC (CIK). 7459 * Used for suspend. 7460 */ 7461 static void cik_irq_suspend(struct radeon_device *rdev) 7462 { 7463 cik_irq_disable(rdev); 7464 cik_rlc_stop(rdev); 7465 } 7466 7467 /** 7468 * cik_irq_fini - tear down interrupt support 7469 * 7470 * @rdev: radeon_device pointer 7471 * 7472 * Disable interrupts on the hw and free the IH ring 7473 * buffer (CIK). 7474 * Used for driver unload. 7475 */ 7476 static void cik_irq_fini(struct radeon_device *rdev) 7477 { 7478 cik_irq_suspend(rdev); 7479 r600_ih_ring_fini(rdev); 7480 } 7481 7482 /** 7483 * cik_get_ih_wptr - get the IH ring buffer wptr 7484 * 7485 * @rdev: radeon_device pointer 7486 * 7487 * Get the IH ring buffer wptr from either the register 7488 * or the writeback memory buffer (CIK). Also check for 7489 * ring buffer overflow and deal with it. 7490 * Used by cik_irq_process(). 7491 * Returns the value of the wptr. 7492 */ 7493 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) 7494 { 7495 u32 wptr, tmp; 7496 7497 if (rdev->wb.enabled) 7498 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); 7499 else 7500 wptr = RREG32(IH_RB_WPTR); 7501 7502 if (wptr & RB_OVERFLOW) { 7503 wptr &= ~RB_OVERFLOW; 7504 /* When a ring buffer overflow happen start parsing interrupt 7505 * from the last not overwritten vector (wptr + 16). Hopefully 7506 * this should allow us to catchup. 7507 */ 7508 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", 7509 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask); 7510 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask; 7511 tmp = RREG32(IH_RB_CNTL); 7512 tmp |= IH_WPTR_OVERFLOW_CLEAR; 7513 WREG32(IH_RB_CNTL, tmp); 7514 } 7515 return (wptr & rdev->ih.ptr_mask); 7516 } 7517 7518 /* CIK IV Ring 7519 * Each IV ring entry is 128 bits: 7520 * [7:0] - interrupt source id 7521 * [31:8] - reserved 7522 * [59:32] - interrupt source data 7523 * [63:60] - reserved 7524 * [71:64] - RINGID 7525 * CP: 7526 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0] 7527 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher 7528 * - for gfx, hw shader state (0=PS...5=LS, 6=CS) 7529 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes 7530 * PIPE_ID - ME0 0=3D 7531 * - ME1&2 compute dispatcher (4 pipes each) 7532 * SDMA: 7533 * INSTANCE_ID [1:0], QUEUE_ID[1:0] 7534 * INSTANCE_ID - 0 = sdma0, 1 = sdma1 7535 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1 7536 * [79:72] - VMID 7537 * [95:80] - PASID 7538 * [127:96] - reserved 7539 */ 7540 /** 7541 * cik_irq_process - interrupt handler 7542 * 7543 * @rdev: radeon_device pointer 7544 * 7545 * Interrupt hander (CIK). Walk the IH ring, 7546 * ack interrupts and schedule work to handle 7547 * interrupt events. 7548 * Returns irq process return code. 7549 */ 7550 irqreturn_t cik_irq_process(struct radeon_device *rdev) 7551 { 7552 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7553 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7554 u32 wptr; 7555 u32 rptr; 7556 u32 src_id, src_data, ring_id; 7557 u8 me_id, pipe_id, queue_id; 7558 u32 ring_index; 7559 bool queue_hotplug = false; 7560 bool queue_dp = false; 7561 bool queue_reset = false; 7562 u32 addr, status, mc_client; 7563 bool queue_thermal = false; 7564 7565 if (!rdev->ih.enabled || rdev->shutdown) 7566 return IRQ_NONE; 7567 7568 wptr = cik_get_ih_wptr(rdev); 7569 7570 restart_ih: 7571 /* is somebody else already processing irqs? */ 7572 if (atomic_xchg(&rdev->ih.lock, 1)) 7573 return IRQ_NONE; 7574 7575 rptr = rdev->ih.rptr; 7576 DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr); 7577 7578 /* Order reading of wptr vs. reading of IH ring data */ 7579 rmb(); 7580 7581 /* display interrupts */ 7582 cik_irq_ack(rdev); 7583 7584 while (rptr != wptr) { 7585 /* wptr/rptr are in bytes! */ 7586 ring_index = rptr / 4; 7587 7588 #pragma GCC diagnostic push 7589 #pragma GCC diagnostic ignored "-Wcast-qual" 7590 radeon_kfd_interrupt(rdev, 7591 (const void *) &rdev->ih.ring[ring_index]); 7592 #pragma GCC diagnostic pop 7593 7594 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 7595 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 7596 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 7597 7598 switch (src_id) { 7599 case 1: /* D1 vblank/vline */ 7600 switch (src_data) { 7601 case 0: /* D1 vblank */ 7602 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) 7603 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7604 7605 if (rdev->irq.crtc_vblank_int[0]) { 7606 drm_handle_vblank(rdev->ddev, 0); 7607 rdev->pm.vblank_sync = true; 7608 wake_up(&rdev->irq.vblank_queue); 7609 } 7610 if (atomic_read(&rdev->irq.pflip[0])) 7611 radeon_crtc_handle_vblank(rdev, 0); 7612 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 7613 DRM_DEBUG_VBLANK("IH: D1 vblank\n"); 7614 7615 break; 7616 case 1: /* D1 vline */ 7617 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) 7618 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7619 7620 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 7621 DRM_DEBUG_VBLANK("IH: D1 vline\n"); 7622 7623 break; 7624 default: 7625 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7626 break; 7627 } 7628 break; 7629 case 2: /* D2 vblank/vline */ 7630 switch (src_data) { 7631 case 0: /* D2 vblank */ 7632 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) 7633 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7634 7635 if (rdev->irq.crtc_vblank_int[1]) { 7636 drm_handle_vblank(rdev->ddev, 1); 7637 rdev->pm.vblank_sync = true; 7638 wake_up(&rdev->irq.vblank_queue); 7639 } 7640 if (atomic_read(&rdev->irq.pflip[1])) 7641 radeon_crtc_handle_vblank(rdev, 1); 7642 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 7643 DRM_DEBUG_VBLANK("IH: D2 vblank\n"); 7644 7645 break; 7646 case 1: /* D2 vline */ 7647 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) 7648 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7649 7650 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 7651 DRM_DEBUG_VBLANK("IH: D2 vline\n"); 7652 7653 break; 7654 default: 7655 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7656 break; 7657 } 7658 break; 7659 case 3: /* D3 vblank/vline */ 7660 switch (src_data) { 7661 case 0: /* D3 vblank */ 7662 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) 7663 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7664 7665 if (rdev->irq.crtc_vblank_int[2]) { 7666 drm_handle_vblank(rdev->ddev, 2); 7667 rdev->pm.vblank_sync = true; 7668 wake_up(&rdev->irq.vblank_queue); 7669 } 7670 if (atomic_read(&rdev->irq.pflip[2])) 7671 radeon_crtc_handle_vblank(rdev, 2); 7672 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 7673 DRM_DEBUG_VBLANK("IH: D3 vblank\n"); 7674 7675 break; 7676 case 1: /* D3 vline */ 7677 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) 7678 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7679 7680 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 7681 DRM_DEBUG_VBLANK("IH: D3 vline\n"); 7682 7683 break; 7684 default: 7685 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7686 break; 7687 } 7688 break; 7689 case 4: /* D4 vblank/vline */ 7690 switch (src_data) { 7691 case 0: /* D4 vblank */ 7692 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) 7693 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7694 7695 if (rdev->irq.crtc_vblank_int[3]) { 7696 drm_handle_vblank(rdev->ddev, 3); 7697 rdev->pm.vblank_sync = true; 7698 wake_up(&rdev->irq.vblank_queue); 7699 } 7700 if (atomic_read(&rdev->irq.pflip[3])) 7701 radeon_crtc_handle_vblank(rdev, 3); 7702 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 7703 DRM_DEBUG_VBLANK("IH: D4 vblank\n"); 7704 7705 break; 7706 case 1: /* D4 vline */ 7707 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) 7708 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7709 7710 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 7711 DRM_DEBUG_VBLANK("IH: D4 vline\n"); 7712 7713 break; 7714 default: 7715 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7716 break; 7717 } 7718 break; 7719 case 5: /* D5 vblank/vline */ 7720 switch (src_data) { 7721 case 0: /* D5 vblank */ 7722 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) 7723 DRM_DEBUG_VBLANK("IH: IH event w/o asserted irq bit?\n"); 7724 7725 if (rdev->irq.crtc_vblank_int[4]) { 7726 drm_handle_vblank(rdev->ddev, 4); 7727 rdev->pm.vblank_sync = true; 7728 wake_up(&rdev->irq.vblank_queue); 7729 } 7730 if (atomic_read(&rdev->irq.pflip[4])) 7731 radeon_crtc_handle_vblank(rdev, 4); 7732 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 7733 DRM_DEBUG_VBLANK("IH: D5 vblank\n"); 7734 7735 break; 7736 case 1: /* D5 vline */ 7737 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) 7738 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7739 7740 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 7741 DRM_DEBUG("IH: D5 vline\n"); 7742 7743 break; 7744 default: 7745 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7746 break; 7747 } 7748 break; 7749 case 6: /* D6 vblank/vline */ 7750 switch (src_data) { 7751 case 0: /* D6 vblank */ 7752 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) 7753 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7754 7755 if (rdev->irq.crtc_vblank_int[5]) { 7756 drm_handle_vblank(rdev->ddev, 5); 7757 rdev->pm.vblank_sync = true; 7758 wake_up(&rdev->irq.vblank_queue); 7759 } 7760 if (atomic_read(&rdev->irq.pflip[5])) 7761 radeon_crtc_handle_vblank(rdev, 5); 7762 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 7763 DRM_DEBUG_VBLANK("IH: D6 vblank\n"); 7764 7765 break; 7766 case 1: /* D6 vline */ 7767 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) 7768 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7769 7770 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 7771 DRM_DEBUG_VBLANK("IH: D6 vline\n"); 7772 7773 break; 7774 default: 7775 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7776 break; 7777 } 7778 break; 7779 case 8: /* D1 page flip */ 7780 case 10: /* D2 page flip */ 7781 case 12: /* D3 page flip */ 7782 case 14: /* D4 page flip */ 7783 case 16: /* D5 page flip */ 7784 case 18: /* D6 page flip */ 7785 DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1); 7786 if (radeon_use_pflipirq > 0) 7787 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1); 7788 break; 7789 case 42: /* HPD hotplug */ 7790 switch (src_data) { 7791 case 0: 7792 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) 7793 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7794 7795 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 7796 queue_hotplug = true; 7797 DRM_DEBUG("IH: HPD1\n"); 7798 7799 break; 7800 case 1: 7801 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) 7802 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7803 7804 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 7805 queue_hotplug = true; 7806 DRM_DEBUG("IH: HPD2\n"); 7807 7808 break; 7809 case 2: 7810 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) 7811 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7812 7813 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 7814 queue_hotplug = true; 7815 DRM_DEBUG("IH: HPD3\n"); 7816 7817 break; 7818 case 3: 7819 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) 7820 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7821 7822 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 7823 queue_hotplug = true; 7824 DRM_DEBUG("IH: HPD4\n"); 7825 7826 break; 7827 case 4: 7828 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) 7829 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7830 7831 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 7832 queue_hotplug = true; 7833 DRM_DEBUG("IH: HPD5\n"); 7834 7835 break; 7836 case 5: 7837 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) 7838 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7839 7840 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 7841 queue_hotplug = true; 7842 DRM_DEBUG("IH: HPD6\n"); 7843 7844 break; 7845 case 6: 7846 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) 7847 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7848 7849 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; 7850 queue_dp = true; 7851 DRM_DEBUG("IH: HPD_RX 1\n"); 7852 7853 break; 7854 case 7: 7855 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) 7856 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7857 7858 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; 7859 queue_dp = true; 7860 DRM_DEBUG("IH: HPD_RX 2\n"); 7861 7862 break; 7863 case 8: 7864 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) 7865 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7866 7867 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; 7868 queue_dp = true; 7869 DRM_DEBUG("IH: HPD_RX 3\n"); 7870 7871 break; 7872 case 9: 7873 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) 7874 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7875 7876 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; 7877 queue_dp = true; 7878 DRM_DEBUG("IH: HPD_RX 4\n"); 7879 7880 break; 7881 case 10: 7882 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) 7883 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7884 7885 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; 7886 queue_dp = true; 7887 DRM_DEBUG("IH: HPD_RX 5\n"); 7888 7889 break; 7890 case 11: 7891 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) 7892 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); 7893 7894 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; 7895 queue_dp = true; 7896 DRM_DEBUG("IH: HPD_RX 6\n"); 7897 7898 break; 7899 default: 7900 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7901 break; 7902 } 7903 break; 7904 case 96: 7905 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR)); 7906 WREG32(SRBM_INT_ACK, 0x1); 7907 break; 7908 case 124: /* UVD */ 7909 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); 7910 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); 7911 break; 7912 case 146: 7913 case 147: 7914 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); 7915 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS); 7916 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); 7917 /* reset addr and status */ 7918 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); 7919 if (addr == 0x0 && status == 0x0) 7920 break; 7921 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); 7922 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 7923 addr); 7924 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 7925 status); 7926 cik_vm_decode_fault(rdev, status, addr, mc_client); 7927 break; 7928 case 167: /* VCE */ 7929 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); 7930 switch (src_data) { 7931 case 0: 7932 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); 7933 break; 7934 case 1: 7935 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); 7936 break; 7937 default: 7938 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); 7939 break; 7940 } 7941 break; 7942 case 176: /* GFX RB CP_INT */ 7943 case 177: /* GFX IB CP_INT */ 7944 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 7945 break; 7946 case 181: /* CP EOP event */ 7947 DRM_DEBUG("IH: CP EOP\n"); 7948 /* XXX check the bitfield order! */ 7949 me_id = (ring_id & 0x60) >> 5; 7950 pipe_id = (ring_id & 0x18) >> 3; 7951 queue_id = (ring_id & 0x7) >> 0; 7952 switch (me_id) { 7953 case 0: 7954 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 7955 break; 7956 case 1: 7957 case 2: 7958 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id)) 7959 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7960 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id)) 7961 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7962 break; 7963 } 7964 break; 7965 case 184: /* CP Privileged reg access */ 7966 DRM_ERROR("Illegal register access in command stream\n"); 7967 /* XXX check the bitfield order! */ 7968 me_id = (ring_id & 0x60) >> 5; 7969 pipe_id = (ring_id & 0x18) >> 3; 7970 queue_id = (ring_id & 0x7) >> 0; 7971 switch (me_id) { 7972 case 0: 7973 /* This results in a full GPU reset, but all we need to do is soft 7974 * reset the CP for gfx 7975 */ 7976 queue_reset = true; 7977 break; 7978 case 1: 7979 /* XXX compute */ 7980 queue_reset = true; 7981 break; 7982 case 2: 7983 /* XXX compute */ 7984 queue_reset = true; 7985 break; 7986 } 7987 break; 7988 case 185: /* CP Privileged inst */ 7989 DRM_ERROR("Illegal instruction in command stream\n"); 7990 /* XXX check the bitfield order! */ 7991 me_id = (ring_id & 0x60) >> 5; 7992 pipe_id = (ring_id & 0x18) >> 3; 7993 queue_id = (ring_id & 0x7) >> 0; 7994 switch (me_id) { 7995 case 0: 7996 /* This results in a full GPU reset, but all we need to do is soft 7997 * reset the CP for gfx 7998 */ 7999 queue_reset = true; 8000 break; 8001 case 1: 8002 /* XXX compute */ 8003 queue_reset = true; 8004 break; 8005 case 2: 8006 /* XXX compute */ 8007 queue_reset = true; 8008 break; 8009 } 8010 break; 8011 case 224: /* SDMA trap event */ 8012 /* XXX check the bitfield order! */ 8013 me_id = (ring_id & 0x3) >> 0; 8014 queue_id = (ring_id & 0xc) >> 2; 8015 DRM_DEBUG("IH: SDMA trap\n"); 8016 switch (me_id) { 8017 case 0: 8018 switch (queue_id) { 8019 case 0: 8020 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 8021 break; 8022 case 1: 8023 /* XXX compute */ 8024 break; 8025 case 2: 8026 /* XXX compute */ 8027 break; 8028 } 8029 break; 8030 case 1: 8031 switch (queue_id) { 8032 case 0: 8033 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 8034 break; 8035 case 1: 8036 /* XXX compute */ 8037 break; 8038 case 2: 8039 /* XXX compute */ 8040 break; 8041 } 8042 break; 8043 } 8044 break; 8045 case 230: /* thermal low to high */ 8046 DRM_DEBUG("IH: thermal low to high\n"); 8047 rdev->pm.dpm.thermal.high_to_low = false; 8048 queue_thermal = true; 8049 break; 8050 case 231: /* thermal high to low */ 8051 DRM_DEBUG("IH: thermal high to low\n"); 8052 rdev->pm.dpm.thermal.high_to_low = true; 8053 queue_thermal = true; 8054 break; 8055 case 233: /* GUI IDLE */ 8056 DRM_DEBUG("IH: GUI idle\n"); 8057 break; 8058 case 241: /* SDMA Privileged inst */ 8059 case 247: /* SDMA Privileged inst */ 8060 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 8061 /* XXX check the bitfield order! */ 8062 me_id = (ring_id & 0x3) >> 0; 8063 queue_id = (ring_id & 0xc) >> 2; 8064 switch (me_id) { 8065 case 0: 8066 switch (queue_id) { 8067 case 0: 8068 queue_reset = true; 8069 break; 8070 case 1: 8071 /* XXX compute */ 8072 queue_reset = true; 8073 break; 8074 case 2: 8075 /* XXX compute */ 8076 queue_reset = true; 8077 break; 8078 } 8079 break; 8080 case 1: 8081 switch (queue_id) { 8082 case 0: 8083 queue_reset = true; 8084 break; 8085 case 1: 8086 /* XXX compute */ 8087 queue_reset = true; 8088 break; 8089 case 2: 8090 /* XXX compute */ 8091 queue_reset = true; 8092 break; 8093 } 8094 break; 8095 } 8096 break; 8097 default: 8098 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 8099 break; 8100 } 8101 8102 /* wptr/rptr are in bytes! */ 8103 rptr += 16; 8104 rptr &= rdev->ih.ptr_mask; 8105 WREG32(IH_RB_RPTR, rptr); 8106 } 8107 if (queue_dp) 8108 schedule_work(&rdev->dp_work); 8109 if (queue_hotplug) 8110 schedule_delayed_work(&rdev->hotplug_work, 0); 8111 if (queue_reset) { 8112 rdev->needs_reset = true; 8113 wake_up_all(&rdev->fence_queue); 8114 } 8115 if (queue_thermal) 8116 schedule_work(&rdev->pm.dpm.thermal.work); 8117 rdev->ih.rptr = rptr; 8118 atomic_set(&rdev->ih.lock, 0); 8119 8120 /* make sure wptr hasn't changed while processing */ 8121 wptr = cik_get_ih_wptr(rdev); 8122 if (wptr != rptr) 8123 goto restart_ih; 8124 8125 return IRQ_HANDLED; 8126 } 8127 8128 /* 8129 * startup/shutdown callbacks 8130 */ 8131 static void cik_uvd_init(struct radeon_device *rdev) 8132 { 8133 int r; 8134 8135 if (!rdev->has_uvd) 8136 return; 8137 8138 r = radeon_uvd_init(rdev); 8139 if (r) { 8140 dev_err(rdev->dev, "failed UVD (%d) init.\n", r); 8141 /* 8142 * At this point rdev->uvd.vcpu_bo is NULL which trickles down 8143 * to early fails cik_uvd_start() and thus nothing happens 8144 * there. So it is pointless to try to go through that code 8145 * hence why we disable uvd here. 8146 */ 8147 rdev->has_uvd = 0; 8148 return; 8149 } 8150 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; 8151 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); 8152 } 8153 8154 static void cik_uvd_start(struct radeon_device *rdev) 8155 { 8156 int r; 8157 8158 if (!rdev->has_uvd) 8159 return; 8160 8161 r = radeon_uvd_resume(rdev); 8162 if (r) { 8163 dev_err(rdev->dev, "failed UVD resume (%d).\n", r); 8164 goto error; 8165 } 8166 r = uvd_v4_2_resume(rdev); 8167 if (r) { 8168 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r); 8169 goto error; 8170 } 8171 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); 8172 if (r) { 8173 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); 8174 goto error; 8175 } 8176 return; 8177 8178 error: 8179 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 8180 } 8181 8182 static void cik_uvd_resume(struct radeon_device *rdev) 8183 { 8184 struct radeon_ring *ring; 8185 int r; 8186 8187 if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size) 8188 return; 8189 8190 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 8191 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0)); 8192 if (r) { 8193 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r); 8194 return; 8195 } 8196 r = uvd_v1_0_init(rdev); 8197 if (r) { 8198 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r); 8199 return; 8200 } 8201 } 8202 8203 static void cik_vce_init(struct radeon_device *rdev) 8204 { 8205 int r; 8206 8207 if (!rdev->has_vce) 8208 return; 8209 8210 r = radeon_vce_init(rdev); 8211 if (r) { 8212 dev_err(rdev->dev, "failed VCE (%d) init.\n", r); 8213 /* 8214 * At this point rdev->vce.vcpu_bo is NULL which trickles down 8215 * to early fails cik_vce_start() and thus nothing happens 8216 * there. So it is pointless to try to go through that code 8217 * hence why we disable vce here. 8218 */ 8219 rdev->has_vce = 0; 8220 return; 8221 } 8222 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL; 8223 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096); 8224 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL; 8225 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096); 8226 } 8227 8228 static void cik_vce_start(struct radeon_device *rdev) 8229 { 8230 int r; 8231 8232 if (!rdev->has_vce) 8233 return; 8234 8235 r = radeon_vce_resume(rdev); 8236 if (r) { 8237 dev_err(rdev->dev, "failed VCE resume (%d).\n", r); 8238 goto error; 8239 } 8240 r = vce_v2_0_resume(rdev); 8241 if (r) { 8242 dev_err(rdev->dev, "failed VCE resume (%d).\n", r); 8243 goto error; 8244 } 8245 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX); 8246 if (r) { 8247 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r); 8248 goto error; 8249 } 8250 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX); 8251 if (r) { 8252 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r); 8253 goto error; 8254 } 8255 return; 8256 8257 error: 8258 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; 8259 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; 8260 } 8261 8262 static void cik_vce_resume(struct radeon_device *rdev) 8263 { 8264 struct radeon_ring *ring; 8265 int r; 8266 8267 if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size) 8268 return; 8269 8270 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 8271 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP); 8272 if (r) { 8273 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r); 8274 return; 8275 } 8276 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 8277 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP); 8278 if (r) { 8279 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r); 8280 return; 8281 } 8282 r = vce_v1_0_init(rdev); 8283 if (r) { 8284 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r); 8285 return; 8286 } 8287 } 8288 8289 /** 8290 * cik_startup - program the asic to a functional state 8291 * 8292 * @rdev: radeon_device pointer 8293 * 8294 * Programs the asic to a functional state (CIK). 8295 * Called by cik_init() and cik_resume(). 8296 * Returns 0 for success, error for failure. 8297 */ 8298 static int cik_startup(struct radeon_device *rdev) 8299 { 8300 struct radeon_ring *ring; 8301 u32 nop; 8302 int r; 8303 8304 /* enable pcie gen2/3 link */ 8305 cik_pcie_gen3_enable(rdev); 8306 /* enable aspm */ 8307 cik_program_aspm(rdev); 8308 8309 /* scratch needs to be initialized before MC */ 8310 r = r600_vram_scratch_init(rdev); 8311 if (r) 8312 return r; 8313 8314 cik_mc_program(rdev); 8315 8316 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) { 8317 r = ci_mc_load_microcode(rdev); 8318 if (r) { 8319 DRM_ERROR("Failed to load MC firmware!\n"); 8320 return r; 8321 } 8322 } 8323 8324 r = cik_pcie_gart_enable(rdev); 8325 if (r) 8326 return r; 8327 cik_gpu_init(rdev); 8328 8329 /* allocate rlc buffers */ 8330 if (rdev->flags & RADEON_IS_IGP) { 8331 if (rdev->family == CHIP_KAVERI) { 8332 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; 8333 rdev->rlc.reg_list_size = 8334 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); 8335 } else { 8336 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; 8337 rdev->rlc.reg_list_size = 8338 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); 8339 } 8340 } 8341 rdev->rlc.cs_data = ci_cs_data; 8342 rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */ 8343 rdev->rlc.cp_table_size += 64 * 1024; /* GDS */ 8344 r = sumo_rlc_init(rdev); 8345 if (r) { 8346 DRM_ERROR("Failed to init rlc BOs!\n"); 8347 return r; 8348 } 8349 8350 /* allocate wb buffer */ 8351 r = radeon_wb_init(rdev); 8352 if (r) 8353 return r; 8354 8355 /* allocate mec buffers */ 8356 r = cik_mec_init(rdev); 8357 if (r) { 8358 DRM_ERROR("Failed to init MEC BOs!\n"); 8359 return r; 8360 } 8361 8362 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 8363 if (r) { 8364 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 8365 return r; 8366 } 8367 8368 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 8369 if (r) { 8370 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 8371 return r; 8372 } 8373 8374 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 8375 if (r) { 8376 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 8377 return r; 8378 } 8379 8380 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 8381 if (r) { 8382 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 8383 return r; 8384 } 8385 8386 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 8387 if (r) { 8388 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 8389 return r; 8390 } 8391 8392 cik_uvd_start(rdev); 8393 cik_vce_start(rdev); 8394 8395 /* Enable IRQ */ 8396 if (!rdev->irq.installed) { 8397 r = radeon_irq_kms_init(rdev); 8398 if (r) 8399 return r; 8400 } 8401 8402 r = cik_irq_init(rdev); 8403 if (r) { 8404 DRM_ERROR("radeon: IH init failed (%d).\n", r); 8405 radeon_irq_kms_fini(rdev); 8406 return r; 8407 } 8408 cik_irq_set(rdev); 8409 8410 if (rdev->family == CHIP_HAWAII) { 8411 if (rdev->new_fw) 8412 nop = PACKET3(PACKET3_NOP, 0x3FFF); 8413 else 8414 nop = RADEON_CP_PACKET2; 8415 } else { 8416 nop = PACKET3(PACKET3_NOP, 0x3FFF); 8417 } 8418 8419 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 8420 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 8421 nop); 8422 if (r) 8423 return r; 8424 8425 /* set up the compute queues */ 8426 /* type-2 packets are deprecated on MEC, use type-3 instead */ 8427 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 8428 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 8429 nop); 8430 if (r) 8431 return r; 8432 ring->me = 1; /* first MEC */ 8433 ring->pipe = 0; /* first pipe */ 8434 ring->queue = 0; /* first queue */ 8435 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 8436 8437 /* type-2 packets are deprecated on MEC, use type-3 instead */ 8438 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 8439 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 8440 nop); 8441 if (r) 8442 return r; 8443 /* dGPU only have 1 MEC */ 8444 ring->me = 1; /* first MEC */ 8445 ring->pipe = 0; /* first pipe */ 8446 ring->queue = 1; /* second queue */ 8447 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 8448 8449 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 8450 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 8451 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 8452 if (r) 8453 return r; 8454 8455 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 8456 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 8457 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 8458 if (r) 8459 return r; 8460 8461 r = cik_cp_resume(rdev); 8462 if (r) 8463 return r; 8464 8465 r = cik_sdma_resume(rdev); 8466 if (r) 8467 return r; 8468 8469 cik_uvd_resume(rdev); 8470 cik_vce_resume(rdev); 8471 8472 r = radeon_ib_pool_init(rdev); 8473 if (r) { 8474 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 8475 return r; 8476 } 8477 8478 r = radeon_vm_manager_init(rdev); 8479 if (r) { 8480 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); 8481 return r; 8482 } 8483 8484 r = radeon_audio_init(rdev); 8485 if (r) 8486 return r; 8487 8488 r = radeon_kfd_resume(rdev); 8489 if (r) 8490 return r; 8491 8492 return 0; 8493 } 8494 8495 /** 8496 * cik_resume - resume the asic to a functional state 8497 * 8498 * @rdev: radeon_device pointer 8499 * 8500 * Programs the asic to a functional state (CIK). 8501 * Called at resume. 8502 * Returns 0 for success, error for failure. 8503 */ 8504 int cik_resume(struct radeon_device *rdev) 8505 { 8506 int r; 8507 8508 /* post card */ 8509 atom_asic_init(rdev->mode_info.atom_context); 8510 8511 /* init golden registers */ 8512 cik_init_golden_registers(rdev); 8513 8514 if (rdev->pm.pm_method == PM_METHOD_DPM) 8515 radeon_pm_resume(rdev); 8516 8517 rdev->accel_working = true; 8518 r = cik_startup(rdev); 8519 if (r) { 8520 DRM_ERROR("cik startup failed on resume\n"); 8521 rdev->accel_working = false; 8522 return r; 8523 } 8524 8525 return r; 8526 8527 } 8528 8529 /** 8530 * cik_suspend - suspend the asic 8531 * 8532 * @rdev: radeon_device pointer 8533 * 8534 * Bring the chip into a state suitable for suspend (CIK). 8535 * Called at suspend. 8536 * Returns 0 for success. 8537 */ 8538 int cik_suspend(struct radeon_device *rdev) 8539 { 8540 radeon_kfd_suspend(rdev); 8541 radeon_pm_suspend(rdev); 8542 radeon_audio_fini(rdev); 8543 radeon_vm_manager_fini(rdev); 8544 cik_cp_enable(rdev, false); 8545 cik_sdma_enable(rdev, false); 8546 if (rdev->has_uvd) { 8547 uvd_v1_0_fini(rdev); 8548 radeon_uvd_suspend(rdev); 8549 } 8550 if (rdev->has_vce) 8551 radeon_vce_suspend(rdev); 8552 cik_fini_pg(rdev); 8553 cik_fini_cg(rdev); 8554 cik_irq_suspend(rdev); 8555 radeon_wb_disable(rdev); 8556 cik_pcie_gart_disable(rdev); 8557 return 0; 8558 } 8559 8560 /* Plan is to move initialization in that function and use 8561 * helper function so that radeon_device_init pretty much 8562 * do nothing more than calling asic specific function. This 8563 * should also allow to remove a bunch of callback function 8564 * like vram_info. 8565 */ 8566 /** 8567 * cik_init - asic specific driver and hw init 8568 * 8569 * @rdev: radeon_device pointer 8570 * 8571 * Setup asic specific driver variables and program the hw 8572 * to a functional state (CIK). 8573 * Called at driver startup. 8574 * Returns 0 for success, errors for failure. 8575 */ 8576 int cik_init(struct radeon_device *rdev) 8577 { 8578 struct radeon_ring *ring; 8579 int r; 8580 8581 /* Read BIOS */ 8582 if (!radeon_get_bios(rdev)) { 8583 if (ASIC_IS_AVIVO(rdev)) 8584 return -EINVAL; 8585 } 8586 /* Must be an ATOMBIOS */ 8587 if (!rdev->is_atom_bios) { 8588 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n"); 8589 return -EINVAL; 8590 } 8591 r = radeon_atombios_init(rdev); 8592 if (r) 8593 return r; 8594 8595 /* Post card if necessary */ 8596 if (!radeon_card_posted(rdev)) { 8597 if (!rdev->bios) { 8598 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 8599 return -EINVAL; 8600 } 8601 DRM_INFO("GPU not posted. posting now...\n"); 8602 atom_asic_init(rdev->mode_info.atom_context); 8603 } 8604 /* init golden registers */ 8605 cik_init_golden_registers(rdev); 8606 /* Initialize scratch registers */ 8607 cik_scratch_init(rdev); 8608 /* Initialize surface registers */ 8609 radeon_surface_init(rdev); 8610 /* Initialize clocks */ 8611 radeon_get_clock_info(rdev->ddev); 8612 8613 /* Fence driver */ 8614 r = radeon_fence_driver_init(rdev); 8615 if (r) 8616 return r; 8617 8618 /* initialize memory controller */ 8619 r = cik_mc_init(rdev); 8620 if (r) 8621 return r; 8622 /* Memory manager */ 8623 r = radeon_bo_init(rdev); 8624 if (r) 8625 return r; 8626 8627 if (rdev->flags & RADEON_IS_IGP) { 8628 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 8629 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 8630 r = cik_init_microcode(rdev); 8631 if (r) { 8632 DRM_ERROR("Failed to load firmware!\n"); 8633 return r; 8634 } 8635 } 8636 } else { 8637 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 8638 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 8639 !rdev->mc_fw) { 8640 r = cik_init_microcode(rdev); 8641 if (r) { 8642 DRM_ERROR("Failed to load firmware!\n"); 8643 return r; 8644 } 8645 } 8646 } 8647 8648 /* Initialize power management */ 8649 radeon_pm_init(rdev); 8650 8651 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 8652 ring->ring_obj = NULL; 8653 r600_ring_init(rdev, ring, 1024 * 1024); 8654 8655 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 8656 ring->ring_obj = NULL; 8657 r600_ring_init(rdev, ring, 1024 * 1024); 8658 r = radeon_doorbell_get(rdev, &ring->doorbell_index); 8659 if (r) 8660 return r; 8661 8662 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 8663 ring->ring_obj = NULL; 8664 r600_ring_init(rdev, ring, 1024 * 1024); 8665 r = radeon_doorbell_get(rdev, &ring->doorbell_index); 8666 if (r) 8667 return r; 8668 8669 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 8670 ring->ring_obj = NULL; 8671 r600_ring_init(rdev, ring, 256 * 1024); 8672 8673 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 8674 ring->ring_obj = NULL; 8675 r600_ring_init(rdev, ring, 256 * 1024); 8676 8677 cik_uvd_init(rdev); 8678 cik_vce_init(rdev); 8679 8680 rdev->ih.ring_obj = NULL; 8681 r600_ih_ring_init(rdev, 64 * 1024); 8682 8683 r = r600_pcie_gart_init(rdev); 8684 if (r) 8685 return r; 8686 8687 rdev->accel_working = true; 8688 r = cik_startup(rdev); 8689 if (r) { 8690 dev_err(rdev->dev, "disabling GPU acceleration\n"); 8691 cik_cp_fini(rdev); 8692 cik_sdma_fini(rdev); 8693 cik_irq_fini(rdev); 8694 sumo_rlc_fini(rdev); 8695 cik_mec_fini(rdev); 8696 radeon_wb_fini(rdev); 8697 radeon_ib_pool_fini(rdev); 8698 radeon_vm_manager_fini(rdev); 8699 radeon_irq_kms_fini(rdev); 8700 cik_pcie_gart_fini(rdev); 8701 rdev->accel_working = false; 8702 } 8703 8704 /* Don't start up if the MC ucode is missing. 8705 * The default clocks and voltages before the MC ucode 8706 * is loaded are not suffient for advanced operations. 8707 */ 8708 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) { 8709 DRM_ERROR("radeon: MC ucode required for NI+.\n"); 8710 return -EINVAL; 8711 } 8712 8713 return 0; 8714 } 8715 8716 /** 8717 * cik_fini - asic specific driver and hw fini 8718 * 8719 * @rdev: radeon_device pointer 8720 * 8721 * Tear down the asic specific driver variables and program the hw 8722 * to an idle state (CIK). 8723 * Called at driver unload. 8724 */ 8725 void cik_fini(struct radeon_device *rdev) 8726 { 8727 radeon_pm_fini(rdev); 8728 cik_cp_fini(rdev); 8729 cik_sdma_fini(rdev); 8730 cik_fini_pg(rdev); 8731 cik_fini_cg(rdev); 8732 cik_irq_fini(rdev); 8733 sumo_rlc_fini(rdev); 8734 cik_mec_fini(rdev); 8735 radeon_wb_fini(rdev); 8736 radeon_vm_manager_fini(rdev); 8737 radeon_ib_pool_fini(rdev); 8738 radeon_irq_kms_fini(rdev); 8739 uvd_v1_0_fini(rdev); 8740 radeon_uvd_fini(rdev); 8741 radeon_vce_fini(rdev); 8742 cik_pcie_gart_fini(rdev); 8743 r600_vram_scratch_fini(rdev); 8744 radeon_gem_fini(rdev); 8745 radeon_fence_driver_fini(rdev); 8746 radeon_bo_fini(rdev); 8747 radeon_atombios_fini(rdev); 8748 cik_fini_microcode(rdev); 8749 kfree(rdev->bios); 8750 rdev->bios = NULL; 8751 } 8752 8753 void dce8_program_fmt(struct drm_encoder *encoder) 8754 { 8755 struct drm_device *dev = encoder->dev; 8756 struct radeon_device *rdev = dev->dev_private; 8757 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); 8758 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc); 8759 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); 8760 int bpc = 0; 8761 u32 tmp = 0; 8762 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE; 8763 8764 if (connector) { 8765 struct radeon_connector *radeon_connector = to_radeon_connector(connector); 8766 bpc = radeon_get_monitor_bpc(connector); 8767 dither = radeon_connector->dither; 8768 } 8769 8770 /* LVDS/eDP FMT is set up by atom */ 8771 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT) 8772 return; 8773 8774 /* not needed for analog */ 8775 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) || 8776 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2)) 8777 return; 8778 8779 if (bpc == 0) 8780 return; 8781 8782 switch (bpc) { 8783 case 6: 8784 if (dither == RADEON_FMT_DITHER_ENABLE) 8785 /* XXX sort out optimal dither settings */ 8786 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE | 8787 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0)); 8788 else 8789 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0)); 8790 break; 8791 case 8: 8792 if (dither == RADEON_FMT_DITHER_ENABLE) 8793 /* XXX sort out optimal dither settings */ 8794 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE | 8795 FMT_RGB_RANDOM_ENABLE | 8796 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1)); 8797 else 8798 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1)); 8799 break; 8800 case 10: 8801 if (dither == RADEON_FMT_DITHER_ENABLE) 8802 /* XXX sort out optimal dither settings */ 8803 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE | 8804 FMT_RGB_RANDOM_ENABLE | 8805 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2)); 8806 else 8807 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2)); 8808 break; 8809 default: 8810 /* not needed */ 8811 break; 8812 } 8813 8814 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp); 8815 } 8816 8817 /* display watermark setup */ 8818 /** 8819 * dce8_line_buffer_adjust - Set up the line buffer 8820 * 8821 * @rdev: radeon_device pointer 8822 * @radeon_crtc: the selected display controller 8823 * @mode: the current display mode on the selected display 8824 * controller 8825 * 8826 * Setup up the line buffer allocation for 8827 * the selected display controller (CIK). 8828 * Returns the line buffer size in pixels. 8829 */ 8830 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev, 8831 struct radeon_crtc *radeon_crtc, 8832 struct drm_display_mode *mode) 8833 { 8834 u32 tmp, buffer_alloc, i; 8835 u32 pipe_offset = radeon_crtc->crtc_id * 0x20; 8836 /* 8837 * Line Buffer Setup 8838 * There are 6 line buffers, one for each display controllers. 8839 * There are 3 partitions per LB. Select the number of partitions 8840 * to enable based on the display width. For display widths larger 8841 * than 4096, you need use to use 2 display controllers and combine 8842 * them using the stereo blender. 8843 */ 8844 if (radeon_crtc->base.enabled && mode) { 8845 if (mode->crtc_hdisplay < 1920) { 8846 tmp = 1; 8847 buffer_alloc = 2; 8848 } else if (mode->crtc_hdisplay < 2560) { 8849 tmp = 2; 8850 buffer_alloc = 2; 8851 } else if (mode->crtc_hdisplay < 4096) { 8852 tmp = 0; 8853 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; 8854 } else { 8855 DRM_DEBUG_KMS("Mode too big for LB!\n"); 8856 tmp = 0; 8857 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4; 8858 } 8859 } else { 8860 tmp = 1; 8861 buffer_alloc = 0; 8862 } 8863 8864 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset, 8865 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0)); 8866 8867 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset, 8868 DMIF_BUFFERS_ALLOCATED(buffer_alloc)); 8869 for (i = 0; i < rdev->usec_timeout; i++) { 8870 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) & 8871 DMIF_BUFFERS_ALLOCATED_COMPLETED) 8872 break; 8873 udelay(1); 8874 } 8875 8876 if (radeon_crtc->base.enabled && mode) { 8877 switch (tmp) { 8878 case 0: 8879 default: 8880 return 4096 * 2; 8881 case 1: 8882 return 1920 * 2; 8883 case 2: 8884 return 2560 * 2; 8885 } 8886 } 8887 8888 /* controller not enabled, so no lb used */ 8889 return 0; 8890 } 8891 8892 /** 8893 * cik_get_number_of_dram_channels - get the number of dram channels 8894 * 8895 * @rdev: radeon_device pointer 8896 * 8897 * Look up the number of video ram channels (CIK). 8898 * Used for display watermark bandwidth calculations 8899 * Returns the number of dram channels 8900 */ 8901 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev) 8902 { 8903 u32 tmp = RREG32(MC_SHARED_CHMAP); 8904 8905 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 8906 case 0: 8907 default: 8908 return 1; 8909 case 1: 8910 return 2; 8911 case 2: 8912 return 4; 8913 case 3: 8914 return 8; 8915 case 4: 8916 return 3; 8917 case 5: 8918 return 6; 8919 case 6: 8920 return 10; 8921 case 7: 8922 return 12; 8923 case 8: 8924 return 16; 8925 } 8926 } 8927 8928 struct dce8_wm_params { 8929 u32 dram_channels; /* number of dram channels */ 8930 u32 yclk; /* bandwidth per dram data pin in kHz */ 8931 u32 sclk; /* engine clock in kHz */ 8932 u32 disp_clk; /* display clock in kHz */ 8933 u32 src_width; /* viewport width */ 8934 u32 active_time; /* active display time in ns */ 8935 u32 blank_time; /* blank time in ns */ 8936 bool interlaced; /* mode is interlaced */ 8937 fixed20_12 vsc; /* vertical scale ratio */ 8938 u32 num_heads; /* number of active crtcs */ 8939 u32 bytes_per_pixel; /* bytes per pixel display + overlay */ 8940 u32 lb_size; /* line buffer allocated to pipe */ 8941 u32 vtaps; /* vertical scaler taps */ 8942 }; 8943 8944 /** 8945 * dce8_dram_bandwidth - get the dram bandwidth 8946 * 8947 * @wm: watermark calculation data 8948 * 8949 * Calculate the raw dram bandwidth (CIK). 8950 * Used for display watermark bandwidth calculations 8951 * Returns the dram bandwidth in MBytes/s 8952 */ 8953 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm) 8954 { 8955 /* Calculate raw DRAM Bandwidth */ 8956 fixed20_12 dram_efficiency; /* 0.7 */ 8957 fixed20_12 yclk, dram_channels, bandwidth; 8958 fixed20_12 a; 8959 8960 a.full = dfixed_const(1000); 8961 yclk.full = dfixed_const(wm->yclk); 8962 yclk.full = dfixed_div(yclk, a); 8963 dram_channels.full = dfixed_const(wm->dram_channels * 4); 8964 a.full = dfixed_const(10); 8965 dram_efficiency.full = dfixed_const(7); 8966 dram_efficiency.full = dfixed_div(dram_efficiency, a); 8967 bandwidth.full = dfixed_mul(dram_channels, yclk); 8968 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); 8969 8970 return dfixed_trunc(bandwidth); 8971 } 8972 8973 /** 8974 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display 8975 * 8976 * @wm: watermark calculation data 8977 * 8978 * Calculate the dram bandwidth used for display (CIK). 8979 * Used for display watermark bandwidth calculations 8980 * Returns the dram bandwidth for display in MBytes/s 8981 */ 8982 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm) 8983 { 8984 /* Calculate DRAM Bandwidth and the part allocated to display. */ 8985 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */ 8986 fixed20_12 yclk, dram_channels, bandwidth; 8987 fixed20_12 a; 8988 8989 a.full = dfixed_const(1000); 8990 yclk.full = dfixed_const(wm->yclk); 8991 yclk.full = dfixed_div(yclk, a); 8992 dram_channels.full = dfixed_const(wm->dram_channels * 4); 8993 a.full = dfixed_const(10); 8994 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */ 8995 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a); 8996 bandwidth.full = dfixed_mul(dram_channels, yclk); 8997 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation); 8998 8999 return dfixed_trunc(bandwidth); 9000 } 9001 9002 /** 9003 * dce8_data_return_bandwidth - get the data return bandwidth 9004 * 9005 * @wm: watermark calculation data 9006 * 9007 * Calculate the data return bandwidth used for display (CIK). 9008 * Used for display watermark bandwidth calculations 9009 * Returns the data return bandwidth in MBytes/s 9010 */ 9011 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm) 9012 { 9013 /* Calculate the display Data return Bandwidth */ 9014 fixed20_12 return_efficiency; /* 0.8 */ 9015 fixed20_12 sclk, bandwidth; 9016 fixed20_12 a; 9017 9018 a.full = dfixed_const(1000); 9019 sclk.full = dfixed_const(wm->sclk); 9020 sclk.full = dfixed_div(sclk, a); 9021 a.full = dfixed_const(10); 9022 return_efficiency.full = dfixed_const(8); 9023 return_efficiency.full = dfixed_div(return_efficiency, a); 9024 a.full = dfixed_const(32); 9025 bandwidth.full = dfixed_mul(a, sclk); 9026 bandwidth.full = dfixed_mul(bandwidth, return_efficiency); 9027 9028 return dfixed_trunc(bandwidth); 9029 } 9030 9031 /** 9032 * dce8_dmif_request_bandwidth - get the dmif bandwidth 9033 * 9034 * @wm: watermark calculation data 9035 * 9036 * Calculate the dmif bandwidth used for display (CIK). 9037 * Used for display watermark bandwidth calculations 9038 * Returns the dmif bandwidth in MBytes/s 9039 */ 9040 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm) 9041 { 9042 /* Calculate the DMIF Request Bandwidth */ 9043 fixed20_12 disp_clk_request_efficiency; /* 0.8 */ 9044 fixed20_12 disp_clk, bandwidth; 9045 fixed20_12 a, b; 9046 9047 a.full = dfixed_const(1000); 9048 disp_clk.full = dfixed_const(wm->disp_clk); 9049 disp_clk.full = dfixed_div(disp_clk, a); 9050 a.full = dfixed_const(32); 9051 b.full = dfixed_mul(a, disp_clk); 9052 9053 a.full = dfixed_const(10); 9054 disp_clk_request_efficiency.full = dfixed_const(8); 9055 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); 9056 9057 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency); 9058 9059 return dfixed_trunc(bandwidth); 9060 } 9061 9062 /** 9063 * dce8_available_bandwidth - get the min available bandwidth 9064 * 9065 * @wm: watermark calculation data 9066 * 9067 * Calculate the min available bandwidth used for display (CIK). 9068 * Used for display watermark bandwidth calculations 9069 * Returns the min available bandwidth in MBytes/s 9070 */ 9071 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm) 9072 { 9073 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */ 9074 u32 dram_bandwidth = dce8_dram_bandwidth(wm); 9075 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm); 9076 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm); 9077 9078 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth)); 9079 } 9080 9081 /** 9082 * dce8_average_bandwidth - get the average available bandwidth 9083 * 9084 * @wm: watermark calculation data 9085 * 9086 * Calculate the average available bandwidth used for display (CIK). 9087 * Used for display watermark bandwidth calculations 9088 * Returns the average available bandwidth in MBytes/s 9089 */ 9090 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm) 9091 { 9092 /* Calculate the display mode Average Bandwidth 9093 * DisplayMode should contain the source and destination dimensions, 9094 * timing, etc. 9095 */ 9096 fixed20_12 bpp; 9097 fixed20_12 line_time; 9098 fixed20_12 src_width; 9099 fixed20_12 bandwidth; 9100 fixed20_12 a; 9101 9102 a.full = dfixed_const(1000); 9103 line_time.full = dfixed_const(wm->active_time + wm->blank_time); 9104 line_time.full = dfixed_div(line_time, a); 9105 bpp.full = dfixed_const(wm->bytes_per_pixel); 9106 src_width.full = dfixed_const(wm->src_width); 9107 bandwidth.full = dfixed_mul(src_width, bpp); 9108 bandwidth.full = dfixed_mul(bandwidth, wm->vsc); 9109 bandwidth.full = dfixed_div(bandwidth, line_time); 9110 9111 return dfixed_trunc(bandwidth); 9112 } 9113 9114 /** 9115 * dce8_latency_watermark - get the latency watermark 9116 * 9117 * @wm: watermark calculation data 9118 * 9119 * Calculate the latency watermark (CIK). 9120 * Used for display watermark bandwidth calculations 9121 * Returns the latency watermark in ns 9122 */ 9123 static u32 dce8_latency_watermark(struct dce8_wm_params *wm) 9124 { 9125 /* First calculate the latency in ns */ 9126 u32 mc_latency = 2000; /* 2000 ns. */ 9127 u32 available_bandwidth = dce8_available_bandwidth(wm); 9128 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth; 9129 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth; 9130 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */ 9131 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) + 9132 (wm->num_heads * cursor_line_pair_return_time); 9133 u32 latency = mc_latency + other_heads_data_return_time + dc_latency; 9134 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time; 9135 u32 tmp, dmif_size = 12288; 9136 fixed20_12 a, b, c; 9137 9138 if (wm->num_heads == 0) 9139 return 0; 9140 9141 a.full = dfixed_const(2); 9142 b.full = dfixed_const(1); 9143 if ((wm->vsc.full > a.full) || 9144 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) || 9145 (wm->vtaps >= 5) || 9146 ((wm->vsc.full >= a.full) && wm->interlaced)) 9147 max_src_lines_per_dst_line = 4; 9148 else 9149 max_src_lines_per_dst_line = 2; 9150 9151 a.full = dfixed_const(available_bandwidth); 9152 b.full = dfixed_const(wm->num_heads); 9153 a.full = dfixed_div(a, b); 9154 9155 b.full = dfixed_const(mc_latency + 512); 9156 c.full = dfixed_const(wm->disp_clk); 9157 b.full = dfixed_div(b, c); 9158 9159 c.full = dfixed_const(dmif_size); 9160 b.full = dfixed_div(c, b); 9161 9162 tmp = min(dfixed_trunc(a), dfixed_trunc(b)); 9163 9164 b.full = dfixed_const(1000); 9165 c.full = dfixed_const(wm->disp_clk); 9166 b.full = dfixed_div(c, b); 9167 c.full = dfixed_const(wm->bytes_per_pixel); 9168 b.full = dfixed_mul(b, c); 9169 9170 lb_fill_bw = min(tmp, dfixed_trunc(b)); 9171 9172 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); 9173 b.full = dfixed_const(1000); 9174 c.full = dfixed_const(lb_fill_bw); 9175 b.full = dfixed_div(c, b); 9176 a.full = dfixed_div(a, b); 9177 line_fill_time = dfixed_trunc(a); 9178 9179 if (line_fill_time < wm->active_time) 9180 return latency; 9181 else 9182 return latency + (line_fill_time - wm->active_time); 9183 9184 } 9185 9186 /** 9187 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check 9188 * average and available dram bandwidth 9189 * 9190 * @wm: watermark calculation data 9191 * 9192 * Check if the display average bandwidth fits in the display 9193 * dram bandwidth (CIK). 9194 * Used for display watermark bandwidth calculations 9195 * Returns true if the display fits, false if not. 9196 */ 9197 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm) 9198 { 9199 if (dce8_average_bandwidth(wm) <= 9200 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads)) 9201 return true; 9202 else 9203 return false; 9204 } 9205 9206 /** 9207 * dce8_average_bandwidth_vs_available_bandwidth - check 9208 * average and available bandwidth 9209 * 9210 * @wm: watermark calculation data 9211 * 9212 * Check if the display average bandwidth fits in the display 9213 * available bandwidth (CIK). 9214 * Used for display watermark bandwidth calculations 9215 * Returns true if the display fits, false if not. 9216 */ 9217 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm) 9218 { 9219 if (dce8_average_bandwidth(wm) <= 9220 (dce8_available_bandwidth(wm) / wm->num_heads)) 9221 return true; 9222 else 9223 return false; 9224 } 9225 9226 /** 9227 * dce8_check_latency_hiding - check latency hiding 9228 * 9229 * @wm: watermark calculation data 9230 * 9231 * Check latency hiding (CIK). 9232 * Used for display watermark bandwidth calculations 9233 * Returns true if the display fits, false if not. 9234 */ 9235 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm) 9236 { 9237 u32 lb_partitions = wm->lb_size / wm->src_width; 9238 u32 line_time = wm->active_time + wm->blank_time; 9239 u32 latency_tolerant_lines; 9240 u32 latency_hiding; 9241 fixed20_12 a; 9242 9243 a.full = dfixed_const(1); 9244 if (wm->vsc.full > a.full) 9245 latency_tolerant_lines = 1; 9246 else { 9247 if (lb_partitions <= (wm->vtaps + 1)) 9248 latency_tolerant_lines = 1; 9249 else 9250 latency_tolerant_lines = 2; 9251 } 9252 9253 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time); 9254 9255 if (dce8_latency_watermark(wm) <= latency_hiding) 9256 return true; 9257 else 9258 return false; 9259 } 9260 9261 /** 9262 * dce8_program_watermarks - program display watermarks 9263 * 9264 * @rdev: radeon_device pointer 9265 * @radeon_crtc: the selected display controller 9266 * @lb_size: line buffer size 9267 * @num_heads: number of display controllers in use 9268 * 9269 * Calculate and program the display watermarks for the 9270 * selected display controller (CIK). 9271 */ 9272 static void dce8_program_watermarks(struct radeon_device *rdev, 9273 struct radeon_crtc *radeon_crtc, 9274 u32 lb_size, u32 num_heads) 9275 { 9276 struct drm_display_mode *mode = &radeon_crtc->base.mode; 9277 struct dce8_wm_params wm_low, wm_high; 9278 u32 pixel_period; 9279 u32 line_time = 0; 9280 u32 latency_watermark_a = 0, latency_watermark_b = 0; 9281 u32 tmp, wm_mask; 9282 9283 if (radeon_crtc->base.enabled && num_heads && mode) { 9284 pixel_period = 1000000 / (u32)mode->clock; 9285 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); 9286 9287 /* watermark for high clocks */ 9288 if ((rdev->pm.pm_method == PM_METHOD_DPM) && 9289 rdev->pm.dpm_enabled) { 9290 wm_high.yclk = 9291 radeon_dpm_get_mclk(rdev, false) * 10; 9292 wm_high.sclk = 9293 radeon_dpm_get_sclk(rdev, false) * 10; 9294 } else { 9295 wm_high.yclk = rdev->pm.current_mclk * 10; 9296 wm_high.sclk = rdev->pm.current_sclk * 10; 9297 } 9298 9299 wm_high.disp_clk = mode->clock; 9300 wm_high.src_width = mode->crtc_hdisplay; 9301 wm_high.active_time = mode->crtc_hdisplay * pixel_period; 9302 wm_high.blank_time = line_time - wm_high.active_time; 9303 wm_high.interlaced = false; 9304 if (mode->flags & DRM_MODE_FLAG_INTERLACE) 9305 wm_high.interlaced = true; 9306 wm_high.vsc = radeon_crtc->vsc; 9307 wm_high.vtaps = 1; 9308 if (radeon_crtc->rmx_type != RMX_OFF) 9309 wm_high.vtaps = 2; 9310 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */ 9311 wm_high.lb_size = lb_size; 9312 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev); 9313 wm_high.num_heads = num_heads; 9314 9315 /* set for high clocks */ 9316 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535); 9317 9318 /* possibly force display priority to high */ 9319 /* should really do this at mode validation time... */ 9320 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) || 9321 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) || 9322 !dce8_check_latency_hiding(&wm_high) || 9323 (rdev->disp_priority == 2)) { 9324 DRM_DEBUG_KMS("force priority to high\n"); 9325 } 9326 9327 /* watermark for low clocks */ 9328 if ((rdev->pm.pm_method == PM_METHOD_DPM) && 9329 rdev->pm.dpm_enabled) { 9330 wm_low.yclk = 9331 radeon_dpm_get_mclk(rdev, true) * 10; 9332 wm_low.sclk = 9333 radeon_dpm_get_sclk(rdev, true) * 10; 9334 } else { 9335 wm_low.yclk = rdev->pm.current_mclk * 10; 9336 wm_low.sclk = rdev->pm.current_sclk * 10; 9337 } 9338 9339 wm_low.disp_clk = mode->clock; 9340 wm_low.src_width = mode->crtc_hdisplay; 9341 wm_low.active_time = mode->crtc_hdisplay * pixel_period; 9342 wm_low.blank_time = line_time - wm_low.active_time; 9343 wm_low.interlaced = false; 9344 if (mode->flags & DRM_MODE_FLAG_INTERLACE) 9345 wm_low.interlaced = true; 9346 wm_low.vsc = radeon_crtc->vsc; 9347 wm_low.vtaps = 1; 9348 if (radeon_crtc->rmx_type != RMX_OFF) 9349 wm_low.vtaps = 2; 9350 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */ 9351 wm_low.lb_size = lb_size; 9352 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev); 9353 wm_low.num_heads = num_heads; 9354 9355 /* set for low clocks */ 9356 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535); 9357 9358 /* possibly force display priority to high */ 9359 /* should really do this at mode validation time... */ 9360 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) || 9361 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) || 9362 !dce8_check_latency_hiding(&wm_low) || 9363 (rdev->disp_priority == 2)) { 9364 DRM_DEBUG_KMS("force priority to high\n"); 9365 } 9366 9367 /* Save number of lines the linebuffer leads before the scanout */ 9368 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); 9369 } 9370 9371 /* select wm A */ 9372 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset); 9373 tmp = wm_mask; 9374 tmp &= ~LATENCY_WATERMARK_MASK(3); 9375 tmp |= LATENCY_WATERMARK_MASK(1); 9376 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp); 9377 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset, 9378 (LATENCY_LOW_WATERMARK(latency_watermark_a) | 9379 LATENCY_HIGH_WATERMARK(line_time))); 9380 /* select wm B */ 9381 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset); 9382 tmp &= ~LATENCY_WATERMARK_MASK(3); 9383 tmp |= LATENCY_WATERMARK_MASK(2); 9384 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp); 9385 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset, 9386 (LATENCY_LOW_WATERMARK(latency_watermark_b) | 9387 LATENCY_HIGH_WATERMARK(line_time))); 9388 /* restore original selection */ 9389 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask); 9390 9391 /* save values for DPM */ 9392 radeon_crtc->line_time = line_time; 9393 radeon_crtc->wm_high = latency_watermark_a; 9394 radeon_crtc->wm_low = latency_watermark_b; 9395 } 9396 9397 /** 9398 * dce8_bandwidth_update - program display watermarks 9399 * 9400 * @rdev: radeon_device pointer 9401 * 9402 * Calculate and program the display watermarks and line 9403 * buffer allocation (CIK). 9404 */ 9405 void dce8_bandwidth_update(struct radeon_device *rdev) 9406 { 9407 struct drm_display_mode *mode = NULL; 9408 u32 num_heads = 0, lb_size; 9409 int i; 9410 9411 if (!rdev->mode_info.mode_config_initialized) 9412 return; 9413 9414 radeon_update_display_priority(rdev); 9415 9416 for (i = 0; i < rdev->num_crtc; i++) { 9417 if (rdev->mode_info.crtcs[i]->base.enabled) 9418 num_heads++; 9419 } 9420 for (i = 0; i < rdev->num_crtc; i++) { 9421 mode = &rdev->mode_info.crtcs[i]->base.mode; 9422 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode); 9423 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads); 9424 } 9425 } 9426 9427 /** 9428 * cik_get_gpu_clock_counter - return GPU clock counter snapshot 9429 * 9430 * @rdev: radeon_device pointer 9431 * 9432 * Fetches a GPU clock counter snapshot (SI). 9433 * Returns the 64 bit clock counter snapshot. 9434 */ 9435 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev) 9436 { 9437 uint64_t clock; 9438 9439 mutex_lock(&rdev->gpu_clock_mutex); 9440 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); 9441 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | 9442 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 9443 mutex_unlock(&rdev->gpu_clock_mutex); 9444 return clock; 9445 } 9446 9447 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock, 9448 u32 cntl_reg, u32 status_reg) 9449 { 9450 int r, i; 9451 struct atom_clock_dividers dividers; 9452 uint32_t tmp; 9453 9454 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, 9455 clock, false, ÷rs); 9456 if (r) 9457 return r; 9458 9459 tmp = RREG32_SMC(cntl_reg); 9460 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK); 9461 tmp |= dividers.post_divider; 9462 WREG32_SMC(cntl_reg, tmp); 9463 9464 for (i = 0; i < 100; i++) { 9465 if (RREG32_SMC(status_reg) & DCLK_STATUS) 9466 break; 9467 mdelay(10); 9468 } 9469 if (i == 100) 9470 return -ETIMEDOUT; 9471 9472 return 0; 9473 } 9474 9475 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) 9476 { 9477 int r = 0; 9478 9479 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS); 9480 if (r) 9481 return r; 9482 9483 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS); 9484 return r; 9485 } 9486 9487 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) 9488 { 9489 int r, i; 9490 struct atom_clock_dividers dividers; 9491 u32 tmp; 9492 9493 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, 9494 ecclk, false, ÷rs); 9495 if (r) 9496 return r; 9497 9498 for (i = 0; i < 100; i++) { 9499 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) 9500 break; 9501 mdelay(10); 9502 } 9503 if (i == 100) 9504 return -ETIMEDOUT; 9505 9506 tmp = RREG32_SMC(CG_ECLK_CNTL); 9507 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK); 9508 tmp |= dividers.post_divider; 9509 WREG32_SMC(CG_ECLK_CNTL, tmp); 9510 9511 for (i = 0; i < 100; i++) { 9512 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) 9513 break; 9514 mdelay(10); 9515 } 9516 if (i == 100) 9517 return -ETIMEDOUT; 9518 9519 return 0; 9520 } 9521 9522 static void cik_pcie_gen3_enable(struct radeon_device *rdev) 9523 { 9524 struct pci_dev *root = rdev->pdev->bus->self; 9525 int bridge_pos, gpu_pos; 9526 u32 speed_cntl, mask, current_data_rate; 9527 int ret, i; 9528 u16 tmp16; 9529 9530 #if 0 9531 if (pci_is_root_bus(rdev->pdev->bus)) 9532 return; 9533 #endif 9534 9535 if (radeon_pcie_gen2 == 0) 9536 return; 9537 9538 if (rdev->flags & RADEON_IS_IGP) 9539 return; 9540 9541 if (!(rdev->flags & RADEON_IS_PCIE)) 9542 return; 9543 9544 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); 9545 if (ret != 0) 9546 return; 9547 9548 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80))) 9549 return; 9550 9551 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); 9552 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> 9553 LC_CURRENT_DATA_RATE_SHIFT; 9554 if (mask & DRM_PCIE_SPEED_80) { 9555 if (current_data_rate == 2) { 9556 DRM_INFO("PCIE gen 3 link speeds already enabled\n"); 9557 return; 9558 } 9559 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n"); 9560 } else if (mask & DRM_PCIE_SPEED_50) { 9561 if (current_data_rate == 1) { 9562 DRM_INFO("PCIE gen 2 link speeds already enabled\n"); 9563 return; 9564 } 9565 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); 9566 } 9567 9568 bridge_pos = pci_pcie_cap(root); 9569 if (!bridge_pos) 9570 return; 9571 9572 gpu_pos = pci_pcie_cap(rdev->pdev); 9573 if (!gpu_pos) 9574 return; 9575 9576 if (mask & DRM_PCIE_SPEED_80) { 9577 /* re-try equalization if gen3 is not already enabled */ 9578 if (current_data_rate != 2) { 9579 u16 bridge_cfg, gpu_cfg; 9580 u16 bridge_cfg2, gpu_cfg2; 9581 u32 max_lw, current_lw, tmp; 9582 9583 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); 9584 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); 9585 9586 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; 9587 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); 9588 9589 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; 9590 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); 9591 9592 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1); 9593 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; 9594 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; 9595 9596 if (current_lw < max_lw) { 9597 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); 9598 if (tmp & LC_RENEGOTIATION_SUPPORT) { 9599 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); 9600 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); 9601 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; 9602 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); 9603 } 9604 } 9605 9606 for (i = 0; i < 10; i++) { 9607 /* check status */ 9608 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16); 9609 if (tmp16 & PCI_EXP_DEVSTA_TRPND) 9610 break; 9611 9612 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg); 9613 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg); 9614 9615 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2); 9616 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2); 9617 9618 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); 9619 tmp |= LC_SET_QUIESCE; 9620 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); 9621 9622 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); 9623 tmp |= LC_REDO_EQ; 9624 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); 9625 9626 mdelay(100); 9627 9628 /* linkctl */ 9629 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); 9630 tmp16 &= ~PCI_EXP_LNKCTL_HAWD; 9631 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); 9632 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16); 9633 9634 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16); 9635 tmp16 &= ~PCI_EXP_LNKCTL_HAWD; 9636 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); 9637 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16); 9638 9639 /* linkctl2 */ 9640 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16); 9641 tmp16 &= ~((1 << 4) | (7 << 9)); 9642 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9))); 9643 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16); 9644 9645 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); 9646 tmp16 &= ~((1 << 4) | (7 << 9)); 9647 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9))); 9648 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); 9649 9650 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); 9651 tmp &= ~LC_SET_QUIESCE; 9652 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); 9653 } 9654 } 9655 } 9656 9657 /* set the link speed */ 9658 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; 9659 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; 9660 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); 9661 9662 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); 9663 tmp16 &= ~0xf; 9664 if (mask & DRM_PCIE_SPEED_80) 9665 tmp16 |= 3; /* gen3 */ 9666 else if (mask & DRM_PCIE_SPEED_50) 9667 tmp16 |= 2; /* gen2 */ 9668 else 9669 tmp16 |= 1; /* gen1 */ 9670 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16); 9671 9672 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); 9673 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; 9674 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); 9675 9676 for (i = 0; i < rdev->usec_timeout; i++) { 9677 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); 9678 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) 9679 break; 9680 udelay(1); 9681 } 9682 } 9683 9684 static void cik_program_aspm(struct radeon_device *rdev) 9685 { 9686 u32 data, orig; 9687 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; 9688 #if 0 9689 bool disable_clkreq = false; 9690 #endif 9691 9692 if (radeon_aspm == 0) 9693 return; 9694 9695 /* XXX double check IGPs */ 9696 if (rdev->flags & RADEON_IS_IGP) 9697 return; 9698 9699 if (!(rdev->flags & RADEON_IS_PCIE)) 9700 return; 9701 9702 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); 9703 data &= ~LC_XMIT_N_FTS_MASK; 9704 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; 9705 if (orig != data) 9706 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); 9707 9708 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); 9709 data |= LC_GO_TO_RECOVERY; 9710 if (orig != data) 9711 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); 9712 9713 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL); 9714 data |= P_IGNORE_EDB_ERR; 9715 if (orig != data) 9716 WREG32_PCIE_PORT(PCIE_P_CNTL, data); 9717 9718 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); 9719 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); 9720 data |= LC_PMI_TO_L1_DIS; 9721 if (!disable_l0s) 9722 data |= LC_L0S_INACTIVITY(7); 9723 9724 if (!disable_l1) { 9725 data |= LC_L1_INACTIVITY(7); 9726 data &= ~LC_PMI_TO_L1_DIS; 9727 if (orig != data) 9728 WREG32_PCIE_PORT(PCIE_LC_CNTL, data); 9729 9730 if (!disable_plloff_in_l1) { 9731 bool clk_req_support; 9732 9733 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0); 9734 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); 9735 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); 9736 if (orig != data) 9737 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data); 9738 9739 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1); 9740 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); 9741 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); 9742 if (orig != data) 9743 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data); 9744 9745 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0); 9746 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); 9747 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); 9748 if (orig != data) 9749 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data); 9750 9751 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1); 9752 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); 9753 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); 9754 if (orig != data) 9755 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data); 9756 9757 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); 9758 data &= ~LC_DYN_LANES_PWR_STATE_MASK; 9759 data |= LC_DYN_LANES_PWR_STATE(3); 9760 if (orig != data) 9761 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); 9762 9763 #ifdef zMN_TODO 9764 if (!disable_clkreq && 9765 !pci_is_root_bus(rdev->pdev->bus)) { 9766 struct pci_dev *root = rdev->pdev->bus->self; 9767 u32 lnkcap; 9768 9769 clk_req_support = false; 9770 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap); 9771 if (lnkcap & PCI_EXP_LNKCAP_CLKPM) 9772 clk_req_support = true; 9773 } else { 9774 clk_req_support = false; 9775 } 9776 #else 9777 clk_req_support = false; 9778 #endif 9779 9780 if (clk_req_support) { 9781 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); 9782 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; 9783 if (orig != data) 9784 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); 9785 9786 orig = data = RREG32_SMC(THM_CLK_CNTL); 9787 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); 9788 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); 9789 if (orig != data) 9790 WREG32_SMC(THM_CLK_CNTL, data); 9791 9792 orig = data = RREG32_SMC(MISC_CLK_CTRL); 9793 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); 9794 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); 9795 if (orig != data) 9796 WREG32_SMC(MISC_CLK_CTRL, data); 9797 9798 orig = data = RREG32_SMC(CG_CLKPIN_CNTL); 9799 data &= ~BCLK_AS_XCLK; 9800 if (orig != data) 9801 WREG32_SMC(CG_CLKPIN_CNTL, data); 9802 9803 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2); 9804 data &= ~FORCE_BIF_REFCLK_EN; 9805 if (orig != data) 9806 WREG32_SMC(CG_CLKPIN_CNTL_2, data); 9807 9808 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL); 9809 data &= ~MPLL_CLKOUT_SEL_MASK; 9810 data |= MPLL_CLKOUT_SEL(4); 9811 if (orig != data) 9812 WREG32_SMC(MPLL_BYPASSCLK_SEL, data); 9813 } 9814 } 9815 } else { 9816 if (orig != data) 9817 WREG32_PCIE_PORT(PCIE_LC_CNTL, data); 9818 } 9819 9820 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2); 9821 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; 9822 if (orig != data) 9823 WREG32_PCIE_PORT(PCIE_CNTL2, data); 9824 9825 if (!disable_l0s) { 9826 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); 9827 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { 9828 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1); 9829 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { 9830 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); 9831 data &= ~LC_L0S_INACTIVITY_MASK; 9832 if (orig != data) 9833 WREG32_PCIE_PORT(PCIE_LC_CNTL, data); 9834 } 9835 } 9836 } 9837 } 9838