/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
21 * 22 * Authors: Alex Deucher 23 */ 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include <drm/drmP.h> 27 #include "radeon.h" 28 #include "radeon_asic.h" 29 #include "cikd.h" 30 #include "atom.h" 31 #include "cik_blit_shaders.h" 32 #include "radeon_ucode.h" 33 #include "clearstate_ci.h" 34 35 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); 36 MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); 37 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin"); 38 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin"); 39 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin"); 40 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin"); 41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin"); 42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin"); 43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin"); 44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin"); 45 MODULE_FIRMWARE("radeon/HAWAII_me.bin"); 46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin"); 47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin"); 48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin"); 49 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin"); 50 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin"); 51 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin"); 52 MODULE_FIRMWARE("radeon/HAWAII_smc.bin"); 53 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin"); 54 MODULE_FIRMWARE("radeon/KAVERI_me.bin"); 55 MODULE_FIRMWARE("radeon/KAVERI_ce.bin"); 56 MODULE_FIRMWARE("radeon/KAVERI_mec.bin"); 57 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin"); 58 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin"); 59 MODULE_FIRMWARE("radeon/KABINI_pfp.bin"); 60 MODULE_FIRMWARE("radeon/KABINI_me.bin"); 61 MODULE_FIRMWARE("radeon/KABINI_ce.bin"); 62 MODULE_FIRMWARE("radeon/KABINI_mec.bin"); 63 MODULE_FIRMWARE("radeon/KABINI_rlc.bin"); 64 MODULE_FIRMWARE("radeon/KABINI_sdma.bin"); 65 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin"); 66 MODULE_FIRMWARE("radeon/MULLINS_me.bin"); 67 MODULE_FIRMWARE("radeon/MULLINS_ce.bin"); 68 MODULE_FIRMWARE("radeon/MULLINS_mec.bin"); 69 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin"); 70 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin"); 71 72 static u32 cik_get_cu_active_bitmap(struct 
radeon_device *rdev, u32 se, u32 sh); 73 static void cik_rlc_stop(struct radeon_device *rdev); 74 static void cik_pcie_gen3_enable(struct radeon_device *rdev); 75 static void cik_program_aspm(struct radeon_device *rdev); 76 static void cik_init_pg(struct radeon_device *rdev); 77 static void cik_init_cg(struct radeon_device *rdev); 78 static void cik_fini_pg(struct radeon_device *rdev); 79 static void cik_fini_cg(struct radeon_device *rdev); 80 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 81 bool enable); 82 83 /* get temperature in millidegrees */ 84 int ci_get_temp(struct radeon_device *rdev) 85 { 86 u32 temp; 87 int actual_temp = 0; 88 89 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> 90 CTF_TEMP_SHIFT; 91 92 if (temp & 0x200) 93 actual_temp = 255; 94 else 95 actual_temp = temp & 0x1ff; 96 97 actual_temp = actual_temp * 1000; 98 99 return actual_temp; 100 } 101 102 /* get temperature in millidegrees */ 103 int kv_get_temp(struct radeon_device *rdev) 104 { 105 u32 temp; 106 int actual_temp = 0; 107 108 temp = RREG32_SMC(0xC0300E0C); 109 110 if (temp) 111 actual_temp = (temp / 8) - 49; 112 else 113 actual_temp = 0; 114 115 actual_temp = actual_temp * 1000; 116 117 return actual_temp; 118 } 119 120 /* 121 * Indirect registers accessor 122 */ 123 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg) 124 { 125 u32 r; 126 127 spin_lock(&rdev->pciep_idx_lock); 128 WREG32(PCIE_INDEX, reg); 129 (void)RREG32(PCIE_INDEX); 130 r = RREG32(PCIE_DATA); 131 spin_unlock(&rdev->pciep_idx_lock); 132 return r; 133 } 134 135 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) 136 { 137 spin_lock(&rdev->pciep_idx_lock); 138 WREG32(PCIE_INDEX, reg); 139 (void)RREG32(PCIE_INDEX); 140 WREG32(PCIE_DATA, v); 141 (void)RREG32(PCIE_DATA); 142 spin_unlock(&rdev->pciep_idx_lock); 143 } 144 145 static const u32 spectre_rlc_save_restore_register_list[] = 146 { 147 (0x0e00 << 16) | (0xc12c >> 2), 148 0x00000000, 149 (0x0e00 << 16) | (0xc140 
>> 2), 150 0x00000000, 151 (0x0e00 << 16) | (0xc150 >> 2), 152 0x00000000, 153 (0x0e00 << 16) | (0xc15c >> 2), 154 0x00000000, 155 (0x0e00 << 16) | (0xc168 >> 2), 156 0x00000000, 157 (0x0e00 << 16) | (0xc170 >> 2), 158 0x00000000, 159 (0x0e00 << 16) | (0xc178 >> 2), 160 0x00000000, 161 (0x0e00 << 16) | (0xc204 >> 2), 162 0x00000000, 163 (0x0e00 << 16) | (0xc2b4 >> 2), 164 0x00000000, 165 (0x0e00 << 16) | (0xc2b8 >> 2), 166 0x00000000, 167 (0x0e00 << 16) | (0xc2bc >> 2), 168 0x00000000, 169 (0x0e00 << 16) | (0xc2c0 >> 2), 170 0x00000000, 171 (0x0e00 << 16) | (0x8228 >> 2), 172 0x00000000, 173 (0x0e00 << 16) | (0x829c >> 2), 174 0x00000000, 175 (0x0e00 << 16) | (0x869c >> 2), 176 0x00000000, 177 (0x0600 << 16) | (0x98f4 >> 2), 178 0x00000000, 179 (0x0e00 << 16) | (0x98f8 >> 2), 180 0x00000000, 181 (0x0e00 << 16) | (0x9900 >> 2), 182 0x00000000, 183 (0x0e00 << 16) | (0xc260 >> 2), 184 0x00000000, 185 (0x0e00 << 16) | (0x90e8 >> 2), 186 0x00000000, 187 (0x0e00 << 16) | (0x3c000 >> 2), 188 0x00000000, 189 (0x0e00 << 16) | (0x3c00c >> 2), 190 0x00000000, 191 (0x0e00 << 16) | (0x8c1c >> 2), 192 0x00000000, 193 (0x0e00 << 16) | (0x9700 >> 2), 194 0x00000000, 195 (0x0e00 << 16) | (0xcd20 >> 2), 196 0x00000000, 197 (0x4e00 << 16) | (0xcd20 >> 2), 198 0x00000000, 199 (0x5e00 << 16) | (0xcd20 >> 2), 200 0x00000000, 201 (0x6e00 << 16) | (0xcd20 >> 2), 202 0x00000000, 203 (0x7e00 << 16) | (0xcd20 >> 2), 204 0x00000000, 205 (0x8e00 << 16) | (0xcd20 >> 2), 206 0x00000000, 207 (0x9e00 << 16) | (0xcd20 >> 2), 208 0x00000000, 209 (0xae00 << 16) | (0xcd20 >> 2), 210 0x00000000, 211 (0xbe00 << 16) | (0xcd20 >> 2), 212 0x00000000, 213 (0x0e00 << 16) | (0x89bc >> 2), 214 0x00000000, 215 (0x0e00 << 16) | (0x8900 >> 2), 216 0x00000000, 217 0x3, 218 (0x0e00 << 16) | (0xc130 >> 2), 219 0x00000000, 220 (0x0e00 << 16) | (0xc134 >> 2), 221 0x00000000, 222 (0x0e00 << 16) | (0xc1fc >> 2), 223 0x00000000, 224 (0x0e00 << 16) | (0xc208 >> 2), 225 0x00000000, 226 (0x0e00 << 16) | (0xc264 >> 2), 227 
0x00000000, 228 (0x0e00 << 16) | (0xc268 >> 2), 229 0x00000000, 230 (0x0e00 << 16) | (0xc26c >> 2), 231 0x00000000, 232 (0x0e00 << 16) | (0xc270 >> 2), 233 0x00000000, 234 (0x0e00 << 16) | (0xc274 >> 2), 235 0x00000000, 236 (0x0e00 << 16) | (0xc278 >> 2), 237 0x00000000, 238 (0x0e00 << 16) | (0xc27c >> 2), 239 0x00000000, 240 (0x0e00 << 16) | (0xc280 >> 2), 241 0x00000000, 242 (0x0e00 << 16) | (0xc284 >> 2), 243 0x00000000, 244 (0x0e00 << 16) | (0xc288 >> 2), 245 0x00000000, 246 (0x0e00 << 16) | (0xc28c >> 2), 247 0x00000000, 248 (0x0e00 << 16) | (0xc290 >> 2), 249 0x00000000, 250 (0x0e00 << 16) | (0xc294 >> 2), 251 0x00000000, 252 (0x0e00 << 16) | (0xc298 >> 2), 253 0x00000000, 254 (0x0e00 << 16) | (0xc29c >> 2), 255 0x00000000, 256 (0x0e00 << 16) | (0xc2a0 >> 2), 257 0x00000000, 258 (0x0e00 << 16) | (0xc2a4 >> 2), 259 0x00000000, 260 (0x0e00 << 16) | (0xc2a8 >> 2), 261 0x00000000, 262 (0x0e00 << 16) | (0xc2ac >> 2), 263 0x00000000, 264 (0x0e00 << 16) | (0xc2b0 >> 2), 265 0x00000000, 266 (0x0e00 << 16) | (0x301d0 >> 2), 267 0x00000000, 268 (0x0e00 << 16) | (0x30238 >> 2), 269 0x00000000, 270 (0x0e00 << 16) | (0x30250 >> 2), 271 0x00000000, 272 (0x0e00 << 16) | (0x30254 >> 2), 273 0x00000000, 274 (0x0e00 << 16) | (0x30258 >> 2), 275 0x00000000, 276 (0x0e00 << 16) | (0x3025c >> 2), 277 0x00000000, 278 (0x4e00 << 16) | (0xc900 >> 2), 279 0x00000000, 280 (0x5e00 << 16) | (0xc900 >> 2), 281 0x00000000, 282 (0x6e00 << 16) | (0xc900 >> 2), 283 0x00000000, 284 (0x7e00 << 16) | (0xc900 >> 2), 285 0x00000000, 286 (0x8e00 << 16) | (0xc900 >> 2), 287 0x00000000, 288 (0x9e00 << 16) | (0xc900 >> 2), 289 0x00000000, 290 (0xae00 << 16) | (0xc900 >> 2), 291 0x00000000, 292 (0xbe00 << 16) | (0xc900 >> 2), 293 0x00000000, 294 (0x4e00 << 16) | (0xc904 >> 2), 295 0x00000000, 296 (0x5e00 << 16) | (0xc904 >> 2), 297 0x00000000, 298 (0x6e00 << 16) | (0xc904 >> 2), 299 0x00000000, 300 (0x7e00 << 16) | (0xc904 >> 2), 301 0x00000000, 302 (0x8e00 << 16) | (0xc904 >> 2), 303 0x00000000, 304 
(0x9e00 << 16) | (0xc904 >> 2), 305 0x00000000, 306 (0xae00 << 16) | (0xc904 >> 2), 307 0x00000000, 308 (0xbe00 << 16) | (0xc904 >> 2), 309 0x00000000, 310 (0x4e00 << 16) | (0xc908 >> 2), 311 0x00000000, 312 (0x5e00 << 16) | (0xc908 >> 2), 313 0x00000000, 314 (0x6e00 << 16) | (0xc908 >> 2), 315 0x00000000, 316 (0x7e00 << 16) | (0xc908 >> 2), 317 0x00000000, 318 (0x8e00 << 16) | (0xc908 >> 2), 319 0x00000000, 320 (0x9e00 << 16) | (0xc908 >> 2), 321 0x00000000, 322 (0xae00 << 16) | (0xc908 >> 2), 323 0x00000000, 324 (0xbe00 << 16) | (0xc908 >> 2), 325 0x00000000, 326 (0x4e00 << 16) | (0xc90c >> 2), 327 0x00000000, 328 (0x5e00 << 16) | (0xc90c >> 2), 329 0x00000000, 330 (0x6e00 << 16) | (0xc90c >> 2), 331 0x00000000, 332 (0x7e00 << 16) | (0xc90c >> 2), 333 0x00000000, 334 (0x8e00 << 16) | (0xc90c >> 2), 335 0x00000000, 336 (0x9e00 << 16) | (0xc90c >> 2), 337 0x00000000, 338 (0xae00 << 16) | (0xc90c >> 2), 339 0x00000000, 340 (0xbe00 << 16) | (0xc90c >> 2), 341 0x00000000, 342 (0x4e00 << 16) | (0xc910 >> 2), 343 0x00000000, 344 (0x5e00 << 16) | (0xc910 >> 2), 345 0x00000000, 346 (0x6e00 << 16) | (0xc910 >> 2), 347 0x00000000, 348 (0x7e00 << 16) | (0xc910 >> 2), 349 0x00000000, 350 (0x8e00 << 16) | (0xc910 >> 2), 351 0x00000000, 352 (0x9e00 << 16) | (0xc910 >> 2), 353 0x00000000, 354 (0xae00 << 16) | (0xc910 >> 2), 355 0x00000000, 356 (0xbe00 << 16) | (0xc910 >> 2), 357 0x00000000, 358 (0x0e00 << 16) | (0xc99c >> 2), 359 0x00000000, 360 (0x0e00 << 16) | (0x9834 >> 2), 361 0x00000000, 362 (0x0000 << 16) | (0x30f00 >> 2), 363 0x00000000, 364 (0x0001 << 16) | (0x30f00 >> 2), 365 0x00000000, 366 (0x0000 << 16) | (0x30f04 >> 2), 367 0x00000000, 368 (0x0001 << 16) | (0x30f04 >> 2), 369 0x00000000, 370 (0x0000 << 16) | (0x30f08 >> 2), 371 0x00000000, 372 (0x0001 << 16) | (0x30f08 >> 2), 373 0x00000000, 374 (0x0000 << 16) | (0x30f0c >> 2), 375 0x00000000, 376 (0x0001 << 16) | (0x30f0c >> 2), 377 0x00000000, 378 (0x0600 << 16) | (0x9b7c >> 2), 379 0x00000000, 380 (0x0e00 << 16) 
| (0x8a14 >> 2), 381 0x00000000, 382 (0x0e00 << 16) | (0x8a18 >> 2), 383 0x00000000, 384 (0x0600 << 16) | (0x30a00 >> 2), 385 0x00000000, 386 (0x0e00 << 16) | (0x8bf0 >> 2), 387 0x00000000, 388 (0x0e00 << 16) | (0x8bcc >> 2), 389 0x00000000, 390 (0x0e00 << 16) | (0x8b24 >> 2), 391 0x00000000, 392 (0x0e00 << 16) | (0x30a04 >> 2), 393 0x00000000, 394 (0x0600 << 16) | (0x30a10 >> 2), 395 0x00000000, 396 (0x0600 << 16) | (0x30a14 >> 2), 397 0x00000000, 398 (0x0600 << 16) | (0x30a18 >> 2), 399 0x00000000, 400 (0x0600 << 16) | (0x30a2c >> 2), 401 0x00000000, 402 (0x0e00 << 16) | (0xc700 >> 2), 403 0x00000000, 404 (0x0e00 << 16) | (0xc704 >> 2), 405 0x00000000, 406 (0x0e00 << 16) | (0xc708 >> 2), 407 0x00000000, 408 (0x0e00 << 16) | (0xc768 >> 2), 409 0x00000000, 410 (0x0400 << 16) | (0xc770 >> 2), 411 0x00000000, 412 (0x0400 << 16) | (0xc774 >> 2), 413 0x00000000, 414 (0x0400 << 16) | (0xc778 >> 2), 415 0x00000000, 416 (0x0400 << 16) | (0xc77c >> 2), 417 0x00000000, 418 (0x0400 << 16) | (0xc780 >> 2), 419 0x00000000, 420 (0x0400 << 16) | (0xc784 >> 2), 421 0x00000000, 422 (0x0400 << 16) | (0xc788 >> 2), 423 0x00000000, 424 (0x0400 << 16) | (0xc78c >> 2), 425 0x00000000, 426 (0x0400 << 16) | (0xc798 >> 2), 427 0x00000000, 428 (0x0400 << 16) | (0xc79c >> 2), 429 0x00000000, 430 (0x0400 << 16) | (0xc7a0 >> 2), 431 0x00000000, 432 (0x0400 << 16) | (0xc7a4 >> 2), 433 0x00000000, 434 (0x0400 << 16) | (0xc7a8 >> 2), 435 0x00000000, 436 (0x0400 << 16) | (0xc7ac >> 2), 437 0x00000000, 438 (0x0400 << 16) | (0xc7b0 >> 2), 439 0x00000000, 440 (0x0400 << 16) | (0xc7b4 >> 2), 441 0x00000000, 442 (0x0e00 << 16) | (0x9100 >> 2), 443 0x00000000, 444 (0x0e00 << 16) | (0x3c010 >> 2), 445 0x00000000, 446 (0x0e00 << 16) | (0x92a8 >> 2), 447 0x00000000, 448 (0x0e00 << 16) | (0x92ac >> 2), 449 0x00000000, 450 (0x0e00 << 16) | (0x92b4 >> 2), 451 0x00000000, 452 (0x0e00 << 16) | (0x92b8 >> 2), 453 0x00000000, 454 (0x0e00 << 16) | (0x92bc >> 2), 455 0x00000000, 456 (0x0e00 << 16) | (0x92c0 >> 2), 
457 0x00000000, 458 (0x0e00 << 16) | (0x92c4 >> 2), 459 0x00000000, 460 (0x0e00 << 16) | (0x92c8 >> 2), 461 0x00000000, 462 (0x0e00 << 16) | (0x92cc >> 2), 463 0x00000000, 464 (0x0e00 << 16) | (0x92d0 >> 2), 465 0x00000000, 466 (0x0e00 << 16) | (0x8c00 >> 2), 467 0x00000000, 468 (0x0e00 << 16) | (0x8c04 >> 2), 469 0x00000000, 470 (0x0e00 << 16) | (0x8c20 >> 2), 471 0x00000000, 472 (0x0e00 << 16) | (0x8c38 >> 2), 473 0x00000000, 474 (0x0e00 << 16) | (0x8c3c >> 2), 475 0x00000000, 476 (0x0e00 << 16) | (0xae00 >> 2), 477 0x00000000, 478 (0x0e00 << 16) | (0x9604 >> 2), 479 0x00000000, 480 (0x0e00 << 16) | (0xac08 >> 2), 481 0x00000000, 482 (0x0e00 << 16) | (0xac0c >> 2), 483 0x00000000, 484 (0x0e00 << 16) | (0xac10 >> 2), 485 0x00000000, 486 (0x0e00 << 16) | (0xac14 >> 2), 487 0x00000000, 488 (0x0e00 << 16) | (0xac58 >> 2), 489 0x00000000, 490 (0x0e00 << 16) | (0xac68 >> 2), 491 0x00000000, 492 (0x0e00 << 16) | (0xac6c >> 2), 493 0x00000000, 494 (0x0e00 << 16) | (0xac70 >> 2), 495 0x00000000, 496 (0x0e00 << 16) | (0xac74 >> 2), 497 0x00000000, 498 (0x0e00 << 16) | (0xac78 >> 2), 499 0x00000000, 500 (0x0e00 << 16) | (0xac7c >> 2), 501 0x00000000, 502 (0x0e00 << 16) | (0xac80 >> 2), 503 0x00000000, 504 (0x0e00 << 16) | (0xac84 >> 2), 505 0x00000000, 506 (0x0e00 << 16) | (0xac88 >> 2), 507 0x00000000, 508 (0x0e00 << 16) | (0xac8c >> 2), 509 0x00000000, 510 (0x0e00 << 16) | (0x970c >> 2), 511 0x00000000, 512 (0x0e00 << 16) | (0x9714 >> 2), 513 0x00000000, 514 (0x0e00 << 16) | (0x9718 >> 2), 515 0x00000000, 516 (0x0e00 << 16) | (0x971c >> 2), 517 0x00000000, 518 (0x0e00 << 16) | (0x31068 >> 2), 519 0x00000000, 520 (0x4e00 << 16) | (0x31068 >> 2), 521 0x00000000, 522 (0x5e00 << 16) | (0x31068 >> 2), 523 0x00000000, 524 (0x6e00 << 16) | (0x31068 >> 2), 525 0x00000000, 526 (0x7e00 << 16) | (0x31068 >> 2), 527 0x00000000, 528 (0x8e00 << 16) | (0x31068 >> 2), 529 0x00000000, 530 (0x9e00 << 16) | (0x31068 >> 2), 531 0x00000000, 532 (0xae00 << 16) | (0x31068 >> 2), 533 0x00000000, 
534 (0xbe00 << 16) | (0x31068 >> 2), 535 0x00000000, 536 (0x0e00 << 16) | (0xcd10 >> 2), 537 0x00000000, 538 (0x0e00 << 16) | (0xcd14 >> 2), 539 0x00000000, 540 (0x0e00 << 16) | (0x88b0 >> 2), 541 0x00000000, 542 (0x0e00 << 16) | (0x88b4 >> 2), 543 0x00000000, 544 (0x0e00 << 16) | (0x88b8 >> 2), 545 0x00000000, 546 (0x0e00 << 16) | (0x88bc >> 2), 547 0x00000000, 548 (0x0400 << 16) | (0x89c0 >> 2), 549 0x00000000, 550 (0x0e00 << 16) | (0x88c4 >> 2), 551 0x00000000, 552 (0x0e00 << 16) | (0x88c8 >> 2), 553 0x00000000, 554 (0x0e00 << 16) | (0x88d0 >> 2), 555 0x00000000, 556 (0x0e00 << 16) | (0x88d4 >> 2), 557 0x00000000, 558 (0x0e00 << 16) | (0x88d8 >> 2), 559 0x00000000, 560 (0x0e00 << 16) | (0x8980 >> 2), 561 0x00000000, 562 (0x0e00 << 16) | (0x30938 >> 2), 563 0x00000000, 564 (0x0e00 << 16) | (0x3093c >> 2), 565 0x00000000, 566 (0x0e00 << 16) | (0x30940 >> 2), 567 0x00000000, 568 (0x0e00 << 16) | (0x89a0 >> 2), 569 0x00000000, 570 (0x0e00 << 16) | (0x30900 >> 2), 571 0x00000000, 572 (0x0e00 << 16) | (0x30904 >> 2), 573 0x00000000, 574 (0x0e00 << 16) | (0x89b4 >> 2), 575 0x00000000, 576 (0x0e00 << 16) | (0x3c210 >> 2), 577 0x00000000, 578 (0x0e00 << 16) | (0x3c214 >> 2), 579 0x00000000, 580 (0x0e00 << 16) | (0x3c218 >> 2), 581 0x00000000, 582 (0x0e00 << 16) | (0x8904 >> 2), 583 0x00000000, 584 0x5, 585 (0x0e00 << 16) | (0x8c28 >> 2), 586 (0x0e00 << 16) | (0x8c2c >> 2), 587 (0x0e00 << 16) | (0x8c30 >> 2), 588 (0x0e00 << 16) | (0x8c34 >> 2), 589 (0x0e00 << 16) | (0x9600 >> 2), 590 }; 591 592 static const u32 kalindi_rlc_save_restore_register_list[] = 593 { 594 (0x0e00 << 16) | (0xc12c >> 2), 595 0x00000000, 596 (0x0e00 << 16) | (0xc140 >> 2), 597 0x00000000, 598 (0x0e00 << 16) | (0xc150 >> 2), 599 0x00000000, 600 (0x0e00 << 16) | (0xc15c >> 2), 601 0x00000000, 602 (0x0e00 << 16) | (0xc168 >> 2), 603 0x00000000, 604 (0x0e00 << 16) | (0xc170 >> 2), 605 0x00000000, 606 (0x0e00 << 16) | (0xc204 >> 2), 607 0x00000000, 608 (0x0e00 << 16) | (0xc2b4 >> 2), 609 0x00000000, 610 
(0x0e00 << 16) | (0xc2b8 >> 2), 611 0x00000000, 612 (0x0e00 << 16) | (0xc2bc >> 2), 613 0x00000000, 614 (0x0e00 << 16) | (0xc2c0 >> 2), 615 0x00000000, 616 (0x0e00 << 16) | (0x8228 >> 2), 617 0x00000000, 618 (0x0e00 << 16) | (0x829c >> 2), 619 0x00000000, 620 (0x0e00 << 16) | (0x869c >> 2), 621 0x00000000, 622 (0x0600 << 16) | (0x98f4 >> 2), 623 0x00000000, 624 (0x0e00 << 16) | (0x98f8 >> 2), 625 0x00000000, 626 (0x0e00 << 16) | (0x9900 >> 2), 627 0x00000000, 628 (0x0e00 << 16) | (0xc260 >> 2), 629 0x00000000, 630 (0x0e00 << 16) | (0x90e8 >> 2), 631 0x00000000, 632 (0x0e00 << 16) | (0x3c000 >> 2), 633 0x00000000, 634 (0x0e00 << 16) | (0x3c00c >> 2), 635 0x00000000, 636 (0x0e00 << 16) | (0x8c1c >> 2), 637 0x00000000, 638 (0x0e00 << 16) | (0x9700 >> 2), 639 0x00000000, 640 (0x0e00 << 16) | (0xcd20 >> 2), 641 0x00000000, 642 (0x4e00 << 16) | (0xcd20 >> 2), 643 0x00000000, 644 (0x5e00 << 16) | (0xcd20 >> 2), 645 0x00000000, 646 (0x6e00 << 16) | (0xcd20 >> 2), 647 0x00000000, 648 (0x7e00 << 16) | (0xcd20 >> 2), 649 0x00000000, 650 (0x0e00 << 16) | (0x89bc >> 2), 651 0x00000000, 652 (0x0e00 << 16) | (0x8900 >> 2), 653 0x00000000, 654 0x3, 655 (0x0e00 << 16) | (0xc130 >> 2), 656 0x00000000, 657 (0x0e00 << 16) | (0xc134 >> 2), 658 0x00000000, 659 (0x0e00 << 16) | (0xc1fc >> 2), 660 0x00000000, 661 (0x0e00 << 16) | (0xc208 >> 2), 662 0x00000000, 663 (0x0e00 << 16) | (0xc264 >> 2), 664 0x00000000, 665 (0x0e00 << 16) | (0xc268 >> 2), 666 0x00000000, 667 (0x0e00 << 16) | (0xc26c >> 2), 668 0x00000000, 669 (0x0e00 << 16) | (0xc270 >> 2), 670 0x00000000, 671 (0x0e00 << 16) | (0xc274 >> 2), 672 0x00000000, 673 (0x0e00 << 16) | (0xc28c >> 2), 674 0x00000000, 675 (0x0e00 << 16) | (0xc290 >> 2), 676 0x00000000, 677 (0x0e00 << 16) | (0xc294 >> 2), 678 0x00000000, 679 (0x0e00 << 16) | (0xc298 >> 2), 680 0x00000000, 681 (0x0e00 << 16) | (0xc2a0 >> 2), 682 0x00000000, 683 (0x0e00 << 16) | (0xc2a4 >> 2), 684 0x00000000, 685 (0x0e00 << 16) | (0xc2a8 >> 2), 686 0x00000000, 687 (0x0e00 << 
16) | (0xc2ac >> 2), 688 0x00000000, 689 (0x0e00 << 16) | (0x301d0 >> 2), 690 0x00000000, 691 (0x0e00 << 16) | (0x30238 >> 2), 692 0x00000000, 693 (0x0e00 << 16) | (0x30250 >> 2), 694 0x00000000, 695 (0x0e00 << 16) | (0x30254 >> 2), 696 0x00000000, 697 (0x0e00 << 16) | (0x30258 >> 2), 698 0x00000000, 699 (0x0e00 << 16) | (0x3025c >> 2), 700 0x00000000, 701 (0x4e00 << 16) | (0xc900 >> 2), 702 0x00000000, 703 (0x5e00 << 16) | (0xc900 >> 2), 704 0x00000000, 705 (0x6e00 << 16) | (0xc900 >> 2), 706 0x00000000, 707 (0x7e00 << 16) | (0xc900 >> 2), 708 0x00000000, 709 (0x4e00 << 16) | (0xc904 >> 2), 710 0x00000000, 711 (0x5e00 << 16) | (0xc904 >> 2), 712 0x00000000, 713 (0x6e00 << 16) | (0xc904 >> 2), 714 0x00000000, 715 (0x7e00 << 16) | (0xc904 >> 2), 716 0x00000000, 717 (0x4e00 << 16) | (0xc908 >> 2), 718 0x00000000, 719 (0x5e00 << 16) | (0xc908 >> 2), 720 0x00000000, 721 (0x6e00 << 16) | (0xc908 >> 2), 722 0x00000000, 723 (0x7e00 << 16) | (0xc908 >> 2), 724 0x00000000, 725 (0x4e00 << 16) | (0xc90c >> 2), 726 0x00000000, 727 (0x5e00 << 16) | (0xc90c >> 2), 728 0x00000000, 729 (0x6e00 << 16) | (0xc90c >> 2), 730 0x00000000, 731 (0x7e00 << 16) | (0xc90c >> 2), 732 0x00000000, 733 (0x4e00 << 16) | (0xc910 >> 2), 734 0x00000000, 735 (0x5e00 << 16) | (0xc910 >> 2), 736 0x00000000, 737 (0x6e00 << 16) | (0xc910 >> 2), 738 0x00000000, 739 (0x7e00 << 16) | (0xc910 >> 2), 740 0x00000000, 741 (0x0e00 << 16) | (0xc99c >> 2), 742 0x00000000, 743 (0x0e00 << 16) | (0x9834 >> 2), 744 0x00000000, 745 (0x0000 << 16) | (0x30f00 >> 2), 746 0x00000000, 747 (0x0000 << 16) | (0x30f04 >> 2), 748 0x00000000, 749 (0x0000 << 16) | (0x30f08 >> 2), 750 0x00000000, 751 (0x0000 << 16) | (0x30f0c >> 2), 752 0x00000000, 753 (0x0600 << 16) | (0x9b7c >> 2), 754 0x00000000, 755 (0x0e00 << 16) | (0x8a14 >> 2), 756 0x00000000, 757 (0x0e00 << 16) | (0x8a18 >> 2), 758 0x00000000, 759 (0x0600 << 16) | (0x30a00 >> 2), 760 0x00000000, 761 (0x0e00 << 16) | (0x8bf0 >> 2), 762 0x00000000, 763 (0x0e00 << 16) | 
(0x8bcc >> 2), 764 0x00000000, 765 (0x0e00 << 16) | (0x8b24 >> 2), 766 0x00000000, 767 (0x0e00 << 16) | (0x30a04 >> 2), 768 0x00000000, 769 (0x0600 << 16) | (0x30a10 >> 2), 770 0x00000000, 771 (0x0600 << 16) | (0x30a14 >> 2), 772 0x00000000, 773 (0x0600 << 16) | (0x30a18 >> 2), 774 0x00000000, 775 (0x0600 << 16) | (0x30a2c >> 2), 776 0x00000000, 777 (0x0e00 << 16) | (0xc700 >> 2), 778 0x00000000, 779 (0x0e00 << 16) | (0xc704 >> 2), 780 0x00000000, 781 (0x0e00 << 16) | (0xc708 >> 2), 782 0x00000000, 783 (0x0e00 << 16) | (0xc768 >> 2), 784 0x00000000, 785 (0x0400 << 16) | (0xc770 >> 2), 786 0x00000000, 787 (0x0400 << 16) | (0xc774 >> 2), 788 0x00000000, 789 (0x0400 << 16) | (0xc798 >> 2), 790 0x00000000, 791 (0x0400 << 16) | (0xc79c >> 2), 792 0x00000000, 793 (0x0e00 << 16) | (0x9100 >> 2), 794 0x00000000, 795 (0x0e00 << 16) | (0x3c010 >> 2), 796 0x00000000, 797 (0x0e00 << 16) | (0x8c00 >> 2), 798 0x00000000, 799 (0x0e00 << 16) | (0x8c04 >> 2), 800 0x00000000, 801 (0x0e00 << 16) | (0x8c20 >> 2), 802 0x00000000, 803 (0x0e00 << 16) | (0x8c38 >> 2), 804 0x00000000, 805 (0x0e00 << 16) | (0x8c3c >> 2), 806 0x00000000, 807 (0x0e00 << 16) | (0xae00 >> 2), 808 0x00000000, 809 (0x0e00 << 16) | (0x9604 >> 2), 810 0x00000000, 811 (0x0e00 << 16) | (0xac08 >> 2), 812 0x00000000, 813 (0x0e00 << 16) | (0xac0c >> 2), 814 0x00000000, 815 (0x0e00 << 16) | (0xac10 >> 2), 816 0x00000000, 817 (0x0e00 << 16) | (0xac14 >> 2), 818 0x00000000, 819 (0x0e00 << 16) | (0xac58 >> 2), 820 0x00000000, 821 (0x0e00 << 16) | (0xac68 >> 2), 822 0x00000000, 823 (0x0e00 << 16) | (0xac6c >> 2), 824 0x00000000, 825 (0x0e00 << 16) | (0xac70 >> 2), 826 0x00000000, 827 (0x0e00 << 16) | (0xac74 >> 2), 828 0x00000000, 829 (0x0e00 << 16) | (0xac78 >> 2), 830 0x00000000, 831 (0x0e00 << 16) | (0xac7c >> 2), 832 0x00000000, 833 (0x0e00 << 16) | (0xac80 >> 2), 834 0x00000000, 835 (0x0e00 << 16) | (0xac84 >> 2), 836 0x00000000, 837 (0x0e00 << 16) | (0xac88 >> 2), 838 0x00000000, 839 (0x0e00 << 16) | (0xac8c >> 2), 
840 0x00000000, 841 (0x0e00 << 16) | (0x970c >> 2), 842 0x00000000, 843 (0x0e00 << 16) | (0x9714 >> 2), 844 0x00000000, 845 (0x0e00 << 16) | (0x9718 >> 2), 846 0x00000000, 847 (0x0e00 << 16) | (0x971c >> 2), 848 0x00000000, 849 (0x0e00 << 16) | (0x31068 >> 2), 850 0x00000000, 851 (0x4e00 << 16) | (0x31068 >> 2), 852 0x00000000, 853 (0x5e00 << 16) | (0x31068 >> 2), 854 0x00000000, 855 (0x6e00 << 16) | (0x31068 >> 2), 856 0x00000000, 857 (0x7e00 << 16) | (0x31068 >> 2), 858 0x00000000, 859 (0x0e00 << 16) | (0xcd10 >> 2), 860 0x00000000, 861 (0x0e00 << 16) | (0xcd14 >> 2), 862 0x00000000, 863 (0x0e00 << 16) | (0x88b0 >> 2), 864 0x00000000, 865 (0x0e00 << 16) | (0x88b4 >> 2), 866 0x00000000, 867 (0x0e00 << 16) | (0x88b8 >> 2), 868 0x00000000, 869 (0x0e00 << 16) | (0x88bc >> 2), 870 0x00000000, 871 (0x0400 << 16) | (0x89c0 >> 2), 872 0x00000000, 873 (0x0e00 << 16) | (0x88c4 >> 2), 874 0x00000000, 875 (0x0e00 << 16) | (0x88c8 >> 2), 876 0x00000000, 877 (0x0e00 << 16) | (0x88d0 >> 2), 878 0x00000000, 879 (0x0e00 << 16) | (0x88d4 >> 2), 880 0x00000000, 881 (0x0e00 << 16) | (0x88d8 >> 2), 882 0x00000000, 883 (0x0e00 << 16) | (0x8980 >> 2), 884 0x00000000, 885 (0x0e00 << 16) | (0x30938 >> 2), 886 0x00000000, 887 (0x0e00 << 16) | (0x3093c >> 2), 888 0x00000000, 889 (0x0e00 << 16) | (0x30940 >> 2), 890 0x00000000, 891 (0x0e00 << 16) | (0x89a0 >> 2), 892 0x00000000, 893 (0x0e00 << 16) | (0x30900 >> 2), 894 0x00000000, 895 (0x0e00 << 16) | (0x30904 >> 2), 896 0x00000000, 897 (0x0e00 << 16) | (0x89b4 >> 2), 898 0x00000000, 899 (0x0e00 << 16) | (0x3e1fc >> 2), 900 0x00000000, 901 (0x0e00 << 16) | (0x3c210 >> 2), 902 0x00000000, 903 (0x0e00 << 16) | (0x3c214 >> 2), 904 0x00000000, 905 (0x0e00 << 16) | (0x3c218 >> 2), 906 0x00000000, 907 (0x0e00 << 16) | (0x8904 >> 2), 908 0x00000000, 909 0x5, 910 (0x0e00 << 16) | (0x8c28 >> 2), 911 (0x0e00 << 16) | (0x8c2c >> 2), 912 (0x0e00 << 16) | (0x8c30 >> 2), 913 (0x0e00 << 16) | (0x8c34 >> 2), 914 (0x0e00 << 16) | (0x9600 >> 2), 915 }; 916 
917 static const u32 bonaire_golden_spm_registers[] = 918 { 919 0x30800, 0xe0ffffff, 0xe0000000 920 }; 921 922 static const u32 bonaire_golden_common_registers[] = 923 { 924 0xc770, 0xffffffff, 0x00000800, 925 0xc774, 0xffffffff, 0x00000800, 926 0xc798, 0xffffffff, 0x00007fbf, 927 0xc79c, 0xffffffff, 0x00007faf 928 }; 929 930 static const u32 bonaire_golden_registers[] = 931 { 932 0x3354, 0x00000333, 0x00000333, 933 0x3350, 0x000c0fc0, 0x00040200, 934 0x9a10, 0x00010000, 0x00058208, 935 0x3c000, 0xffff1fff, 0x00140000, 936 0x3c200, 0xfdfc0fff, 0x00000100, 937 0x3c234, 0x40000000, 0x40000200, 938 0x9830, 0xffffffff, 0x00000000, 939 0x9834, 0xf00fffff, 0x00000400, 940 0x9838, 0x0002021c, 0x00020200, 941 0xc78, 0x00000080, 0x00000000, 942 0x5bb0, 0x000000f0, 0x00000070, 943 0x5bc0, 0xf0311fff, 0x80300000, 944 0x98f8, 0x73773777, 0x12010001, 945 0x350c, 0x00810000, 0x408af000, 946 0x7030, 0x31000111, 0x00000011, 947 0x2f48, 0x73773777, 0x12010001, 948 0x220c, 0x00007fb6, 0x0021a1b1, 949 0x2210, 0x00007fb6, 0x002021b1, 950 0x2180, 0x00007fb6, 0x00002191, 951 0x2218, 0x00007fb6, 0x002121b1, 952 0x221c, 0x00007fb6, 0x002021b1, 953 0x21dc, 0x00007fb6, 0x00002191, 954 0x21e0, 0x00007fb6, 0x00002191, 955 0x3628, 0x0000003f, 0x0000000a, 956 0x362c, 0x0000003f, 0x0000000a, 957 0x2ae4, 0x00073ffe, 0x000022a2, 958 0x240c, 0x000007ff, 0x00000000, 959 0x8a14, 0xf000003f, 0x00000007, 960 0x8bf0, 0x00002001, 0x00000001, 961 0x8b24, 0xffffffff, 0x00ffffff, 962 0x30a04, 0x0000ff0f, 0x00000000, 963 0x28a4c, 0x07ffffff, 0x06000000, 964 0x4d8, 0x00000fff, 0x00000100, 965 0x3e78, 0x00000001, 0x00000002, 966 0x9100, 0x03000000, 0x0362c688, 967 0x8c00, 0x000000ff, 0x00000001, 968 0xe40, 0x00001fff, 0x00001fff, 969 0x9060, 0x0000007f, 0x00000020, 970 0x9508, 0x00010000, 0x00010000, 971 0xac14, 0x000003ff, 0x000000f3, 972 0xac0c, 0xffffffff, 0x00001032 973 }; 974 975 static const u32 bonaire_mgcg_cgcg_init[] = 976 { 977 0xc420, 0xffffffff, 0xfffffffc, 978 0x30800, 0xffffffff, 0xe0000000, 979 
0x3c2a0, 0xffffffff, 0x00000100, 980 0x3c208, 0xffffffff, 0x00000100, 981 0x3c2c0, 0xffffffff, 0xc0000100, 982 0x3c2c8, 0xffffffff, 0xc0000100, 983 0x3c2c4, 0xffffffff, 0xc0000100, 984 0x55e4, 0xffffffff, 0x00600100, 985 0x3c280, 0xffffffff, 0x00000100, 986 0x3c214, 0xffffffff, 0x06000100, 987 0x3c220, 0xffffffff, 0x00000100, 988 0x3c218, 0xffffffff, 0x06000100, 989 0x3c204, 0xffffffff, 0x00000100, 990 0x3c2e0, 0xffffffff, 0x00000100, 991 0x3c224, 0xffffffff, 0x00000100, 992 0x3c200, 0xffffffff, 0x00000100, 993 0x3c230, 0xffffffff, 0x00000100, 994 0x3c234, 0xffffffff, 0x00000100, 995 0x3c250, 0xffffffff, 0x00000100, 996 0x3c254, 0xffffffff, 0x00000100, 997 0x3c258, 0xffffffff, 0x00000100, 998 0x3c25c, 0xffffffff, 0x00000100, 999 0x3c260, 0xffffffff, 0x00000100, 1000 0x3c27c, 0xffffffff, 0x00000100, 1001 0x3c278, 0xffffffff, 0x00000100, 1002 0x3c210, 0xffffffff, 0x06000100, 1003 0x3c290, 0xffffffff, 0x00000100, 1004 0x3c274, 0xffffffff, 0x00000100, 1005 0x3c2b4, 0xffffffff, 0x00000100, 1006 0x3c2b0, 0xffffffff, 0x00000100, 1007 0x3c270, 0xffffffff, 0x00000100, 1008 0x30800, 0xffffffff, 0xe0000000, 1009 0x3c020, 0xffffffff, 0x00010000, 1010 0x3c024, 0xffffffff, 0x00030002, 1011 0x3c028, 0xffffffff, 0x00040007, 1012 0x3c02c, 0xffffffff, 0x00060005, 1013 0x3c030, 0xffffffff, 0x00090008, 1014 0x3c034, 0xffffffff, 0x00010000, 1015 0x3c038, 0xffffffff, 0x00030002, 1016 0x3c03c, 0xffffffff, 0x00040007, 1017 0x3c040, 0xffffffff, 0x00060005, 1018 0x3c044, 0xffffffff, 0x00090008, 1019 0x3c048, 0xffffffff, 0x00010000, 1020 0x3c04c, 0xffffffff, 0x00030002, 1021 0x3c050, 0xffffffff, 0x00040007, 1022 0x3c054, 0xffffffff, 0x00060005, 1023 0x3c058, 0xffffffff, 0x00090008, 1024 0x3c05c, 0xffffffff, 0x00010000, 1025 0x3c060, 0xffffffff, 0x00030002, 1026 0x3c064, 0xffffffff, 0x00040007, 1027 0x3c068, 0xffffffff, 0x00060005, 1028 0x3c06c, 0xffffffff, 0x00090008, 1029 0x3c070, 0xffffffff, 0x00010000, 1030 0x3c074, 0xffffffff, 0x00030002, 1031 0x3c078, 0xffffffff, 0x00040007, 1032 
0x3c07c, 0xffffffff, 0x00060005, 1033 0x3c080, 0xffffffff, 0x00090008, 1034 0x3c084, 0xffffffff, 0x00010000, 1035 0x3c088, 0xffffffff, 0x00030002, 1036 0x3c08c, 0xffffffff, 0x00040007, 1037 0x3c090, 0xffffffff, 0x00060005, 1038 0x3c094, 0xffffffff, 0x00090008, 1039 0x3c098, 0xffffffff, 0x00010000, 1040 0x3c09c, 0xffffffff, 0x00030002, 1041 0x3c0a0, 0xffffffff, 0x00040007, 1042 0x3c0a4, 0xffffffff, 0x00060005, 1043 0x3c0a8, 0xffffffff, 0x00090008, 1044 0x3c000, 0xffffffff, 0x96e00200, 1045 0x8708, 0xffffffff, 0x00900100, 1046 0xc424, 0xffffffff, 0x0020003f, 1047 0x38, 0xffffffff, 0x0140001c, 1048 0x3c, 0x000f0000, 0x000f0000, 1049 0x220, 0xffffffff, 0xC060000C, 1050 0x224, 0xc0000fff, 0x00000100, 1051 0xf90, 0xffffffff, 0x00000100, 1052 0xf98, 0x00000101, 0x00000000, 1053 0x20a8, 0xffffffff, 0x00000104, 1054 0x55e4, 0xff000fff, 0x00000100, 1055 0x30cc, 0xc0000fff, 0x00000104, 1056 0xc1e4, 0x00000001, 0x00000001, 1057 0xd00c, 0xff000ff0, 0x00000100, 1058 0xd80c, 0xff000ff0, 0x00000100 1059 }; 1060 1061 static const u32 spectre_golden_spm_registers[] = 1062 { 1063 0x30800, 0xe0ffffff, 0xe0000000 1064 }; 1065 1066 static const u32 spectre_golden_common_registers[] = 1067 { 1068 0xc770, 0xffffffff, 0x00000800, 1069 0xc774, 0xffffffff, 0x00000800, 1070 0xc798, 0xffffffff, 0x00007fbf, 1071 0xc79c, 0xffffffff, 0x00007faf 1072 }; 1073 1074 static const u32 spectre_golden_registers[] = 1075 { 1076 0x3c000, 0xffff1fff, 0x96940200, 1077 0x3c00c, 0xffff0001, 0xff000000, 1078 0x3c200, 0xfffc0fff, 0x00000100, 1079 0x6ed8, 0x00010101, 0x00010000, 1080 0x9834, 0xf00fffff, 0x00000400, 1081 0x9838, 0xfffffffc, 0x00020200, 1082 0x5bb0, 0x000000f0, 0x00000070, 1083 0x5bc0, 0xf0311fff, 0x80300000, 1084 0x98f8, 0x73773777, 0x12010001, 1085 0x9b7c, 0x00ff0000, 0x00fc0000, 1086 0x2f48, 0x73773777, 0x12010001, 1087 0x8a14, 0xf000003f, 0x00000007, 1088 0x8b24, 0xffffffff, 0x00ffffff, 1089 0x28350, 0x3f3f3fff, 0x00000082, 1090 0x28354, 0x0000003f, 0x00000000, 1091 0x3e78, 0x00000001, 
0x00000002,
	/* NOTE(review): the entries above are the tail of a golden register
	 * table whose start lies before this chunk (presumably one of the
	 * spectre_golden_* tables used for Kaveri below) — verify upstream. */
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

/* Kaveri (Spectre) clockgating init sequence; consumed by
 * radeon_program_register_sequence() for CHIP_KAVERI below.
 * Layout assumed to be {reg, mask, value} triples — verify against
 * radeon_program_register_sequence(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

/* Kabini/Mullins (Kalindi) tables; used for CHIP_KABINI and (mgcg/common/spm
 * tables) CHIP_MULLINS in cik_init_golden_registers() below. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

/* Hawaii tables; used for CHIP_HAWAII in cik_init_golden_registers() below. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

/* Mullins (Godavari) golden registers; used for CHIP_MULLINS below. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not a dword-aligned register offset; it
	 * matches upstream Linux radeon as-is, but looks like a typo for
	 * 0x9834 (cf. kalindi table above) — verify before "fixing". */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};


/**
 * cik_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-family clockgating (mgcg_cgcg), golden, common and
 * spm register tables above via radeon_program_register_sequence()
 * (CIK).  Families not listed are silently skipped.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		/* Mullins shares the kalindi mgcg/common/spm tables but has
		 * its own (godavari) golden register table. */
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		/* APUs: counter clock may run at half the reference clock */
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		/* dGPUs: crystal input may be pre-divided by 4 */
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	/* index is in dwords into the doorbell aperture (ptr arithmetic on
	 * doorbell.ptr — assumed to be a u32 pointer; verify declaration) */
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		/* out-of-range reads are reported and return 0 */
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		/* out-of-range writes are dropped with an error message */
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC SEQ_IO_DEBUG {index, data} pairs written by ci_mc_load_microcode()
 * before uploading the MC ucode (Bonaire). */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

/* MC SEQ_IO_DEBUG {index, data} pairs for Hawaii. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};


/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	/* fields are masked to their hw widths: pipe/me 2 bits,
	 * vmid 4 bits, queue 3 bits */
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
1742 */ 1743 int ci_mc_load_microcode(struct radeon_device *rdev) 1744 { 1745 const __be32 *fw_data; 1746 u32 running, blackout = 0; 1747 u32 *io_mc_regs; 1748 int i, regs_size, ucode_size; 1749 1750 if (!rdev->mc_fw) 1751 return -EINVAL; 1752 1753 ucode_size = rdev->mc_fw->datasize / 4; 1754 1755 switch (rdev->family) { 1756 case CHIP_BONAIRE: 1757 io_mc_regs = (u32 *)&bonaire_io_mc_regs; 1758 regs_size = BONAIRE_IO_MC_REGS_SIZE; 1759 break; 1760 case CHIP_HAWAII: 1761 io_mc_regs = (u32 *)&hawaii_io_mc_regs; 1762 regs_size = HAWAII_IO_MC_REGS_SIZE; 1763 break; 1764 default: 1765 return -EINVAL; 1766 } 1767 1768 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK; 1769 1770 if (running == 0) { 1771 if (running) { 1772 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL); 1773 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1); 1774 } 1775 1776 /* reset the engine and set to writable */ 1777 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 1778 WREG32(MC_SEQ_SUP_CNTL, 0x00000010); 1779 1780 /* load mc io regs */ 1781 for (i = 0; i < regs_size; i++) { 1782 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]); 1783 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]); 1784 } 1785 /* load the MC ucode */ 1786 fw_data = (const __be32 *)rdev->mc_fw->data; 1787 for (i = 0; i < ucode_size; i++) 1788 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++)); 1789 1790 /* put the engine back into the active state */ 1791 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 1792 WREG32(MC_SEQ_SUP_CNTL, 0x00000004); 1793 WREG32(MC_SEQ_SUP_CNTL, 0x00000001); 1794 1795 /* wait for training to complete */ 1796 for (i = 0; i < rdev->usec_timeout; i++) { 1797 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0) 1798 break; 1799 udelay(1); 1800 } 1801 for (i = 0; i < rdev->usec_timeout; i++) { 1802 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1) 1803 break; 1804 udelay(1); 1805 } 1806 1807 if (running) 1808 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout); 1809 } 1810 1811 return 0; 1812 } 1813 1814 /** 1815 * cik_init_microcode - load 
ucode images from disk 1816 * 1817 * @rdev: radeon_device pointer 1818 * 1819 * Use the firmware interface to load the ucode images into 1820 * the driver (not loaded into hw). 1821 * Returns 0 on success, error on failure. 1822 */ 1823 static int cik_init_microcode(struct radeon_device *rdev) 1824 { 1825 const char *chip_name; 1826 size_t pfp_req_size, me_req_size, ce_req_size, 1827 mec_req_size, rlc_req_size, mc_req_size = 0, 1828 sdma_req_size, smc_req_size = 0, mc2_req_size = 0; 1829 char fw_name[30]; 1830 int err; 1831 1832 DRM_DEBUG("\n"); 1833 1834 switch (rdev->family) { 1835 case CHIP_BONAIRE: 1836 chip_name = "BONAIRE"; 1837 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1838 me_req_size = CIK_ME_UCODE_SIZE * 4; 1839 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1840 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1841 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 1842 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4; 1843 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4; 1844 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 1845 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4); 1846 break; 1847 case CHIP_HAWAII: 1848 chip_name = "HAWAII"; 1849 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1850 me_req_size = CIK_ME_UCODE_SIZE * 4; 1851 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1852 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1853 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 1854 mc_req_size = HAWAII_MC_UCODE_SIZE * 4; 1855 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4; 1856 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 1857 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4); 1858 break; 1859 case CHIP_KAVERI: 1860 chip_name = "KAVERI"; 1861 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1862 me_req_size = CIK_ME_UCODE_SIZE * 4; 1863 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1864 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1865 rlc_req_size = KV_RLC_UCODE_SIZE * 4; 1866 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 1867 break; 1868 case CHIP_KABINI: 1869 chip_name = "KABINI"; 1870 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1871 me_req_size = CIK_ME_UCODE_SIZE * 4; 
1872 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1873 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1874 rlc_req_size = KB_RLC_UCODE_SIZE * 4; 1875 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 1876 break; 1877 case CHIP_MULLINS: 1878 chip_name = "MULLINS"; 1879 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 1880 me_req_size = CIK_ME_UCODE_SIZE * 4; 1881 ce_req_size = CIK_CE_UCODE_SIZE * 4; 1882 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 1883 rlc_req_size = ML_RLC_UCODE_SIZE * 4; 1884 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 1885 break; 1886 default: BUG(); 1887 } 1888 1889 DRM_INFO("Loading %s Microcode\n", chip_name); 1890 1891 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name); 1892 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev); 1893 if (err) 1894 goto out; 1895 if (rdev->pfp_fw->datasize != pfp_req_size) { 1896 printk(KERN_ERR 1897 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 1898 rdev->pfp_fw->datasize, fw_name); 1899 err = -EINVAL; 1900 goto out; 1901 } 1902 1903 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name); 1904 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); 1905 if (err) 1906 goto out; 1907 if (rdev->me_fw->datasize != me_req_size) { 1908 printk(KERN_ERR 1909 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 1910 rdev->me_fw->datasize, fw_name); 1911 err = -EINVAL; 1912 } 1913 1914 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name); 1915 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev); 1916 if (err) 1917 goto out; 1918 if (rdev->ce_fw->datasize != ce_req_size) { 1919 printk(KERN_ERR 1920 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 1921 rdev->ce_fw->datasize, fw_name); 1922 err = -EINVAL; 1923 } 1924 1925 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name); 1926 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev); 1927 if (err) 1928 goto out; 1929 if (rdev->mec_fw->datasize != mec_req_size) { 1930 printk(KERN_ERR 1931 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 
1932 rdev->mec_fw->datasize, fw_name); 1933 err = -EINVAL; 1934 } 1935 1936 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name); 1937 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev); 1938 if (err) 1939 goto out; 1940 if (rdev->rlc_fw->datasize != rlc_req_size) { 1941 printk(KERN_ERR 1942 "cik_rlc: Bogus length %zu in firmware \"%s\"\n", 1943 rdev->rlc_fw->datasize, fw_name); 1944 err = -EINVAL; 1945 } 1946 1947 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name); 1948 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev); 1949 if (err) 1950 goto out; 1951 if (rdev->sdma_fw->datasize != sdma_req_size) { 1952 printk(KERN_ERR 1953 "cik_sdma: Bogus length %zu in firmware \"%s\"\n", 1954 rdev->sdma_fw->datasize, fw_name); 1955 err = -EINVAL; 1956 } 1957 1958 /* No SMC, MC ucode on APUs */ 1959 if (!(rdev->flags & RADEON_IS_IGP)) { 1960 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name); 1961 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 1962 if (err) { 1963 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name); 1964 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 1965 if (err) 1966 goto out; 1967 } 1968 if ((rdev->mc_fw->datasize != mc_req_size) && 1969 (rdev->mc_fw->datasize != mc2_req_size)){ 1970 printk(KERN_ERR 1971 "cik_mc: Bogus length %zu in firmware \"%s\"\n", 1972 rdev->mc_fw->datasize, fw_name); 1973 err = -EINVAL; 1974 } 1975 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize); 1976 1977 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name); 1978 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); 1979 if (err) { 1980 printk(KERN_ERR 1981 "smc: error loading firmware \"%s\"\n", 1982 fw_name); 1983 release_firmware(rdev->smc_fw); 1984 rdev->smc_fw = NULL; 1985 err = 0; 1986 } else if (rdev->smc_fw->datasize != smc_req_size) { 1987 printk(KERN_ERR 1988 "cik_smc: Bogus length %zu in firmware \"%s\"\n", 1989 rdev->smc_fw->datasize, 
fw_name); 1990 err = -EINVAL; 1991 } 1992 } 1993 1994 out: 1995 if (err) { 1996 if (err != -EINVAL) 1997 printk(KERN_ERR 1998 "cik_cp: Failed to load firmware \"%s\"\n", 1999 fw_name); 2000 release_firmware(rdev->pfp_fw); 2001 rdev->pfp_fw = NULL; 2002 release_firmware(rdev->me_fw); 2003 rdev->me_fw = NULL; 2004 release_firmware(rdev->ce_fw); 2005 rdev->ce_fw = NULL; 2006 release_firmware(rdev->mec_fw); 2007 rdev->mec_fw = NULL; 2008 release_firmware(rdev->rlc_fw); 2009 rdev->rlc_fw = NULL; 2010 release_firmware(rdev->sdma_fw); 2011 rdev->sdma_fw = NULL; 2012 release_firmware(rdev->mc_fw); 2013 rdev->mc_fw = NULL; 2014 release_firmware(rdev->smc_fw); 2015 rdev->smc_fw = NULL; 2016 } 2017 return err; 2018 } 2019 2020 /* 2021 * Core functions 2022 */ 2023 /** 2024 * cik_tiling_mode_table_init - init the hw tiling table 2025 * 2026 * @rdev: radeon_device pointer 2027 * 2028 * Starting with SI, the tiling setup is done globally in a 2029 * set of 32 tiling modes. Rather than selecting each set of 2030 * parameters per surface as on older asics, we just select 2031 * which index in the tiling table we want to use, and the 2032 * surface uses those parameters (CIK). 
2033 */ 2034 static void cik_tiling_mode_table_init(struct radeon_device *rdev) 2035 { 2036 const u32 num_tile_mode_states = 32; 2037 const u32 num_secondary_tile_mode_states = 16; 2038 u32 reg_offset, gb_tile_moden, split_equal_to_row_size; 2039 u32 num_pipe_configs; 2040 u32 num_rbs = rdev->config.cik.max_backends_per_se * 2041 rdev->config.cik.max_shader_engines; 2042 2043 switch (rdev->config.cik.mem_row_size_in_kb) { 2044 case 1: 2045 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB; 2046 break; 2047 case 2: 2048 default: 2049 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB; 2050 break; 2051 case 4: 2052 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB; 2053 break; 2054 } 2055 2056 num_pipe_configs = rdev->config.cik.max_tile_pipes; 2057 if (num_pipe_configs > 8) 2058 num_pipe_configs = 16; 2059 2060 if (num_pipe_configs == 16) { 2061 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2062 switch (reg_offset) { 2063 case 0: 2064 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2065 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2066 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2067 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2068 break; 2069 case 1: 2070 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2071 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2072 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2073 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2074 break; 2075 case 2: 2076 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2077 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2078 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2079 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2080 break; 2081 case 3: 2082 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2083 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2084 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2085 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2086 break; 2087 case 4: 2088 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2089 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2090 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2091 TILE_SPLIT(split_equal_to_row_size)); 2092 break; 2093 case 5: 2094 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2095 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2097 break; 2098 case 6: 2099 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2100 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2101 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2102 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2103 break; 2104 case 7: 2105 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2107 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2108 TILE_SPLIT(split_equal_to_row_size)); 2109 break; 2110 case 8: 2111 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2112 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2113 break; 2114 case 9: 2115 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2116 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2117 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2118 break; 2119 case 10: 2120 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2122 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2124 break; 2125 case 11: 2126 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2127 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2128 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) | 2129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2130 break; 2131 case 12: 2132 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2133 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2134 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2136 break; 2137 case 13: 2138 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2139 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2140 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 
2141 break; 2142 case 14: 2143 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2144 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2145 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2147 break; 2148 case 16: 2149 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2150 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2151 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) | 2152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2153 break; 2154 case 17: 2155 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2156 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2157 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2159 break; 2160 case 27: 2161 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2162 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2163 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2164 break; 2165 case 28: 2166 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2167 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2168 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2170 break; 2171 case 29: 2172 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2173 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2174 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) | 2175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2176 break; 2177 case 30: 2178 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2179 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2180 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2182 break; 2183 default: 2184 gb_tile_moden = 0; 2185 break; 2186 } 2187 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2188 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2189 } 2190 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2191 switch (reg_offset) { 2192 case 0: 2193 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2194 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2196 NUM_BANKS(ADDR_SURF_16_BANK)); 2197 break; 2198 case 1: 2199 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2202 NUM_BANKS(ADDR_SURF_16_BANK)); 2203 break; 2204 case 2: 2205 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2208 NUM_BANKS(ADDR_SURF_16_BANK)); 2209 break; 2210 case 3: 2211 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2214 NUM_BANKS(ADDR_SURF_16_BANK)); 2215 break; 2216 case 4: 2217 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2220 NUM_BANKS(ADDR_SURF_8_BANK)); 2221 break; 2222 case 5: 2223 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2226 NUM_BANKS(ADDR_SURF_4_BANK)); 2227 break; 2228 case 6: 2229 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2232 NUM_BANKS(ADDR_SURF_2_BANK)); 2233 break; 2234 case 8: 2235 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2238 NUM_BANKS(ADDR_SURF_16_BANK)); 2239 break; 2240 case 9: 2241 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2244 NUM_BANKS(ADDR_SURF_16_BANK)); 2245 break; 2246 case 10: 2247 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2249 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2250 NUM_BANKS(ADDR_SURF_16_BANK)); 2251 break; 2252 case 11: 2253 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2256 NUM_BANKS(ADDR_SURF_8_BANK)); 2257 break; 2258 case 12: 2259 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2262 NUM_BANKS(ADDR_SURF_4_BANK)); 2263 break; 2264 case 13: 2265 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2268 NUM_BANKS(ADDR_SURF_2_BANK)); 2269 break; 2270 case 14: 2271 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2274 NUM_BANKS(ADDR_SURF_2_BANK)); 2275 break; 2276 default: 2277 gb_tile_moden = 0; 2278 break; 2279 } 2280 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; 2281 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2282 } 2283 } else if (num_pipe_configs == 8) { 2284 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2285 switch (reg_offset) { 2286 case 0: 2287 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2288 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2289 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2290 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2291 break; 2292 case 1: 2293 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2295 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2297 break; 2298 case 2: 2299 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2300 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2301 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2303 break; 2304 case 
3: 2305 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2307 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2309 break; 2310 case 4: 2311 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2313 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2314 TILE_SPLIT(split_equal_to_row_size)); 2315 break; 2316 case 5: 2317 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2318 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2319 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2320 break; 2321 case 6: 2322 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2323 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2324 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2326 break; 2327 case 7: 2328 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2330 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2331 TILE_SPLIT(split_equal_to_row_size)); 2332 break; 2333 case 8: 2334 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2335 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2336 break; 2337 case 9: 2338 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2339 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2340 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2341 break; 2342 case 10: 2343 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2344 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2345 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2347 break; 2348 case 11: 2349 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2350 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2351 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2353 break; 2354 case 12: 2355 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2356 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2357 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2359 break; 2360 case 13: 2361 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2362 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2364 break; 2365 case 14: 2366 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2367 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2368 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2370 break; 2371 case 16: 2372 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2373 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2374 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2376 break; 2377 case 17: 2378 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2380 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2382 break; 2383 case 27: 2384 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2385 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2386 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2387 break; 2388 case 28: 2389 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2390 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2391 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2393 break; 2394 case 29: 2395 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2397 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2399 break; 2400 case 30: 2401 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2402 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2403 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2405 break; 2406 default: 2407 gb_tile_moden = 0; 2408 break; 2409 } 2410 
rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2411 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2412 } 2413 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2414 switch (reg_offset) { 2415 case 0: 2416 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2419 NUM_BANKS(ADDR_SURF_16_BANK)); 2420 break; 2421 case 1: 2422 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2425 NUM_BANKS(ADDR_SURF_16_BANK)); 2426 break; 2427 case 2: 2428 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2431 NUM_BANKS(ADDR_SURF_16_BANK)); 2432 break; 2433 case 3: 2434 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2437 NUM_BANKS(ADDR_SURF_16_BANK)); 2438 break; 2439 case 4: 2440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2443 NUM_BANKS(ADDR_SURF_8_BANK)); 2444 break; 2445 case 5: 2446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2449 NUM_BANKS(ADDR_SURF_4_BANK)); 2450 break; 2451 case 6: 2452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2455 NUM_BANKS(ADDR_SURF_2_BANK)); 2456 break; 2457 case 8: 2458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2461 NUM_BANKS(ADDR_SURF_16_BANK)); 2462 break; 2463 case 9: 2464 gb_tile_moden = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2467 NUM_BANKS(ADDR_SURF_16_BANK)); 2468 break; 2469 case 10: 2470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2473 NUM_BANKS(ADDR_SURF_16_BANK)); 2474 break; 2475 case 11: 2476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2479 NUM_BANKS(ADDR_SURF_16_BANK)); 2480 break; 2481 case 12: 2482 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2485 NUM_BANKS(ADDR_SURF_8_BANK)); 2486 break; 2487 case 13: 2488 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2491 NUM_BANKS(ADDR_SURF_4_BANK)); 2492 break; 2493 case 14: 2494 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2497 NUM_BANKS(ADDR_SURF_2_BANK)); 2498 break; 2499 default: 2500 gb_tile_moden = 0; 2501 break; 2502 } 2503 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; 2504 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2505 } 2506 } else if (num_pipe_configs == 4) { 2507 if (num_rbs == 4) { 2508 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2509 switch (reg_offset) { 2510 case 0: 2511 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2512 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2513 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2514 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2515 break; 2516 case 1: 2517 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2519 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2521 break; 2522 case 2: 2523 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2524 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2525 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2527 break; 2528 case 3: 2529 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2531 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2532 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2533 break; 2534 case 4: 2535 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2536 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2537 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2538 TILE_SPLIT(split_equal_to_row_size)); 2539 break; 2540 case 5: 2541 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2542 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2544 break; 2545 case 6: 2546 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2547 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2548 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2549 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2550 break; 2551 case 7: 2552 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2553 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2554 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2555 TILE_SPLIT(split_equal_to_row_size)); 2556 break; 2557 case 8: 2558 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2559 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2560 break; 2561 case 9: 2562 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2563 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2564 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2565 break; 2566 case 10: 2567 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2568 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2569 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2570 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2571 break; 2572 case 11: 2573 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2574 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2575 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2577 break; 2578 case 12: 2579 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2580 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2581 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2583 break; 2584 case 13: 2585 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2586 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2588 break; 2589 case 14: 2590 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2591 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2592 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2594 break; 2595 case 16: 2596 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2598 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2600 break; 2601 case 17: 2602 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2603 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2606 break; 2607 case 27: 2608 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2609 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2610 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2611 break; 2612 case 28: 2613 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2614 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2615 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2617 break; 2618 case 29: 2619 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2620 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2621 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2622 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2623 break; 2624 case 30: 2625 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2626 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2627 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2629 break; 2630 default: 2631 gb_tile_moden = 0; 2632 break; 2633 } 2634 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2635 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2636 } 2637 } else if (num_rbs < 4) { 2638 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2639 switch (reg_offset) { 2640 case 0: 2641 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2643 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2644 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2645 break; 2646 case 1: 2647 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2648 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2649 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2650 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2651 break; 2652 case 2: 2653 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2655 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2656 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2657 break; 2658 case 3: 2659 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2661 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2662 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2663 break; 2664 case 4: 2665 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2666 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2667 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2668 TILE_SPLIT(split_equal_to_row_size)); 2669 break; 2670 case 5: 2671 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2672 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2673 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2674 break; 2675 case 6: 2676 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2677 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2678 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2679 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2680 break; 2681 case 
7: 2682 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2683 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2684 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2685 TILE_SPLIT(split_equal_to_row_size)); 2686 break; 2687 case 8: 2688 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2689 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 2690 break; 2691 case 9: 2692 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2693 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 2695 break; 2696 case 10: 2697 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2699 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2701 break; 2702 case 11: 2703 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2704 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2705 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2707 break; 2708 case 12: 2709 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2710 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2711 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2713 break; 2714 case 13: 2715 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2716 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2717 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2718 break; 2719 case 14: 2720 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2721 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2722 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2724 break; 2725 case 16: 2726 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2727 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2728 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2730 break; 2731 case 17: 2732 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2733 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2734 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2735 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2736 break; 2737 case 27: 2738 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2739 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2740 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 2741 break; 2742 case 28: 2743 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2744 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2745 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2747 break; 2748 case 29: 2749 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2750 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2751 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2753 break; 2754 case 30: 2755 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2756 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2757 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 2758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2759 break; 2760 default: 2761 gb_tile_moden = 0; 2762 break; 2763 } 2764 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2765 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2766 } 2767 } 2768 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2769 switch (reg_offset) { 2770 case 0: 2771 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2774 NUM_BANKS(ADDR_SURF_16_BANK)); 2775 break; 2776 case 1: 2777 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2778 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2779 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2780 NUM_BANKS(ADDR_SURF_16_BANK)); 2781 break; 2782 case 2: 2783 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2786 NUM_BANKS(ADDR_SURF_16_BANK)); 2787 break; 2788 case 3: 2789 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2791 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2792 NUM_BANKS(ADDR_SURF_16_BANK)); 2793 break; 2794 case 4: 2795 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2796 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2797 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2798 NUM_BANKS(ADDR_SURF_16_BANK)); 2799 break; 2800 case 5: 2801 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2804 NUM_BANKS(ADDR_SURF_8_BANK)); 2805 break; 2806 case 6: 2807 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2808 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2809 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2810 NUM_BANKS(ADDR_SURF_4_BANK)); 2811 break; 2812 case 8: 2813 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2816 NUM_BANKS(ADDR_SURF_16_BANK)); 2817 break; 2818 case 9: 2819 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2822 NUM_BANKS(ADDR_SURF_16_BANK)); 2823 break; 2824 case 10: 2825 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2828 NUM_BANKS(ADDR_SURF_16_BANK)); 2829 break; 2830 case 11: 2831 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2834 NUM_BANKS(ADDR_SURF_16_BANK)); 2835 break; 2836 case 12: 2837 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2840 NUM_BANKS(ADDR_SURF_16_BANK)); 2841 break; 2842 case 13: 2843 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2846 
NUM_BANKS(ADDR_SURF_8_BANK)); 2847 break; 2848 case 14: 2849 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2852 NUM_BANKS(ADDR_SURF_4_BANK)); 2853 break; 2854 default: 2855 gb_tile_moden = 0; 2856 break; 2857 } 2858 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; 2859 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2860 } 2861 } else if (num_pipe_configs == 2) { 2862 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2863 switch (reg_offset) { 2864 case 0: 2865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2866 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2867 PIPE_CONFIG(ADDR_SURF_P2) | 2868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 2869 break; 2870 case 1: 2871 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2872 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2873 PIPE_CONFIG(ADDR_SURF_P2) | 2874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 2875 break; 2876 case 2: 2877 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2879 PIPE_CONFIG(ADDR_SURF_P2) | 2880 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2881 break; 2882 case 3: 2883 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2884 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2885 PIPE_CONFIG(ADDR_SURF_P2) | 2886 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 2887 break; 2888 case 4: 2889 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2890 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2891 PIPE_CONFIG(ADDR_SURF_P2) | 2892 TILE_SPLIT(split_equal_to_row_size)); 2893 break; 2894 case 5: 2895 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2896 PIPE_CONFIG(ADDR_SURF_P2) | 2897 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2898 break; 2899 case 6: 2900 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2901 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2902 
PIPE_CONFIG(ADDR_SURF_P2) | 2903 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 2904 break; 2905 case 7: 2906 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2907 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 2908 PIPE_CONFIG(ADDR_SURF_P2) | 2909 TILE_SPLIT(split_equal_to_row_size)); 2910 break; 2911 case 8: 2912 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2913 PIPE_CONFIG(ADDR_SURF_P2); 2914 break; 2915 case 9: 2916 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2917 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2918 PIPE_CONFIG(ADDR_SURF_P2)); 2919 break; 2920 case 10: 2921 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2922 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2923 PIPE_CONFIG(ADDR_SURF_P2) | 2924 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2925 break; 2926 case 11: 2927 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2928 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2929 PIPE_CONFIG(ADDR_SURF_P2) | 2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2931 break; 2932 case 12: 2933 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2934 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2935 PIPE_CONFIG(ADDR_SURF_P2) | 2936 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2937 break; 2938 case 13: 2939 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2940 PIPE_CONFIG(ADDR_SURF_P2) | 2941 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 2942 break; 2943 case 14: 2944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2945 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2946 PIPE_CONFIG(ADDR_SURF_P2) | 2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2948 break; 2949 case 16: 2950 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2951 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2952 PIPE_CONFIG(ADDR_SURF_P2) | 2953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2954 break; 2955 case 17: 2956 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 
2958 PIPE_CONFIG(ADDR_SURF_P2) | 2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2960 break; 2961 case 27: 2962 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2963 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2964 PIPE_CONFIG(ADDR_SURF_P2)); 2965 break; 2966 case 28: 2967 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2968 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2969 PIPE_CONFIG(ADDR_SURF_P2) | 2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2971 break; 2972 case 29: 2973 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2974 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2975 PIPE_CONFIG(ADDR_SURF_P2) | 2976 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2977 break; 2978 case 30: 2979 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 2980 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2981 PIPE_CONFIG(ADDR_SURF_P2) | 2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2983 break; 2984 default: 2985 gb_tile_moden = 0; 2986 break; 2987 } 2988 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 2989 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2990 } 2991 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 2992 switch (reg_offset) { 2993 case 0: 2994 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2997 NUM_BANKS(ADDR_SURF_16_BANK)); 2998 break; 2999 case 1: 3000 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3003 NUM_BANKS(ADDR_SURF_16_BANK)); 3004 break; 3005 case 2: 3006 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3007 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3008 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3009 NUM_BANKS(ADDR_SURF_16_BANK)); 3010 break; 3011 case 3: 3012 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3013 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3015 NUM_BANKS(ADDR_SURF_16_BANK)); 3016 break; 3017 case 4: 3018 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3019 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3020 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3021 NUM_BANKS(ADDR_SURF_16_BANK)); 3022 break; 3023 case 5: 3024 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3027 NUM_BANKS(ADDR_SURF_16_BANK)); 3028 break; 3029 case 6: 3030 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3031 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3032 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3033 NUM_BANKS(ADDR_SURF_8_BANK)); 3034 break; 3035 case 8: 3036 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3039 NUM_BANKS(ADDR_SURF_16_BANK)); 3040 break; 3041 case 9: 3042 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3045 NUM_BANKS(ADDR_SURF_16_BANK)); 3046 break; 3047 case 10: 3048 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3051 NUM_BANKS(ADDR_SURF_16_BANK)); 3052 break; 3053 case 11: 3054 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3057 NUM_BANKS(ADDR_SURF_16_BANK)); 3058 break; 3059 case 12: 3060 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3063 NUM_BANKS(ADDR_SURF_16_BANK)); 3064 break; 3065 case 13: 3066 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3068 
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				/* unlisted macrotile modes are programmed as 0 */
				gb_tile_moden = 0;
				break;
			}
			/* cache for the CS checker, then program the hw register */
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	/* always broadcast to all instances within the selected SE/SH */
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	/* build the mask one bit at a time; safe for any width up to 32
	 * (a plain (1 << bit_width) - 1 would be UB at bit_width == 32) */
	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}

/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Must be called with the target SE/SH selected via cik_select_se_sh().
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num_per_se,
			       u32 sh_per_se)
{
	u32 data, mask;

	/* bit 0 set means the fuse-level disable field is valid */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in user/driver requested disables */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* limit to the number of RBs this SH actually has */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the per-SE/SH disabled RB bits into one packed bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii packs more RB bits per SH than other CIK parts */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: enabled_rbs is the complement of disabled_rbs, bit by bit */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* program the raster config per SE; enabled_rbs is consumed
	 * two bits at a time (one RB pair per SH) */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader/pipe topology and golden GB_ADDR_CONFIG */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU count and RB count; key off the
		 * PCI device id to pick the right topology */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* enable FB read/write through the BIF */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not used below —
	 * candidate for removal (unused-but-set warning) */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the MC column count, clamped to 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active CUs across all SE/SH combinations */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	/* NOTE(review): SQ_CONFIG is written to 1 here and 0 further down —
	 * looks intentional (init sequencing) but worth confirming */
	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no changes: latches the current value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the writes settle before the engine is used */
	udelay(50);
}

/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	/* 7 scratch registers, 4 bytes apart, starting at SCRATCH_REG0 */
	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a sentinel the CP must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll until the CP executes the write or the timeout expires */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* pick the HDP flush request/done bit for this engine/pipe */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown microengine: nothing sensible to emit */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}

/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}

/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_cpdma(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	/* each DMA_DATA packet can move at most 0x1fffff bytes */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per copy packet plus fence/semaphore overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	/* wait for the previous fence before starting the copy */
	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final chunk needs CP synchronization */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* publish the post-IB rptr either via a save register
		 * or the writeback buffer, so the driver can track progress */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* encode IB length and the VM id (bits 31:24) in the control dword */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}

/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a sentinel the IB must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* IB contents: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* the fence has signaled; poll for the scratch write to land */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		/* halt all three gfx microengines */
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* reset all the ucode addresses */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear state table plus 17 dwords of init packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden clear-state table */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* register not programmed on Hawaii -- NOTE(review): presumed absent
	 * on that asic, confirm against the register spec */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is a 256-byte aligned address, hence the >> 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx CP is back: restore full VRAM as the active size for moves */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/* cik_gfx_get_rptr - current gfx ring read pointer: writeback copy when
 * writeback is enabled, otherwise read straight from the register.
 */
u32 cik_gfx_get_rptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled)
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	else
		rptr = RREG32(CP_RB0_RPTR);

	return rptr;
}

/* cik_gfx_get_wptr - current gfx ring write pointer (register read) */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	u32 wptr;

	wptr = RREG32(CP_RB0_WPTR);

	return wptr;
}

/* cik_gfx_set_wptr - publish the new gfx ring write pointer */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back -- presumably to flush the posted write; confirm */
	(void)RREG32(CP_RB0_RPTR);
}

/* cik_compute_get_rptr - compute HQD read pointer; when writeback is off the
 * queue's registers must be banked in via SRBM select under srbm_mutex.
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}

/* cik_compute_get_wptr - compute HQD write pointer, writeback or register */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}

/* cik_compute_set_wptr - publish a new compute write pointer: update the
 * writeback slot and kick the queue's doorbell.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}

/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* brief delay after toggling the MEC halt bits */
	udelay(50);
}

/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	/* only Kaveri has a second MEC; same image is loaded into both */
	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}

/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	/* release the MQD BO of each of the two compute rings */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}

/* cik_mec_fini - free the HPD EOP buffer object allocated by cik_mec_init */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}

/* per-pipe HPD EOP buffer size in bytes */
#define MEC_HPD_SIZE 2048

/* cik_mec_init - set up MEC topology counts and allocate the per-pipe
 * HPD EOP buffer in GTT.  Returns 0 for success, negative error code on
 * allocation/map failure (cleans up via cik_mec_fini).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* 2 * MEC_HPD_SIZE per pipe -- matches the eop_gpu_addr
		 * stride used in cik_cp_compute_resume */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r =
 radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}

/* CPU-side shadow of the per-queue HQD register block; the values written
 * to the CP_HQD_*/CP_MQD_* registers in cik_cp_compute_resume are mirrored
 * here inside the MQD so the hardware can reload them.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

/* Memory Queue Descriptor layout for Bonaire-class compute queues;
 * field order is hardware-defined -- do not reorder.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32
 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};

/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	spin_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1, 4-7 on MEC2 (Kaveri only) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ?
 i : (i - 4);

		/* stride matches the 2 * MEC_HPD_SIZE per-pipe allocation in
		 * cik_mec_init */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	spin_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* bank in this queue's HQD registers via SRBM */
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for it to go idle */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr =
 rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size: log2 of the ring size in dwords */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* ring test failure leaves just this queue unusable;
		 * no error is propagated to the caller */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}

/* cik_cp_enable - enable/disable both the gfx and compute command processors */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}

/* cik_cp_load_microcode - load gfx then compute CP ucode.
 * Returns 0 for success, negative error code if either image is missing.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}

/* cik_cp_fini - tear down both gfx and compute CP state */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}

/* cik_cp_resume - bring up both CPs: load ucode, program and test the
 * rings.  GUI idle interrupts are masked for the duration of the bring-up.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}

/* cik_print_gpu_status_regs - dump the busy/stall status registers used
 * for reset diagnosis to the kernel log.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}

/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp &
 SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
		RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before pulsing the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the block mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask &
 RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* no MC soft reset on IGPs (they share the system memory controller) */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert the reset bits, read back to post, then deassert */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/* GMCON register values saved across a PCI config reset on Kaveri-class IGPs */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};

/* kv_save_regs_for_reset - stash the GMCON state and quiesce the memory
 * controller's reinit engine before a PCI config reset (IGP only).
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute &
	       ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}

/* kv_restore_regs_for_reset - replay the GMCON PGFSM programming sequence
 * after a PCI config reset and restore the saved GMCON registers (IGP only).
 * The magic config/write value pairs are a hardware-defined sequence --
 * NOTE(review): do not alter ordering or values without the register spec.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}

/* cik_gpu_pci_config_reset - full asic reset via PCI config space; heavier
 * hammer than cik_gpu_soft_reset.  Halts all engines, stops memory access,
 * pulses the config reset, then waits for the asic to come back.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs share the system MC: save GMCON state so it can be replayed */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first???
*/ 5175 if (rdev->flags & RADEON_IS_IGP) 5176 kv_restore_regs_for_reset(rdev, &kv_save); 5177 } 5178 5179 /** 5180 * cik_asic_reset - soft reset GPU 5181 * 5182 * @rdev: radeon_device pointer 5183 * 5184 * Look up which blocks are hung and attempt 5185 * to reset them. 5186 * Returns 0 for success. 5187 */ 5188 int cik_asic_reset(struct radeon_device *rdev) 5189 { 5190 u32 reset_mask; 5191 5192 reset_mask = cik_gpu_check_soft_reset(rdev); 5193 5194 if (reset_mask) 5195 r600_set_bios_scratch_engine_hung(rdev, true); 5196 5197 /* try soft reset */ 5198 cik_gpu_soft_reset(rdev, reset_mask); 5199 5200 reset_mask = cik_gpu_check_soft_reset(rdev); 5201 5202 /* try pci config reset */ 5203 if (reset_mask && radeon_hard_reset) 5204 cik_gpu_pci_config_reset(rdev); 5205 5206 reset_mask = cik_gpu_check_soft_reset(rdev); 5207 5208 if (!reset_mask) 5209 r600_set_bios_scratch_engine_hung(rdev, false); 5210 5211 return 0; 5212 } 5213 5214 /** 5215 * cik_gfx_is_lockup - check if the 3D engine is locked up 5216 * 5217 * @rdev: radeon_device pointer 5218 * @ring: radeon_ring structure holding ring information 5219 * 5220 * Check if the 3D engine is locked up (CIK). 5221 * Returns true if the engine is locked, false if not. 5222 */ 5223 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 5224 { 5225 u32 reset_mask = cik_gpu_check_soft_reset(rdev); 5226 5227 if (!(reset_mask & (RADEON_RESET_GFX | 5228 RADEON_RESET_COMPUTE | 5229 RADEON_RESET_CP))) { 5230 radeon_ring_lockup_update(rdev, ring); 5231 return false; 5232 } 5233 return radeon_ring_test_lockup(rdev, ring); 5234 } 5235 5236 /* MC */ 5237 /** 5238 * cik_mc_program - program the GPU memory controller 5239 * 5240 * @rdev: radeon_device pointer 5241 * 5242 * Set the location of vram, gart, and AGP in the GPU's 5243 * physical address space (CIK). 
5244 */ 5245 static void cik_mc_program(struct radeon_device *rdev) 5246 { 5247 struct evergreen_mc_save save; 5248 u32 tmp; 5249 int i, j; 5250 5251 /* Initialize HDP */ 5252 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 5253 WREG32((0x2c14 + j), 0x00000000); 5254 WREG32((0x2c18 + j), 0x00000000); 5255 WREG32((0x2c1c + j), 0x00000000); 5256 WREG32((0x2c20 + j), 0x00000000); 5257 WREG32((0x2c24 + j), 0x00000000); 5258 } 5259 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); 5260 5261 evergreen_mc_stop(rdev, &save); 5262 if (radeon_mc_wait_for_idle(rdev)) { 5263 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 5264 } 5265 /* Lockout access through VGA aperture*/ 5266 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); 5267 /* Update configuration */ 5268 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 5269 rdev->mc.vram_start >> 12); 5270 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 5271 rdev->mc.vram_end >> 12); 5272 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 5273 rdev->vram_scratch.gpu_addr >> 12); 5274 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; 5275 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); 5276 WREG32(MC_VM_FB_LOCATION, tmp); 5277 /* XXX double check these! 
*/ 5278 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); 5279 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); 5280 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); 5281 WREG32(MC_VM_AGP_BASE, 0); 5282 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); 5283 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); 5284 if (radeon_mc_wait_for_idle(rdev)) { 5285 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 5286 } 5287 evergreen_mc_resume(rdev, &save); 5288 /* we need to own VRAM, so turn off the VGA renderer here 5289 * to stop it overwriting our objects */ 5290 rv515_vga_render_disable(rdev); 5291 } 5292 5293 /** 5294 * cik_mc_init - initialize the memory controller driver params 5295 * 5296 * @rdev: radeon_device pointer 5297 * 5298 * Look up the amount of vram, vram width, and decide how to place 5299 * vram and gart within the GPU's physical address space (CIK). 5300 * Returns 0 for success. 5301 */ 5302 static int cik_mc_init(struct radeon_device *rdev) 5303 { 5304 u32 tmp; 5305 int chansize, numchan; 5306 5307 /* Get VRAM informations */ 5308 rdev->mc.vram_is_ddr = true; 5309 tmp = RREG32(MC_ARB_RAMCFG); 5310 if (tmp & CHANSIZE_MASK) { 5311 chansize = 64; 5312 } else { 5313 chansize = 32; 5314 } 5315 tmp = RREG32(MC_SHARED_CHMAP); 5316 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 5317 case 0: 5318 default: 5319 numchan = 1; 5320 break; 5321 case 1: 5322 numchan = 2; 5323 break; 5324 case 2: 5325 numchan = 4; 5326 break; 5327 case 3: 5328 numchan = 8; 5329 break; 5330 case 4: 5331 numchan = 3; 5332 break; 5333 case 5: 5334 numchan = 6; 5335 break; 5336 case 6: 5337 numchan = 10; 5338 break; 5339 case 7: 5340 numchan = 12; 5341 break; 5342 case 8: 5343 numchan = 16; 5344 break; 5345 } 5346 rdev->mc.vram_width = numchan * chansize; 5347 /* Could aper size report 0 ? 
*/ 5348 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); 5349 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); 5350 /* size in MB on si */ 5351 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; 5352 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL; 5353 rdev->mc.visible_vram_size = rdev->mc.aper_size; 5354 si_vram_gtt_location(rdev, &rdev->mc); 5355 radeon_update_bandwidth_info(rdev); 5356 5357 return 0; 5358 } 5359 5360 /* 5361 * GART 5362 * VMID 0 is the physical GPU addresses as used by the kernel. 5363 * VMIDs 1-15 are used for userspace clients and are handled 5364 * by the radeon vm/hsa code. 5365 */ 5366 /** 5367 * cik_pcie_gart_tlb_flush - gart tlb flush callback 5368 * 5369 * @rdev: radeon_device pointer 5370 * 5371 * Flush the TLB for the VMID 0 page table (CIK). 5372 */ 5373 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev) 5374 { 5375 /* flush hdp cache */ 5376 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); 5377 5378 /* bits 0-15 are the VM contexts0-15 */ 5379 WREG32(VM_INVALIDATE_REQUEST, 0x1); 5380 } 5381 5382 /** 5383 * cik_pcie_gart_enable - gart enable 5384 * 5385 * @rdev: radeon_device pointer 5386 * 5387 * This sets up the TLBs, programs the page tables for VMID0, 5388 * sets up the hw for VMIDs 1-15 which are allocated on 5389 * demand, and sets up the global locations for the LDS, GDS, 5390 * and GPUVM for FSA64 clients (CIK). 5391 * Returns 0 for success, errors for failure. 
5392 */ 5393 static int cik_pcie_gart_enable(struct radeon_device *rdev) 5394 { 5395 int r, i; 5396 5397 if (rdev->gart.robj == NULL) { 5398 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 5399 return -EINVAL; 5400 } 5401 r = radeon_gart_table_vram_pin(rdev); 5402 if (r) 5403 return r; 5404 /* Setup TLB control */ 5405 WREG32(MC_VM_MX_L1_TLB_CNTL, 5406 (0xA << 7) | 5407 ENABLE_L1_TLB | 5408 ENABLE_L1_FRAGMENT_PROCESSING | 5409 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 5410 ENABLE_ADVANCED_DRIVER_MODEL | 5411 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 5412 /* Setup L2 cache */ 5413 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 5414 ENABLE_L2_FRAGMENT_PROCESSING | 5415 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 5416 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 5417 EFFECTIVE_L2_QUEUE_SIZE(7) | 5418 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 5419 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE); 5420 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 5421 BANK_SELECT(4) | 5422 L2_CACHE_BIGK_FRAGMENT_SIZE(4)); 5423 /* setup context0 */ 5424 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 5425 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 5426 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 5427 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 5428 (u32)(rdev->dummy_page.addr >> 12)); 5429 WREG32(VM_CONTEXT0_CNTL2, 0); 5430 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 5431 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT)); 5432 5433 WREG32(0x15D4, 0); 5434 WREG32(0x15D8, 0); 5435 WREG32(0x15DC, 0); 5436 5437 /* restore context1-15 */ 5438 /* set vm size, must be a multiple of 4 */ 5439 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); 5440 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); 5441 for (i = 1; i < 16; i++) { 5442 if (i < 8) 5443 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), 5444 rdev->vm_manager.saved_table_addr[i]); 5445 else 5446 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + 
((i - 8) << 2), 5447 rdev->vm_manager.saved_table_addr[i]); 5448 } 5449 5450 /* enable context1-15 */ 5451 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 5452 (u32)(rdev->dummy_page.addr >> 12)); 5453 WREG32(VM_CONTEXT1_CNTL2, 4); 5454 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 5455 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) | 5456 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 5457 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT | 5458 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT | 5459 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT | 5460 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT | 5461 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT | 5462 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT | 5463 VALID_PROTECTION_FAULT_ENABLE_DEFAULT | 5464 READ_PROTECTION_FAULT_ENABLE_INTERRUPT | 5465 READ_PROTECTION_FAULT_ENABLE_DEFAULT | 5466 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT | 5467 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT); 5468 5469 if (rdev->family == CHIP_KAVERI) { 5470 u32 tmp = RREG32(CHUB_CONTROL); 5471 tmp &= ~BYPASS_VM; 5472 WREG32(CHUB_CONTROL, tmp); 5473 } 5474 5475 /* XXX SH_MEM regs */ 5476 /* where to put LDS, scratch, GPUVM in FSA64 space */ 5477 spin_lock(&rdev->srbm_mutex); 5478 for (i = 0; i < 16; i++) { 5479 cik_srbm_select(rdev, 0, 0, 0, i); 5480 /* CP and shaders */ 5481 WREG32(SH_MEM_CONFIG, 0); 5482 WREG32(SH_MEM_APE1_BASE, 1); 5483 WREG32(SH_MEM_APE1_LIMIT, 0); 5484 WREG32(SH_MEM_BASES, 0); 5485 /* SDMA GFX */ 5486 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0); 5487 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0); 5488 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0); 5489 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0); 5490 /* XXX SDMA RLC - todo */ 5491 } 5492 cik_srbm_select(rdev, 0, 0, 0, 0); 5493 spin_unlock(&rdev->srbm_mutex); 5494 5495 cik_pcie_gart_tlb_flush(rdev); 5496 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 5497 (unsigned)(rdev->mc.gtt_size >> 20), 5498 (unsigned long long)rdev->gart.table_addr); 
5499 rdev->gart.ready = true; 5500 return 0; 5501 } 5502 5503 /** 5504 * cik_pcie_gart_disable - gart disable 5505 * 5506 * @rdev: radeon_device pointer 5507 * 5508 * This disables all VM page table (CIK). 5509 */ 5510 static void cik_pcie_gart_disable(struct radeon_device *rdev) 5511 { 5512 unsigned i; 5513 5514 for (i = 1; i < 16; ++i) { 5515 uint32_t reg; 5516 if (i < 8) 5517 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2); 5518 else 5519 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2); 5520 rdev->vm_manager.saved_table_addr[i] = RREG32(reg); 5521 } 5522 5523 /* Disable all tables */ 5524 WREG32(VM_CONTEXT0_CNTL, 0); 5525 WREG32(VM_CONTEXT1_CNTL, 0); 5526 /* Setup TLB control */ 5527 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS | 5528 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 5529 /* Setup L2 cache */ 5530 WREG32(VM_L2_CNTL, 5531 ENABLE_L2_FRAGMENT_PROCESSING | 5532 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 5533 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE | 5534 EFFECTIVE_L2_QUEUE_SIZE(7) | 5535 CONTEXT1_IDENTITY_ACCESS_MODE(1)); 5536 WREG32(VM_L2_CNTL2, 0); 5537 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY | 5538 L2_CACHE_BIGK_FRAGMENT_SIZE(6)); 5539 radeon_gart_table_vram_unpin(rdev); 5540 } 5541 5542 /** 5543 * cik_pcie_gart_fini - vm fini callback 5544 * 5545 * @rdev: radeon_device pointer 5546 * 5547 * Tears down the driver GART/VM setup (CIK). 5548 */ 5549 static void cik_pcie_gart_fini(struct radeon_device *rdev) 5550 { 5551 cik_pcie_gart_disable(rdev); 5552 radeon_gart_table_vram_free(rdev); 5553 radeon_gart_fini(rdev); 5554 } 5555 5556 /* vm parser */ 5557 /** 5558 * cik_ib_parse - vm ib_parse callback 5559 * 5560 * @rdev: radeon_device pointer 5561 * @ib: indirect buffer pointer 5562 * 5563 * CIK uses hw IB checking so this is a nop (CIK). 
5564 */ 5565 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 5566 { 5567 return 0; 5568 } 5569 5570 /* 5571 * vm 5572 * VMID 0 is the physical GPU addresses as used by the kernel. 5573 * VMIDs 1-15 are used for userspace clients and are handled 5574 * by the radeon vm/hsa code. 5575 */ 5576 /** 5577 * cik_vm_init - cik vm init callback 5578 * 5579 * @rdev: radeon_device pointer 5580 * 5581 * Inits cik specific vm parameters (number of VMs, base of vram for 5582 * VMIDs 1-15) (CIK). 5583 * Returns 0 for success. 5584 */ 5585 int cik_vm_init(struct radeon_device *rdev) 5586 { 5587 /* number of VMs */ 5588 rdev->vm_manager.nvm = 16; 5589 /* base offset of vram pages */ 5590 if (rdev->flags & RADEON_IS_IGP) { 5591 u64 tmp = RREG32(MC_VM_FB_OFFSET); 5592 tmp <<= 22; 5593 rdev->vm_manager.vram_base_offset = tmp; 5594 } else 5595 rdev->vm_manager.vram_base_offset = 0; 5596 5597 return 0; 5598 } 5599 5600 /** 5601 * cik_vm_fini - cik vm fini callback 5602 * 5603 * @rdev: radeon_device pointer 5604 * 5605 * Tear down any asic specific VM setup (CIK). 5606 */ 5607 void cik_vm_fini(struct radeon_device *rdev) 5608 { 5609 } 5610 5611 /** 5612 * cik_vm_decode_fault - print human readable fault info 5613 * 5614 * @rdev: radeon_device pointer 5615 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 5616 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value 5617 * 5618 * Print human readable fault information (CIK). 
5619 */ 5620 static void cik_vm_decode_fault(struct radeon_device *rdev, 5621 u32 status, u32 addr, u32 mc_client) 5622 { 5623 u32 mc_id; 5624 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; 5625 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; 5626 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 5627 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 5628 5629 if (rdev->family == CHIP_HAWAII) 5630 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 5631 else 5632 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 5633 5634 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 5635 protections, vmid, addr, 5636 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read", 5637 block, mc_client, mc_id); 5638 } 5639 5640 /** 5641 * cik_vm_flush - cik vm flush using the CP 5642 * 5643 * @rdev: radeon_device pointer 5644 * 5645 * Update the page table base and flush the VM TLB 5646 * using the CP (CIK). 
5647 */ 5648 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) 5649 { 5650 struct radeon_ring *ring = &rdev->ring[ridx]; 5651 int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); 5652 5653 if (vm == NULL) 5654 return; 5655 5656 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5657 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5658 WRITE_DATA_DST_SEL(0))); 5659 if (vm->id < 8) { 5660 radeon_ring_write(ring, 5661 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); 5662 } else { 5663 radeon_ring_write(ring, 5664 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); 5665 } 5666 radeon_ring_write(ring, 0); 5667 radeon_ring_write(ring, vm->pd_gpu_addr >> 12); 5668 5669 /* update SH_MEM_* regs */ 5670 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5671 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5672 WRITE_DATA_DST_SEL(0))); 5673 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 5674 radeon_ring_write(ring, 0); 5675 radeon_ring_write(ring, VMID(vm->id)); 5676 5677 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); 5678 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5679 WRITE_DATA_DST_SEL(0))); 5680 radeon_ring_write(ring, SH_MEM_BASES >> 2); 5681 radeon_ring_write(ring, 0); 5682 5683 radeon_ring_write(ring, 0); /* SH_MEM_BASES */ 5684 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */ 5685 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */ 5686 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ 5687 5688 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5689 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5690 WRITE_DATA_DST_SEL(0))); 5691 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); 5692 radeon_ring_write(ring, 0); 5693 radeon_ring_write(ring, VMID(0)); 5694 5695 /* HDP flush */ 5696 cik_hdp_flush_cp_ring_emit(rdev, ridx); 5697 5698 /* bits 0-15 are the VM contexts0-15 */ 5699 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5700 radeon_ring_write(ring, 
(WRITE_DATA_ENGINE_SEL(usepfp) | 5701 WRITE_DATA_DST_SEL(0))); 5702 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); 5703 radeon_ring_write(ring, 0); 5704 radeon_ring_write(ring, 1 << vm->id); 5705 5706 /* compute doesn't have PFP */ 5707 if (usepfp) { 5708 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5709 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5710 radeon_ring_write(ring, 0x0); 5711 } 5712 } 5713 5714 /* 5715 * RLC 5716 * The RLC is a multi-purpose microengine that handles a 5717 * variety of functions, the most important of which is 5718 * the interrupt controller. 5719 */ 5720 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, 5721 bool enable) 5722 { 5723 u32 tmp = RREG32(CP_INT_CNTL_RING0); 5724 5725 if (enable) 5726 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 5727 else 5728 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 5729 WREG32(CP_INT_CNTL_RING0, tmp); 5730 } 5731 5732 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable) 5733 { 5734 u32 tmp; 5735 5736 tmp = RREG32(RLC_LB_CNTL); 5737 if (enable) 5738 tmp |= LOAD_BALANCE_ENABLE; 5739 else 5740 tmp &= ~LOAD_BALANCE_ENABLE; 5741 WREG32(RLC_LB_CNTL, tmp); 5742 } 5743 5744 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) 5745 { 5746 u32 i, j, k; 5747 u32 mask; 5748 5749 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 5750 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 5751 cik_select_se_sh(rdev, i, j); 5752 for (k = 0; k < rdev->usec_timeout; k++) { 5753 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0) 5754 break; 5755 udelay(1); 5756 } 5757 } 5758 } 5759 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5760 5761 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; 5762 for (k = 0; k < rdev->usec_timeout; k++) { 5763 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 5764 break; 5765 udelay(1); 5766 } 5767 } 5768 5769 static void cik_update_rlc(struct 
radeon_device *rdev, u32 rlc) 5770 { 5771 u32 tmp; 5772 5773 tmp = RREG32(RLC_CNTL); 5774 if (tmp != rlc) 5775 WREG32(RLC_CNTL, rlc); 5776 } 5777 5778 static u32 cik_halt_rlc(struct radeon_device *rdev) 5779 { 5780 u32 data, orig; 5781 5782 orig = data = RREG32(RLC_CNTL); 5783 5784 if (data & RLC_ENABLE) { 5785 u32 i; 5786 5787 data &= ~RLC_ENABLE; 5788 WREG32(RLC_CNTL, data); 5789 5790 for (i = 0; i < rdev->usec_timeout; i++) { 5791 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0) 5792 break; 5793 udelay(1); 5794 } 5795 5796 cik_wait_for_rlc_serdes(rdev); 5797 } 5798 5799 return orig; 5800 } 5801 5802 void cik_enter_rlc_safe_mode(struct radeon_device *rdev) 5803 { 5804 u32 tmp, i, mask; 5805 5806 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE); 5807 WREG32(RLC_GPR_REG2, tmp); 5808 5809 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS; 5810 for (i = 0; i < rdev->usec_timeout; i++) { 5811 if ((RREG32(RLC_GPM_STAT) & mask) == mask) 5812 break; 5813 udelay(1); 5814 } 5815 5816 for (i = 0; i < rdev->usec_timeout; i++) { 5817 if ((RREG32(RLC_GPR_REG2) & REQ) == 0) 5818 break; 5819 udelay(1); 5820 } 5821 } 5822 5823 void cik_exit_rlc_safe_mode(struct radeon_device *rdev) 5824 { 5825 u32 tmp; 5826 5827 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE); 5828 WREG32(RLC_GPR_REG2, tmp); 5829 } 5830 5831 /** 5832 * cik_rlc_stop - stop the RLC ME 5833 * 5834 * @rdev: radeon_device pointer 5835 * 5836 * Halt the RLC ME (MicroEngine) (CIK). 5837 */ 5838 static void cik_rlc_stop(struct radeon_device *rdev) 5839 { 5840 WREG32(RLC_CNTL, 0); 5841 5842 cik_enable_gui_idle_interrupt(rdev, false); 5843 5844 cik_wait_for_rlc_serdes(rdev); 5845 } 5846 5847 /** 5848 * cik_rlc_start - start the RLC ME 5849 * 5850 * @rdev: radeon_device pointer 5851 * 5852 * Unhalt the RLC ME (MicroEngine) (CIK). 
5853 */ 5854 static void cik_rlc_start(struct radeon_device *rdev) 5855 { 5856 WREG32(RLC_CNTL, RLC_ENABLE); 5857 5858 cik_enable_gui_idle_interrupt(rdev, true); 5859 5860 udelay(50); 5861 } 5862 5863 /** 5864 * cik_rlc_resume - setup the RLC hw 5865 * 5866 * @rdev: radeon_device pointer 5867 * 5868 * Initialize the RLC registers, load the ucode, 5869 * and start the RLC (CIK). 5870 * Returns 0 for success, -EINVAL if the ucode is not available. 5871 */ 5872 static int cik_rlc_resume(struct radeon_device *rdev) 5873 { 5874 u32 i, size, tmp; 5875 const __be32 *fw_data; 5876 5877 if (!rdev->rlc_fw) 5878 return -EINVAL; 5879 5880 switch (rdev->family) { 5881 case CHIP_BONAIRE: 5882 case CHIP_HAWAII: 5883 default: 5884 size = BONAIRE_RLC_UCODE_SIZE; 5885 break; 5886 case CHIP_KAVERI: 5887 size = KV_RLC_UCODE_SIZE; 5888 break; 5889 case CHIP_KABINI: 5890 size = KB_RLC_UCODE_SIZE; 5891 break; 5892 case CHIP_MULLINS: 5893 size = ML_RLC_UCODE_SIZE; 5894 break; 5895 } 5896 5897 cik_rlc_stop(rdev); 5898 5899 /* disable CG */ 5900 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc; 5901 WREG32(RLC_CGCG_CGLS_CTRL, tmp); 5902 5903 si_rlc_reset(rdev); 5904 5905 cik_init_pg(rdev); 5906 5907 cik_init_cg(rdev); 5908 5909 WREG32(RLC_LB_CNTR_INIT, 0); 5910 WREG32(RLC_LB_CNTR_MAX, 0x00008000); 5911 5912 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 5913 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); 5914 WREG32(RLC_LB_PARAMS, 0x00600408); 5915 WREG32(RLC_LB_CNTL, 0x80000004); 5916 5917 WREG32(RLC_MC_CNTL, 0); 5918 WREG32(RLC_UCODE_CNTL, 0); 5919 5920 fw_data = (const __be32 *)rdev->rlc_fw->data; 5921 WREG32(RLC_GPM_UCODE_ADDR, 0); 5922 for (i = 0; i < size; i++) 5923 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++)); 5924 WREG32(RLC_GPM_UCODE_ADDR, 0); 5925 5926 /* XXX - find out what chips support lbpw */ 5927 cik_enable_lbpw(rdev, false); 5928 5929 if (rdev->family == CHIP_BONAIRE) 5930 WREG32(RLC_DRIVER_DMA_STATUS, 0); 5931 5932 cik_rlc_start(rdev); 5933 5934 return 0; 5935 } 5936 5937 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC, program the serdes override, then restore */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* posting reads to flush outstanding CB clock-gating state */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}

/* Enable/disable medium-grain clock gating (MGCG) and related LS modes. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override bit (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit and force all sleep modes off */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}

/* MC clock-gating control registers shared by the LS and MGCG helpers below */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};

/* Enable/disable memory-controller light sleep (LS) on all MC CG regs. */
static void cik_enable_mc_ls(struct radeon_device *rdev,
			     bool enable)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
			data |= MC_LS_ENABLE;
		else
			data &= ~MC_LS_ENABLE;
		if (data != orig)
			WREG32(mc_cg_registers[i], data);
	}
}

/* Enable/disable memory-controller medium-grain clock gating. */
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
			data |= MC_CG_ENABLE;
		else
			data &= ~MC_CG_ENABLE;
		if (data != orig)
			WREG32(mc_cg_registers[i], data);
	}
}

/* Enable/disable SDMA medium-grain clock gating on both SDMA instances. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* set the clock-override bits (31:24) to defeat gating */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}

/* Enable/disable SDMA memory light sleep (bit 8 of SDMAx_POWER_CNTL). */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	} else {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	}
}

/* Enable/disable UVD medium-grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* the read above is discarded: full gating mask is written
		 * unconditionally when enabling */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}

/* Enable/disable BIF (PCIe bus interface) memory light sleep. */
static void cik_enable_bif_mgls(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
	else
		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);

	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);
}

/* Enable/disable HDP medium-grain clock gating (note: bit is a disable). */
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	orig = data = RREG32(HDP_HOST_PATH_CNTL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
		data &= ~CLOCK_GATING_DIS;
	else
		data |= CLOCK_GATING_DIS;

	if (orig != data)
		WREG32(HDP_HOST_PATH_CNTL, data);
}

/* Enable/disable HDP memory light sleep. */
static void cik_enable_hdp_ls(struct radeon_device *rdev,
			      bool enable)
{
	u32 orig, data;

	orig = data = RREG32(HDP_MEM_POWER_LS);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
		data |= HDP_LS_ENABLE;
	else
		data &= ~HDP_LS_ENABLE;

	if (orig != data)
		WREG32(HDP_MEM_POWER_LS, data);
}

/*
 * cik_update_cg - dispatch clock-gating enable/disable to the block helpers
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* on IGPs the MC is shared with the CPU; leave it alone */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}

/* Turn on clock gating for all supported blocks at init time. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}

/* Turn off clock gating (reverse order of cik_init_cg). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6297 6298 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev, 6299 bool enable) 6300 { 6301 u32 data, orig; 6302 6303 orig = data = RREG32(RLC_PG_CNTL); 6304 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 6305 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 6306 else 6307 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE; 6308 if (orig != data) 6309 WREG32(RLC_PG_CNTL, data); 6310 } 6311 6312 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev, 6313 bool enable) 6314 { 6315 u32 data, orig; 6316 6317 orig = data = RREG32(RLC_PG_CNTL); 6318 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS)) 6319 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 6320 else 6321 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE; 6322 if (orig != data) 6323 WREG32(RLC_PG_CNTL, data); 6324 } 6325 6326 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable) 6327 { 6328 u32 data, orig; 6329 6330 orig = data = RREG32(RLC_PG_CNTL); 6331 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP)) 6332 data &= ~DISABLE_CP_PG; 6333 else 6334 data |= DISABLE_CP_PG; 6335 if (orig != data) 6336 WREG32(RLC_PG_CNTL, data); 6337 } 6338 6339 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable) 6340 { 6341 u32 data, orig; 6342 6343 orig = data = RREG32(RLC_PG_CNTL); 6344 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS)) 6345 data &= ~DISABLE_GDS_PG; 6346 else 6347 data |= DISABLE_GDS_PG; 6348 if (orig != data) 6349 WREG32(RLC_PG_CNTL, data); 6350 } 6351 6352 #define CP_ME_TABLE_SIZE 96 6353 #define CP_ME_TABLE_OFFSET 2048 6354 #define CP_MEC_TABLE_OFFSET 4096 6355 6356 void cik_init_cp_pg_table(struct radeon_device *rdev) 6357 { 6358 const __be32 *fw_data; 6359 volatile u32 *dst_ptr; 6360 int me, i, max_me = 4; 6361 u32 bo_offset = 0; 6362 u32 table_offset; 6363 6364 if (rdev->family == CHIP_KAVERI) 6365 max_me = 5; 6366 6367 if (rdev->rlc.cp_table_ptr == NULL) 6368 return; 6369 6370 /* write the cp table buffer */ 6371 dst_ptr = 
rdev->rlc.cp_table_ptr; 6372 for (me = 0; me < max_me; me++) { 6373 if (me == 0) { 6374 fw_data = (const __be32 *)rdev->ce_fw->data; 6375 table_offset = CP_ME_TABLE_OFFSET; 6376 } else if (me == 1) { 6377 fw_data = (const __be32 *)rdev->pfp_fw->data; 6378 table_offset = CP_ME_TABLE_OFFSET; 6379 } else if (me == 2) { 6380 fw_data = (const __be32 *)rdev->me_fw->data; 6381 table_offset = CP_ME_TABLE_OFFSET; 6382 } else { 6383 fw_data = (const __be32 *)rdev->mec_fw->data; 6384 table_offset = CP_MEC_TABLE_OFFSET; 6385 } 6386 6387 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) { 6388 dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i])); 6389 } 6390 bo_offset += CP_ME_TABLE_SIZE; 6391 } 6392 } 6393 6394 static void cik_enable_gfx_cgpg(struct radeon_device *rdev, 6395 bool enable) 6396 { 6397 u32 data, orig; 6398 6399 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { 6400 orig = data = RREG32(RLC_PG_CNTL); 6401 data |= GFX_PG_ENABLE; 6402 if (orig != data) 6403 WREG32(RLC_PG_CNTL, data); 6404 6405 orig = data = RREG32(RLC_AUTO_PG_CTRL); 6406 data |= AUTO_PG_EN; 6407 if (orig != data) 6408 WREG32(RLC_AUTO_PG_CTRL, data); 6409 } else { 6410 orig = data = RREG32(RLC_PG_CNTL); 6411 data &= ~GFX_PG_ENABLE; 6412 if (orig != data) 6413 WREG32(RLC_PG_CNTL, data); 6414 6415 orig = data = RREG32(RLC_AUTO_PG_CTRL); 6416 data &= ~AUTO_PG_EN; 6417 if (orig != data) 6418 WREG32(RLC_AUTO_PG_CTRL, data); 6419 6420 data = RREG32(DB_RENDER_CONTROL); 6421 } 6422 } 6423 6424 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) 6425 { 6426 u32 mask = 0, tmp, tmp1; 6427 int i; 6428 6429 cik_select_se_sh(rdev, se, sh); 6430 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); 6431 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); 6432 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); 6433 6434 tmp &= 0xffff0000; 6435 6436 tmp |= tmp1; 6437 tmp >>= 16; 6438 6439 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) { 6440 mask <<= 1; 6441 mask |= 1; 6442 } 6443 
6444 return (~tmp) & mask; 6445 } 6446 6447 static void cik_init_ao_cu_mask(struct radeon_device *rdev) 6448 { 6449 u32 i, j, k, active_cu_number = 0; 6450 u32 mask, counter, cu_bitmap; 6451 u32 tmp = 0; 6452 6453 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { 6454 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { 6455 mask = 1; 6456 cu_bitmap = 0; 6457 counter = 0; 6458 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) { 6459 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) { 6460 if (counter < 2) 6461 cu_bitmap |= mask; 6462 counter ++; 6463 } 6464 mask <<= 1; 6465 } 6466 6467 active_cu_number += counter; 6468 tmp |= (cu_bitmap << (i * 16 + j * 8)); 6469 } 6470 } 6471 6472 WREG32(RLC_PG_AO_CU_MASK, tmp); 6473 6474 tmp = RREG32(RLC_MAX_PG_CU); 6475 tmp &= ~MAX_PU_CU_MASK; 6476 tmp |= MAX_PU_CU(active_cu_number); 6477 WREG32(RLC_MAX_PG_CU, tmp); 6478 } 6479 6480 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev, 6481 bool enable) 6482 { 6483 u32 data, orig; 6484 6485 orig = data = RREG32(RLC_PG_CNTL); 6486 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG)) 6487 data |= STATIC_PER_CU_PG_ENABLE; 6488 else 6489 data &= ~STATIC_PER_CU_PG_ENABLE; 6490 if (orig != data) 6491 WREG32(RLC_PG_CNTL, data); 6492 } 6493 6494 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev, 6495 bool enable) 6496 { 6497 u32 data, orig; 6498 6499 orig = data = RREG32(RLC_PG_CNTL); 6500 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG)) 6501 data |= DYN_PER_CU_PG_ENABLE; 6502 else 6503 data &= ~DYN_PER_CU_PG_ENABLE; 6504 if (orig != data) 6505 WREG32(RLC_PG_CNTL, data); 6506 } 6507 6508 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 6509 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D 6510 6511 static void cik_init_gfx_cgpg(struct radeon_device *rdev) 6512 { 6513 u32 data, orig; 6514 u32 i; 6515 6516 if (rdev->rlc.cs_data) { 6517 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 6518 
WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr)); 6519 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr)); 6520 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size); 6521 } else { 6522 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET); 6523 for (i = 0; i < 3; i++) 6524 WREG32(RLC_GPM_SCRATCH_DATA, 0); 6525 } 6526 if (rdev->rlc.reg_list) { 6527 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET); 6528 for (i = 0; i < rdev->rlc.reg_list_size; i++) 6529 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]); 6530 } 6531 6532 orig = data = RREG32(RLC_PG_CNTL); 6533 data |= GFX_PG_SRC; 6534 if (orig != data) 6535 WREG32(RLC_PG_CNTL, data); 6536 6537 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); 6538 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8); 6539 6540 data = RREG32(CP_RB_WPTR_POLL_CNTL); 6541 data &= ~IDLE_POLL_COUNT_MASK; 6542 data |= IDLE_POLL_COUNT(0x60); 6543 WREG32(CP_RB_WPTR_POLL_CNTL, data); 6544 6545 data = 0x10101010; 6546 WREG32(RLC_PG_DELAY, data); 6547 6548 data = RREG32(RLC_PG_DELAY_2); 6549 data &= ~0xff; 6550 data |= 0x3; 6551 WREG32(RLC_PG_DELAY_2, data); 6552 6553 data = RREG32(RLC_AUTO_PG_CTRL); 6554 data &= ~GRBM_REG_SGIT_MASK; 6555 data |= GRBM_REG_SGIT(0x700); 6556 WREG32(RLC_AUTO_PG_CTRL, data); 6557 6558 } 6559 6560 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable) 6561 { 6562 cik_enable_gfx_cgpg(rdev, enable); 6563 cik_enable_gfx_static_mgpg(rdev, enable); 6564 cik_enable_gfx_dynamic_mgpg(rdev, enable); 6565 } 6566 6567 u32 cik_get_csb_size(struct radeon_device *rdev) 6568 { 6569 u32 count = 0; 6570 const struct cs_section_def *sect = NULL; 6571 const struct cs_extent_def *ext = NULL; 6572 6573 if (rdev->rlc.cs_data == NULL) 6574 return 0; 6575 6576 /* begin clear state */ 6577 count += 2; 6578 /* context control state */ 6579 count += 3; 6580 6581 for (sect = rdev->rlc.cs_data; sect->section != 
NULL; ++sect) { 6582 for (ext = sect->section; ext->extent != NULL; ++ext) { 6583 if (sect->id == SECT_CONTEXT) 6584 count += 2 + ext->reg_count; 6585 else 6586 return 0; 6587 } 6588 } 6589 /* pa_sc_raster_config/pa_sc_raster_config1 */ 6590 count += 4; 6591 /* end clear state */ 6592 count += 2; 6593 /* clear state */ 6594 count += 2; 6595 6596 return count; 6597 } 6598 6599 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) 6600 { 6601 u32 count = 0, i; 6602 const struct cs_section_def *sect = NULL; 6603 const struct cs_extent_def *ext = NULL; 6604 6605 if (rdev->rlc.cs_data == NULL) 6606 return; 6607 if (buffer == NULL) 6608 return; 6609 6610 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 6611 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 6612 6613 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6614 buffer[count++] = cpu_to_le32(0x80000000); 6615 buffer[count++] = cpu_to_le32(0x80000000); 6616 6617 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) { 6618 for (ext = sect->section; ext->extent != NULL; ++ext) { 6619 if (sect->id == SECT_CONTEXT) { 6620 buffer[count++] = 6621 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 6622 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); 6623 for (i = 0; i < ext->reg_count; i++) 6624 buffer[count++] = cpu_to_le32(ext->extent[i]); 6625 } else { 6626 return; 6627 } 6628 } 6629 } 6630 6631 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 6632 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 6633 switch (rdev->family) { 6634 case CHIP_BONAIRE: 6635 buffer[count++] = cpu_to_le32(0x16000012); 6636 buffer[count++] = cpu_to_le32(0x00000000); 6637 break; 6638 case CHIP_KAVERI: 6639 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ 6640 buffer[count++] = cpu_to_le32(0x00000000); 6641 break; 6642 case CHIP_KABINI: 6643 case CHIP_MULLINS: 6644 buffer[count++] = 
cpu_to_le32(0x00000000); /* XXX */ 6645 buffer[count++] = cpu_to_le32(0x00000000); 6646 break; 6647 case CHIP_HAWAII: 6648 buffer[count++] = cpu_to_le32(0x3a00161a); 6649 buffer[count++] = cpu_to_le32(0x0000002e); 6650 break; 6651 default: 6652 buffer[count++] = cpu_to_le32(0x00000000); 6653 buffer[count++] = cpu_to_le32(0x00000000); 6654 break; 6655 } 6656 6657 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 6658 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 6659 6660 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 6661 buffer[count++] = cpu_to_le32(0); 6662 } 6663 6664 static void cik_init_pg(struct radeon_device *rdev) 6665 { 6666 if (rdev->pg_flags) { 6667 cik_enable_sck_slowdown_on_pu(rdev, true); 6668 cik_enable_sck_slowdown_on_pd(rdev, true); 6669 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 6670 cik_init_gfx_cgpg(rdev); 6671 cik_enable_cp_pg(rdev, true); 6672 cik_enable_gds_pg(rdev, true); 6673 } 6674 cik_init_ao_cu_mask(rdev); 6675 cik_update_gfx_pg(rdev, true); 6676 } 6677 } 6678 6679 static void cik_fini_pg(struct radeon_device *rdev) 6680 { 6681 if (rdev->pg_flags) { 6682 cik_update_gfx_pg(rdev, false); 6683 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 6684 cik_enable_cp_pg(rdev, false); 6685 cik_enable_gds_pg(rdev, false); 6686 } 6687 } 6688 } 6689 6690 /* 6691 * Interrupts 6692 * Starting with r6xx, interrupts are handled via a ring buffer. 6693 * Ring buffers are areas of GPU accessible memory that the GPU 6694 * writes interrupt vectors into and the host reads vectors out of. 6695 * There is a rptr (read pointer) that determines where the 6696 * host is currently reading, and a wptr (write pointer) 6697 * which determines where the GPU has written. When the 6698 * pointers are equal, the ring is idle. When the GPU 6699 * writes vectors to the ring buffer, it increments the 6700 * wptr. 
When there is an interrupt, the host then starts 6701 * fetching commands and processing them until the pointers are 6702 * equal again at which point it updates the rptr. 6703 */ 6704 6705 /** 6706 * cik_enable_interrupts - Enable the interrupt ring buffer 6707 * 6708 * @rdev: radeon_device pointer 6709 * 6710 * Enable the interrupt ring buffer (CIK). 6711 */ 6712 static void cik_enable_interrupts(struct radeon_device *rdev) 6713 { 6714 u32 ih_cntl = RREG32(IH_CNTL); 6715 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 6716 6717 ih_cntl |= ENABLE_INTR; 6718 ih_rb_cntl |= IH_RB_ENABLE; 6719 WREG32(IH_CNTL, ih_cntl); 6720 WREG32(IH_RB_CNTL, ih_rb_cntl); 6721 rdev->ih.enabled = true; 6722 } 6723 6724 /** 6725 * cik_disable_interrupts - Disable the interrupt ring buffer 6726 * 6727 * @rdev: radeon_device pointer 6728 * 6729 * Disable the interrupt ring buffer (CIK). 6730 */ 6731 static void cik_disable_interrupts(struct radeon_device *rdev) 6732 { 6733 u32 ih_rb_cntl = RREG32(IH_RB_CNTL); 6734 u32 ih_cntl = RREG32(IH_CNTL); 6735 6736 ih_rb_cntl &= ~IH_RB_ENABLE; 6737 ih_cntl &= ~ENABLE_INTR; 6738 WREG32(IH_RB_CNTL, ih_rb_cntl); 6739 WREG32(IH_CNTL, ih_cntl); 6740 /* set rptr, wptr to 0 */ 6741 WREG32(IH_RB_RPTR, 0); 6742 WREG32(IH_RB_WPTR, 0); 6743 rdev->ih.enabled = false; 6744 rdev->ih.rptr = 0; 6745 } 6746 6747 /** 6748 * cik_disable_interrupt_state - Disable all interrupt sources 6749 * 6750 * @rdev: radeon_device pointer 6751 * 6752 * Clear all interrupt enable bits used by the driver (CIK). 
6753 */ 6754 static void cik_disable_interrupt_state(struct radeon_device *rdev) 6755 { 6756 u32 tmp; 6757 6758 /* gfx ring */ 6759 tmp = RREG32(CP_INT_CNTL_RING0) & 6760 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 6761 WREG32(CP_INT_CNTL_RING0, tmp); 6762 /* sdma */ 6763 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 6764 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); 6765 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 6766 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp); 6767 /* compute queues */ 6768 WREG32(CP_ME1_PIPE0_INT_CNTL, 0); 6769 WREG32(CP_ME1_PIPE1_INT_CNTL, 0); 6770 WREG32(CP_ME1_PIPE2_INT_CNTL, 0); 6771 WREG32(CP_ME1_PIPE3_INT_CNTL, 0); 6772 WREG32(CP_ME2_PIPE0_INT_CNTL, 0); 6773 WREG32(CP_ME2_PIPE1_INT_CNTL, 0); 6774 WREG32(CP_ME2_PIPE2_INT_CNTL, 0); 6775 WREG32(CP_ME2_PIPE3_INT_CNTL, 0); 6776 /* grbm */ 6777 WREG32(GRBM_INT_CNTL, 0); 6778 /* vline/vblank, etc. */ 6779 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 6780 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 6781 if (rdev->num_crtc >= 4) { 6782 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); 6783 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); 6784 } 6785 if (rdev->num_crtc >= 6) { 6786 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); 6787 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); 6788 } 6789 /* pflip */ 6790 if (rdev->num_crtc >= 2) { 6791 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 6792 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 6793 } 6794 if (rdev->num_crtc >= 4) { 6795 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); 6796 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); 6797 } 6798 if (rdev->num_crtc >= 6) { 6799 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); 6800 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); 6801 } 6802 6803 /* dac hotplug */ 
6804 WREG32(DAC_AUTODETECT_INT_CONTROL, 0); 6805 6806 /* digital hotplug */ 6807 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6808 WREG32(DC_HPD1_INT_CONTROL, tmp); 6809 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6810 WREG32(DC_HPD2_INT_CONTROL, tmp); 6811 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6812 WREG32(DC_HPD3_INT_CONTROL, tmp); 6813 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6814 WREG32(DC_HPD4_INT_CONTROL, tmp); 6815 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6816 WREG32(DC_HPD5_INT_CONTROL, tmp); 6817 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY; 6818 WREG32(DC_HPD6_INT_CONTROL, tmp); 6819 6820 } 6821 6822 /** 6823 * cik_irq_init - init and enable the interrupt ring 6824 * 6825 * @rdev: radeon_device pointer 6826 * 6827 * Allocate a ring buffer for the interrupt controller, 6828 * enable the RLC, disable interrupts, enable the IH 6829 * ring buffer and enable it (CIK). 6830 * Called at device load and reume. 6831 * Returns 0 for success, errors for failure. 6832 */ 6833 static int cik_irq_init(struct radeon_device *rdev) 6834 { 6835 int ret = 0; 6836 int rb_bufsz; 6837 u32 interrupt_cntl, ih_cntl, ih_rb_cntl; 6838 6839 /* allocate ring */ 6840 ret = r600_ih_ring_alloc(rdev); 6841 if (ret) 6842 return ret; 6843 6844 /* disable irqs */ 6845 cik_disable_interrupts(rdev); 6846 6847 /* init rlc */ 6848 ret = cik_rlc_resume(rdev); 6849 if (ret) { 6850 r600_ih_ring_fini(rdev); 6851 return ret; 6852 } 6853 6854 /* setup interrupt control */ 6855 /* XXX this should actually be a bus address, not an MC address. 
same on older asics */ 6856 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); 6857 interrupt_cntl = RREG32(INTERRUPT_CNTL); 6858 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi 6859 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN 6860 */ 6861 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; 6862 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */ 6863 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; 6864 WREG32(INTERRUPT_CNTL, interrupt_cntl); 6865 6866 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8); 6867 rb_bufsz = order_base_2(rdev->ih.ring_size / 4); 6868 6869 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE | 6870 IH_WPTR_OVERFLOW_CLEAR | 6871 (rb_bufsz << 1)); 6872 6873 if (rdev->wb.enabled) 6874 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE; 6875 6876 /* set the writeback address whether it's enabled or not */ 6877 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC); 6878 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF); 6879 6880 WREG32(IH_RB_CNTL, ih_rb_cntl); 6881 6882 /* set rptr, wptr to 0 */ 6883 WREG32(IH_RB_RPTR, 0); 6884 WREG32(IH_RB_WPTR, 0); 6885 6886 /* Default settings for IH_CNTL (disabled at first) */ 6887 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0); 6888 /* RPTR_REARM only works if msi's are enabled */ 6889 if (rdev->msi_enabled) 6890 ih_cntl |= RPTR_REARM; 6891 WREG32(IH_CNTL, ih_cntl); 6892 6893 /* force the active interrupt state to all disabled */ 6894 cik_disable_interrupt_state(rdev); 6895 6896 pci_enable_busmaster(rdev->pdev->dev); 6897 6898 /* enable irqs */ 6899 cik_enable_interrupts(rdev); 6900 6901 return ret; 6902 } 6903 6904 /** 6905 * cik_irq_set - enable/disable interrupt sources 6906 * 6907 * @rdev: radeon_device pointer 6908 * 6909 * Enable interrupt sources on the GPU (vblanks, hpd, 6910 * etc.) (CIK). 6911 * Returns 0 for success, errors for failure. 
6912 */ 6913 int cik_irq_set(struct radeon_device *rdev) 6914 { 6915 u32 cp_int_cntl; 6916 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3; 6917 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3; 6918 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; 6919 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; 6920 u32 grbm_int_cntl = 0; 6921 u32 dma_cntl, dma_cntl1; 6922 u32 thermal_int; 6923 6924 if (!rdev->irq.installed) { 6925 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 6926 return -EINVAL; 6927 } 6928 /* don't enable anything if the ih is disabled */ 6929 if (!rdev->ih.enabled) { 6930 cik_disable_interrupts(rdev); 6931 /* force the active interrupt state to all disabled */ 6932 cik_disable_interrupt_state(rdev); 6933 return 0; 6934 } 6935 6936 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & 6937 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 6938 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; 6939 6940 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; 6941 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; 6942 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN; 6943 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN; 6944 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; 6945 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; 6946 6947 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; 6948 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; 6949 6950 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6951 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6952 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6953 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6954 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6955 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6956 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 6957 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & 
~TIME_STAMP_INT_ENABLE; 6958 6959 if (rdev->flags & RADEON_IS_IGP) 6960 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & 6961 ~(THERM_INTH_MASK | THERM_INTL_MASK); 6962 else 6963 thermal_int = RREG32_SMC(CG_THERMAL_INT) & 6964 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); 6965 6966 /* enable CP interrupts on all rings */ 6967 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 6968 DRM_DEBUG("cik_irq_set: sw int gfx\n"); 6969 cp_int_cntl |= TIME_STAMP_INT_ENABLE; 6970 } 6971 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { 6972 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6973 DRM_DEBUG("si_irq_set: sw int cp1\n"); 6974 if (ring->me == 1) { 6975 switch (ring->pipe) { 6976 case 0: 6977 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 6978 break; 6979 case 1: 6980 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 6981 break; 6982 case 2: 6983 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 6984 break; 6985 case 3: 6986 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 6987 break; 6988 default: 6989 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 6990 break; 6991 } 6992 } else if (ring->me == 2) { 6993 switch (ring->pipe) { 6994 case 0: 6995 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 6996 break; 6997 case 1: 6998 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 6999 break; 7000 case 2: 7001 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 7002 break; 7003 case 3: 7004 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 7005 break; 7006 default: 7007 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 7008 break; 7009 } 7010 } else { 7011 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me); 7012 } 7013 } 7014 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { 7015 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7016 DRM_DEBUG("si_irq_set: sw int cp2\n"); 7017 if (ring->me == 1) { 7018 switch (ring->pipe) { 7019 case 0: 7020 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 7021 break; 7022 case 1: 7023 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 7024 break; 7025 case 2: 7026 
cp_m1p2 |= TIME_STAMP_INT_ENABLE; 7027 break; 7028 case 3: 7029 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 7030 break; 7031 default: 7032 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 7033 break; 7034 } 7035 } else if (ring->me == 2) { 7036 switch (ring->pipe) { 7037 case 0: 7038 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 7039 break; 7040 case 1: 7041 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 7042 break; 7043 case 2: 7044 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 7045 break; 7046 case 3: 7047 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 7048 break; 7049 default: 7050 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 7051 break; 7052 } 7053 } else { 7054 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me); 7055 } 7056 } 7057 7058 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { 7059 DRM_DEBUG("cik_irq_set: sw int dma\n"); 7060 dma_cntl |= TRAP_ENABLE; 7061 } 7062 7063 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { 7064 DRM_DEBUG("cik_irq_set: sw int dma1\n"); 7065 dma_cntl1 |= TRAP_ENABLE; 7066 } 7067 7068 if (rdev->irq.crtc_vblank_int[0] || 7069 atomic_read(&rdev->irq.pflip[0])) { 7070 DRM_DEBUG("cik_irq_set: vblank 0\n"); 7071 crtc1 |= VBLANK_INTERRUPT_MASK; 7072 } 7073 if (rdev->irq.crtc_vblank_int[1] || 7074 atomic_read(&rdev->irq.pflip[1])) { 7075 DRM_DEBUG("cik_irq_set: vblank 1\n"); 7076 crtc2 |= VBLANK_INTERRUPT_MASK; 7077 } 7078 if (rdev->irq.crtc_vblank_int[2] || 7079 atomic_read(&rdev->irq.pflip[2])) { 7080 DRM_DEBUG("cik_irq_set: vblank 2\n"); 7081 crtc3 |= VBLANK_INTERRUPT_MASK; 7082 } 7083 if (rdev->irq.crtc_vblank_int[3] || 7084 atomic_read(&rdev->irq.pflip[3])) { 7085 DRM_DEBUG("cik_irq_set: vblank 3\n"); 7086 crtc4 |= VBLANK_INTERRUPT_MASK; 7087 } 7088 if (rdev->irq.crtc_vblank_int[4] || 7089 atomic_read(&rdev->irq.pflip[4])) { 7090 DRM_DEBUG("cik_irq_set: vblank 4\n"); 7091 crtc5 |= VBLANK_INTERRUPT_MASK; 7092 } 7093 if (rdev->irq.crtc_vblank_int[5] || 7094 atomic_read(&rdev->irq.pflip[5])) { 7095 
DRM_DEBUG("cik_irq_set: vblank 5\n"); 7096 crtc6 |= VBLANK_INTERRUPT_MASK; 7097 } 7098 if (rdev->irq.hpd[0]) { 7099 DRM_DEBUG("cik_irq_set: hpd 1\n"); 7100 hpd1 |= DC_HPDx_INT_EN; 7101 } 7102 if (rdev->irq.hpd[1]) { 7103 DRM_DEBUG("cik_irq_set: hpd 2\n"); 7104 hpd2 |= DC_HPDx_INT_EN; 7105 } 7106 if (rdev->irq.hpd[2]) { 7107 DRM_DEBUG("cik_irq_set: hpd 3\n"); 7108 hpd3 |= DC_HPDx_INT_EN; 7109 } 7110 if (rdev->irq.hpd[3]) { 7111 DRM_DEBUG("cik_irq_set: hpd 4\n"); 7112 hpd4 |= DC_HPDx_INT_EN; 7113 } 7114 if (rdev->irq.hpd[4]) { 7115 DRM_DEBUG("cik_irq_set: hpd 5\n"); 7116 hpd5 |= DC_HPDx_INT_EN; 7117 } 7118 if (rdev->irq.hpd[5]) { 7119 DRM_DEBUG("cik_irq_set: hpd 6\n"); 7120 hpd6 |= DC_HPDx_INT_EN; 7121 } 7122 7123 if (rdev->irq.dpm_thermal) { 7124 DRM_DEBUG("dpm thermal\n"); 7125 if (rdev->flags & RADEON_IS_IGP) 7126 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK; 7127 else 7128 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; 7129 } 7130 7131 WREG32(CP_INT_CNTL_RING0, cp_int_cntl); 7132 7133 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); 7134 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1); 7135 7136 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); 7137 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1); 7138 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2); 7139 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3); 7140 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0); 7141 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1); 7142 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2); 7143 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3); 7144 7145 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 7146 7147 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 7148 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); 7149 if (rdev->num_crtc >= 4) { 7150 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); 7151 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); 7152 } 7153 if (rdev->num_crtc >= 6) { 7154 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); 7155 
WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); 7156 } 7157 7158 if (rdev->num_crtc >= 2) { 7159 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 7160 GRPH_PFLIP_INT_MASK); 7161 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 7162 GRPH_PFLIP_INT_MASK); 7163 } 7164 if (rdev->num_crtc >= 4) { 7165 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 7166 GRPH_PFLIP_INT_MASK); 7167 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 7168 GRPH_PFLIP_INT_MASK); 7169 } 7170 if (rdev->num_crtc >= 6) { 7171 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 7172 GRPH_PFLIP_INT_MASK); 7173 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 7174 GRPH_PFLIP_INT_MASK); 7175 } 7176 7177 WREG32(DC_HPD1_INT_CONTROL, hpd1); 7178 WREG32(DC_HPD2_INT_CONTROL, hpd2); 7179 WREG32(DC_HPD3_INT_CONTROL, hpd3); 7180 WREG32(DC_HPD4_INT_CONTROL, hpd4); 7181 WREG32(DC_HPD5_INT_CONTROL, hpd5); 7182 WREG32(DC_HPD6_INT_CONTROL, hpd6); 7183 7184 if (rdev->flags & RADEON_IS_IGP) 7185 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int); 7186 else 7187 WREG32_SMC(CG_THERMAL_INT, thermal_int); 7188 7189 return 0; 7190 } 7191 7192 /** 7193 * cik_irq_ack - ack interrupt sources 7194 * 7195 * @rdev: radeon_device pointer 7196 * 7197 * Ack interrupt sources on the GPU (vblanks, hpd, 7198 * etc.) (CIK). Certain interrupts sources are sw 7199 * generated and do not require an explicit ack. 
7200 */ 7201 static inline void cik_irq_ack(struct radeon_device *rdev) 7202 { 7203 u32 tmp; 7204 7205 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS); 7206 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); 7207 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); 7208 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); 7209 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); 7210 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); 7211 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); 7212 7213 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS + 7214 EVERGREEN_CRTC0_REGISTER_OFFSET); 7215 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS + 7216 EVERGREEN_CRTC1_REGISTER_OFFSET); 7217 if (rdev->num_crtc >= 4) { 7218 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS + 7219 EVERGREEN_CRTC2_REGISTER_OFFSET); 7220 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS + 7221 EVERGREEN_CRTC3_REGISTER_OFFSET); 7222 } 7223 if (rdev->num_crtc >= 6) { 7224 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS + 7225 EVERGREEN_CRTC4_REGISTER_OFFSET); 7226 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS + 7227 EVERGREEN_CRTC5_REGISTER_OFFSET); 7228 } 7229 7230 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED) 7231 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, 7232 GRPH_PFLIP_INT_CLEAR); 7233 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED) 7234 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, 7235 GRPH_PFLIP_INT_CLEAR); 7236 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) 7237 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); 7238 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) 7239 WREG32(LB_VLINE_STATUS + 
EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); 7240 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) 7241 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); 7242 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) 7243 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); 7244 7245 if (rdev->num_crtc >= 4) { 7246 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED) 7247 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, 7248 GRPH_PFLIP_INT_CLEAR); 7249 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED) 7250 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, 7251 GRPH_PFLIP_INT_CLEAR); 7252 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) 7253 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); 7254 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) 7255 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); 7256 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) 7257 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); 7258 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) 7259 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); 7260 } 7261 7262 if (rdev->num_crtc >= 6) { 7263 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED) 7264 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, 7265 GRPH_PFLIP_INT_CLEAR); 7266 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED) 7267 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, 7268 GRPH_PFLIP_INT_CLEAR); 7269 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) 7270 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); 7271 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) 7272 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); 7273 if 
(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) 7274 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); 7275 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) 7276 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); 7277 } 7278 7279 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 7280 tmp = RREG32(DC_HPD1_INT_CONTROL); 7281 tmp |= DC_HPDx_INT_ACK; 7282 WREG32(DC_HPD1_INT_CONTROL, tmp); 7283 } 7284 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 7285 tmp = RREG32(DC_HPD2_INT_CONTROL); 7286 tmp |= DC_HPDx_INT_ACK; 7287 WREG32(DC_HPD2_INT_CONTROL, tmp); 7288 } 7289 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 7290 tmp = RREG32(DC_HPD3_INT_CONTROL); 7291 tmp |= DC_HPDx_INT_ACK; 7292 WREG32(DC_HPD3_INT_CONTROL, tmp); 7293 } 7294 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 7295 tmp = RREG32(DC_HPD4_INT_CONTROL); 7296 tmp |= DC_HPDx_INT_ACK; 7297 WREG32(DC_HPD4_INT_CONTROL, tmp); 7298 } 7299 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 7300 tmp = RREG32(DC_HPD5_INT_CONTROL); 7301 tmp |= DC_HPDx_INT_ACK; 7302 WREG32(DC_HPD5_INT_CONTROL, tmp); 7303 } 7304 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 7305 tmp = RREG32(DC_HPD5_INT_CONTROL); 7306 tmp |= DC_HPDx_INT_ACK; 7307 WREG32(DC_HPD6_INT_CONTROL, tmp); 7308 } 7309 } 7310 7311 /** 7312 * cik_irq_disable - disable interrupts 7313 * 7314 * @rdev: radeon_device pointer 7315 * 7316 * Disable interrupts on the hw (CIK). 7317 */ 7318 static void cik_irq_disable(struct radeon_device *rdev) 7319 { 7320 cik_disable_interrupts(rdev); 7321 /* Wait and acknowledge irq */ 7322 mdelay(1); 7323 cik_irq_ack(rdev); 7324 cik_disable_interrupt_state(rdev); 7325 } 7326 7327 /** 7328 * cik_irq_disable - disable interrupts for suspend 7329 * 7330 * @rdev: radeon_device pointer 7331 * 7332 * Disable interrupts and stop the RLC (CIK). 7333 * Used for suspend. 
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* Disable and ack interrupts first, then halt the RLC so no
	 * interrupt sources are live while the chip is suspended. */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}

/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK). Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the CPU-visible writeback copy; fall back to an MMIO read */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* clear the overflow flag in hw so the next overflow is detectable */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}

/* CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt hander (CIK). Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
7423 */ 7424 irqreturn_t cik_irq_process(struct radeon_device *rdev) 7425 { 7426 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7427 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7428 u32 wptr; 7429 u32 rptr; 7430 u32 src_id, src_data, ring_id; 7431 u8 me_id, pipe_id, queue_id; 7432 u32 ring_index; 7433 bool queue_hotplug = false; 7434 bool queue_reset = false; 7435 u32 addr, status, mc_client; 7436 bool queue_thermal = false; 7437 7438 if (!rdev->ih.enabled || rdev->shutdown) 7439 return IRQ_NONE; 7440 7441 wptr = cik_get_ih_wptr(rdev); 7442 7443 restart_ih: 7444 /* is somebody else already processing irqs? */ 7445 if (atomic_xchg(&rdev->ih.lock, 1)) 7446 return IRQ_NONE; 7447 7448 rptr = rdev->ih.rptr; 7449 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr); 7450 7451 /* Order reading of wptr vs. reading of IH ring data */ 7452 rmb(); 7453 7454 /* display interrupts */ 7455 cik_irq_ack(rdev); 7456 7457 while (rptr != wptr) { 7458 /* wptr/rptr are in bytes! 
*/ 7459 ring_index = rptr / 4; 7460 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 7461 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 7462 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 7463 7464 switch (src_id) { 7465 case 1: /* D1 vblank/vline */ 7466 switch (src_data) { 7467 case 0: /* D1 vblank */ 7468 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 7469 if (rdev->irq.crtc_vblank_int[0]) { 7470 drm_handle_vblank(rdev->ddev, 0); 7471 rdev->pm.vblank_sync = true; 7472 wake_up(&rdev->irq.vblank_queue); 7473 } 7474 if (atomic_read(&rdev->irq.pflip[0])) 7475 radeon_crtc_handle_vblank(rdev, 0); 7476 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 7477 DRM_DEBUG("IH: D1 vblank\n"); 7478 } 7479 break; 7480 case 1: /* D1 vline */ 7481 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 7482 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 7483 DRM_DEBUG("IH: D1 vline\n"); 7484 } 7485 break; 7486 default: 7487 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7488 break; 7489 } 7490 break; 7491 case 2: /* D2 vblank/vline */ 7492 switch (src_data) { 7493 case 0: /* D2 vblank */ 7494 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 7495 if (rdev->irq.crtc_vblank_int[1]) { 7496 drm_handle_vblank(rdev->ddev, 1); 7497 rdev->pm.vblank_sync = true; 7498 wake_up(&rdev->irq.vblank_queue); 7499 } 7500 if (atomic_read(&rdev->irq.pflip[1])) 7501 radeon_crtc_handle_vblank(rdev, 1); 7502 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 7503 DRM_DEBUG("IH: D2 vblank\n"); 7504 } 7505 break; 7506 case 1: /* D2 vline */ 7507 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 7508 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 7509 DRM_DEBUG("IH: D2 vline\n"); 7510 } 7511 break; 7512 default: 7513 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7514 break; 7515 } 7516 break; 7517 case 3: /* D3 
vblank/vline */ 7518 switch (src_data) { 7519 case 0: /* D3 vblank */ 7520 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 7521 if (rdev->irq.crtc_vblank_int[2]) { 7522 drm_handle_vblank(rdev->ddev, 2); 7523 rdev->pm.vblank_sync = true; 7524 wake_up(&rdev->irq.vblank_queue); 7525 } 7526 if (atomic_read(&rdev->irq.pflip[2])) 7527 radeon_crtc_handle_vblank(rdev, 2); 7528 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 7529 DRM_DEBUG("IH: D3 vblank\n"); 7530 } 7531 break; 7532 case 1: /* D3 vline */ 7533 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 7534 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 7535 DRM_DEBUG("IH: D3 vline\n"); 7536 } 7537 break; 7538 default: 7539 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7540 break; 7541 } 7542 break; 7543 case 4: /* D4 vblank/vline */ 7544 switch (src_data) { 7545 case 0: /* D4 vblank */ 7546 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { 7547 if (rdev->irq.crtc_vblank_int[3]) { 7548 drm_handle_vblank(rdev->ddev, 3); 7549 rdev->pm.vblank_sync = true; 7550 wake_up(&rdev->irq.vblank_queue); 7551 } 7552 if (atomic_read(&rdev->irq.pflip[3])) 7553 radeon_crtc_handle_vblank(rdev, 3); 7554 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 7555 DRM_DEBUG("IH: D4 vblank\n"); 7556 } 7557 break; 7558 case 1: /* D4 vline */ 7559 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 7560 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 7561 DRM_DEBUG("IH: D4 vline\n"); 7562 } 7563 break; 7564 default: 7565 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7566 break; 7567 } 7568 break; 7569 case 5: /* D5 vblank/vline */ 7570 switch (src_data) { 7571 case 0: /* D5 vblank */ 7572 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 7573 if (rdev->irq.crtc_vblank_int[4]) { 7574 drm_handle_vblank(rdev->ddev, 4); 7575 rdev->pm.vblank_sync = 
true; 7576 wake_up(&rdev->irq.vblank_queue); 7577 } 7578 if (atomic_read(&rdev->irq.pflip[4])) 7579 radeon_crtc_handle_vblank(rdev, 4); 7580 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 7581 DRM_DEBUG("IH: D5 vblank\n"); 7582 } 7583 break; 7584 case 1: /* D5 vline */ 7585 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 7586 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 7587 DRM_DEBUG("IH: D5 vline\n"); 7588 } 7589 break; 7590 default: 7591 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7592 break; 7593 } 7594 break; 7595 case 6: /* D6 vblank/vline */ 7596 switch (src_data) { 7597 case 0: /* D6 vblank */ 7598 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 7599 if (rdev->irq.crtc_vblank_int[5]) { 7600 drm_handle_vblank(rdev->ddev, 5); 7601 rdev->pm.vblank_sync = true; 7602 wake_up(&rdev->irq.vblank_queue); 7603 } 7604 if (atomic_read(&rdev->irq.pflip[5])) 7605 radeon_crtc_handle_vblank(rdev, 5); 7606 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 7607 DRM_DEBUG("IH: D6 vblank\n"); 7608 } 7609 break; 7610 case 1: /* D6 vline */ 7611 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 7612 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 7613 DRM_DEBUG("IH: D6 vline\n"); 7614 } 7615 break; 7616 default: 7617 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7618 break; 7619 } 7620 break; 7621 case 8: /* D1 page flip */ 7622 case 10: /* D2 page flip */ 7623 case 12: /* D3 page flip */ 7624 case 14: /* D4 page flip */ 7625 case 16: /* D5 page flip */ 7626 case 18: /* D6 page flip */ 7627 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1); 7628 if (radeon_use_pflipirq > 0) 7629 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1); 7630 break; 7631 case 42: /* HPD hotplug */ 7632 switch (src_data) { 7633 case 0: 7634 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 7635 
rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 7636 queue_hotplug = true; 7637 DRM_DEBUG("IH: HPD1\n"); 7638 } 7639 break; 7640 case 1: 7641 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 7642 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 7643 queue_hotplug = true; 7644 DRM_DEBUG("IH: HPD2\n"); 7645 } 7646 break; 7647 case 2: 7648 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 7649 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 7650 queue_hotplug = true; 7651 DRM_DEBUG("IH: HPD3\n"); 7652 } 7653 break; 7654 case 3: 7655 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 7656 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 7657 queue_hotplug = true; 7658 DRM_DEBUG("IH: HPD4\n"); 7659 } 7660 break; 7661 case 4: 7662 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 7663 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 7664 queue_hotplug = true; 7665 DRM_DEBUG("IH: HPD5\n"); 7666 } 7667 break; 7668 case 5: 7669 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 7670 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 7671 queue_hotplug = true; 7672 DRM_DEBUG("IH: HPD6\n"); 7673 } 7674 break; 7675 default: 7676 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7677 break; 7678 } 7679 break; 7680 case 124: /* UVD */ 7681 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); 7682 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); 7683 break; 7684 case 146: 7685 case 147: 7686 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); 7687 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS); 7688 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); 7689 /* reset addr and status */ 7690 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); 7691 if (addr == 0x0 && status == 0x0) 7692 break; 7693 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); 7694 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 
0x%08X\n", 7695 addr); 7696 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 7697 status); 7698 cik_vm_decode_fault(rdev, status, addr, mc_client); 7699 break; 7700 case 167: /* VCE */ 7701 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); 7702 switch (src_data) { 7703 case 0: 7704 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); 7705 break; 7706 case 1: 7707 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); 7708 break; 7709 default: 7710 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); 7711 break; 7712 } 7713 break; 7714 case 176: /* GFX RB CP_INT */ 7715 case 177: /* GFX IB CP_INT */ 7716 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 7717 break; 7718 case 181: /* CP EOP event */ 7719 DRM_DEBUG("IH: CP EOP\n"); 7720 /* XXX check the bitfield order! */ 7721 me_id = (ring_id & 0x60) >> 5; 7722 pipe_id = (ring_id & 0x18) >> 3; 7723 queue_id = (ring_id & 0x7) >> 0; 7724 switch (me_id) { 7725 case 0: 7726 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 7727 break; 7728 case 1: 7729 case 2: 7730 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id)) 7731 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7732 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id)) 7733 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7734 break; 7735 } 7736 break; 7737 case 184: /* CP Privileged reg access */ 7738 DRM_ERROR("Illegal register access in command stream\n"); 7739 /* XXX check the bitfield order! 
*/ 7740 me_id = (ring_id & 0x60) >> 5; 7741 pipe_id = (ring_id & 0x18) >> 3; 7742 queue_id = (ring_id & 0x7) >> 0; 7743 switch (me_id) { 7744 case 0: 7745 /* This results in a full GPU reset, but all we need to do is soft 7746 * reset the CP for gfx 7747 */ 7748 queue_reset = true; 7749 break; 7750 case 1: 7751 /* XXX compute */ 7752 queue_reset = true; 7753 break; 7754 case 2: 7755 /* XXX compute */ 7756 queue_reset = true; 7757 break; 7758 } 7759 break; 7760 case 185: /* CP Privileged inst */ 7761 DRM_ERROR("Illegal instruction in command stream\n"); 7762 /* XXX check the bitfield order! */ 7763 me_id = (ring_id & 0x60) >> 5; 7764 pipe_id = (ring_id & 0x18) >> 3; 7765 queue_id = (ring_id & 0x7) >> 0; 7766 switch (me_id) { 7767 case 0: 7768 /* This results in a full GPU reset, but all we need to do is soft 7769 * reset the CP for gfx 7770 */ 7771 queue_reset = true; 7772 break; 7773 case 1: 7774 /* XXX compute */ 7775 queue_reset = true; 7776 break; 7777 case 2: 7778 /* XXX compute */ 7779 queue_reset = true; 7780 break; 7781 } 7782 break; 7783 case 224: /* SDMA trap event */ 7784 /* XXX check the bitfield order! 
*/ 7785 me_id = (ring_id & 0x3) >> 0; 7786 queue_id = (ring_id & 0xc) >> 2; 7787 DRM_DEBUG("IH: SDMA trap\n"); 7788 switch (me_id) { 7789 case 0: 7790 switch (queue_id) { 7791 case 0: 7792 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 7793 break; 7794 case 1: 7795 /* XXX compute */ 7796 break; 7797 case 2: 7798 /* XXX compute */ 7799 break; 7800 } 7801 break; 7802 case 1: 7803 switch (queue_id) { 7804 case 0: 7805 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 7806 break; 7807 case 1: 7808 /* XXX compute */ 7809 break; 7810 case 2: 7811 /* XXX compute */ 7812 break; 7813 } 7814 break; 7815 } 7816 break; 7817 case 230: /* thermal low to high */ 7818 DRM_DEBUG("IH: thermal low to high\n"); 7819 rdev->pm.dpm.thermal.high_to_low = false; 7820 queue_thermal = true; 7821 break; 7822 case 231: /* thermal high to low */ 7823 DRM_DEBUG("IH: thermal high to low\n"); 7824 rdev->pm.dpm.thermal.high_to_low = true; 7825 queue_thermal = true; 7826 break; 7827 case 233: /* GUI IDLE */ 7828 DRM_DEBUG("IH: GUI idle\n"); 7829 break; 7830 case 241: /* SDMA Privileged inst */ 7831 case 247: /* SDMA Privileged inst */ 7832 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 7833 /* XXX check the bitfield order! */ 7834 me_id = (ring_id & 0x3) >> 0; 7835 queue_id = (ring_id & 0xc) >> 2; 7836 switch (me_id) { 7837 case 0: 7838 switch (queue_id) { 7839 case 0: 7840 queue_reset = true; 7841 break; 7842 case 1: 7843 /* XXX compute */ 7844 queue_reset = true; 7845 break; 7846 case 2: 7847 /* XXX compute */ 7848 queue_reset = true; 7849 break; 7850 } 7851 break; 7852 case 1: 7853 switch (queue_id) { 7854 case 0: 7855 queue_reset = true; 7856 break; 7857 case 1: 7858 /* XXX compute */ 7859 queue_reset = true; 7860 break; 7861 case 2: 7862 /* XXX compute */ 7863 queue_reset = true; 7864 break; 7865 } 7866 break; 7867 } 7868 break; 7869 default: 7870 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 7871 break; 7872 } 7873 7874 /* wptr/rptr are in bytes! 
*/ 7875 rptr += 16; 7876 rptr &= rdev->ih.ptr_mask; 7877 WREG32(IH_RB_RPTR, rptr); 7878 } 7879 if (queue_hotplug) 7880 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work); 7881 if (queue_reset) 7882 taskqueue_enqueue(rdev->tq, &rdev->reset_work); 7883 if (queue_thermal) 7884 taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work); 7885 rdev->ih.rptr = rptr; 7886 atomic_set(&rdev->ih.lock, 0); 7887 7888 /* make sure wptr hasn't changed while processing */ 7889 wptr = cik_get_ih_wptr(rdev); 7890 if (wptr != rptr) 7891 goto restart_ih; 7892 7893 return IRQ_HANDLED; 7894 } 7895 7896 /* 7897 * startup/shutdown callbacks 7898 */ 7899 /** 7900 * cik_startup - program the asic to a functional state 7901 * 7902 * @rdev: radeon_device pointer 7903 * 7904 * Programs the asic to a functional state (CIK). 7905 * Called by cik_init() and cik_resume(). 7906 * Returns 0 for success, error for failure. 7907 */ 7908 static int cik_startup(struct radeon_device *rdev) 7909 { 7910 struct radeon_ring *ring; 7911 u32 nop; 7912 int r; 7913 7914 /* enable pcie gen2/3 link */ 7915 cik_pcie_gen3_enable(rdev); 7916 /* enable aspm */ 7917 cik_program_aspm(rdev); 7918 7919 /* scratch needs to be initialized before MC */ 7920 r = r600_vram_scratch_init(rdev); 7921 if (r) 7922 return r; 7923 7924 cik_mc_program(rdev); 7925 7926 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) { 7927 r = ci_mc_load_microcode(rdev); 7928 if (r) { 7929 DRM_ERROR("Failed to load MC firmware!\n"); 7930 return r; 7931 } 7932 } 7933 7934 r = cik_pcie_gart_enable(rdev); 7935 if (r) 7936 return r; 7937 cik_gpu_init(rdev); 7938 7939 /* allocate rlc buffers */ 7940 if (rdev->flags & RADEON_IS_IGP) { 7941 if (rdev->family == CHIP_KAVERI) { 7942 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list; 7943 rdev->rlc.reg_list_size = 7944 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list); 7945 } else { 7946 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list; 7947 rdev->rlc.reg_list_size = 7948 
(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list); 7949 } 7950 } 7951 rdev->rlc.cs_data = ci_cs_data; 7952 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; 7953 r = sumo_rlc_init(rdev); 7954 if (r) { 7955 DRM_ERROR("Failed to init rlc BOs!\n"); 7956 return r; 7957 } 7958 7959 /* allocate wb buffer */ 7960 r = radeon_wb_init(rdev); 7961 if (r) 7962 return r; 7963 7964 /* allocate mec buffers */ 7965 r = cik_mec_init(rdev); 7966 if (r) { 7967 DRM_ERROR("Failed to init MEC BOs!\n"); 7968 return r; 7969 } 7970 7971 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 7972 if (r) { 7973 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7974 return r; 7975 } 7976 7977 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 7978 if (r) { 7979 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7980 return r; 7981 } 7982 7983 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 7984 if (r) { 7985 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 7986 return r; 7987 } 7988 7989 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 7990 if (r) { 7991 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7992 return r; 7993 } 7994 7995 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 7996 if (r) { 7997 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 7998 return r; 7999 } 8000 8001 r = radeon_uvd_resume(rdev); 8002 if (!r) { 8003 r = uvd_v4_2_resume(rdev); 8004 if (!r) { 8005 r = radeon_fence_driver_start_ring(rdev, 8006 R600_RING_TYPE_UVD_INDEX); 8007 if (r) 8008 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 8009 } 8010 } 8011 if (r) 8012 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 8013 8014 r = radeon_vce_resume(rdev); 8015 if (!r) { 8016 r = vce_v2_0_resume(rdev); 8017 if (!r) 8018 r = radeon_fence_driver_start_ring(rdev, 8019 TN_RING_TYPE_VCE1_INDEX); 8020 if (!r) 8021 r = 
radeon_fence_driver_start_ring(rdev, 8022 TN_RING_TYPE_VCE2_INDEX); 8023 } 8024 if (r) { 8025 dev_err(rdev->dev, "VCE init error (%d).\n", r); 8026 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; 8027 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; 8028 } 8029 8030 /* Enable IRQ */ 8031 if (!rdev->irq.installed) { 8032 r = radeon_irq_kms_init(rdev); 8033 if (r) 8034 return r; 8035 } 8036 8037 r = cik_irq_init(rdev); 8038 if (r) { 8039 DRM_ERROR("radeon: IH init failed (%d).\n", r); 8040 radeon_irq_kms_fini(rdev); 8041 return r; 8042 } 8043 cik_irq_set(rdev); 8044 8045 if (rdev->family == CHIP_HAWAII) { 8046 nop = RADEON_CP_PACKET2; 8047 } else { 8048 nop = PACKET3(PACKET3_NOP, 0x3FFF); 8049 } 8050 8051 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 8052 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 8053 nop); 8054 if (r) 8055 return r; 8056 8057 /* set up the compute queues */ 8058 /* type-2 packets are deprecated on MEC, use type-3 instead */ 8059 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 8060 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 8061 nop); 8062 if (r) 8063 return r; 8064 ring->me = 1; /* first MEC */ 8065 ring->pipe = 0; /* first pipe */ 8066 ring->queue = 0; /* first queue */ 8067 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 8068 8069 /* type-2 packets are deprecated on MEC, use type-3 instead */ 8070 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 8071 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 8072 nop); 8073 if (r) 8074 return r; 8075 /* dGPU only have 1 MEC */ 8076 ring->me = 1; /* first MEC */ 8077 ring->pipe = 0; /* first pipe */ 8078 ring->queue = 1; /* second queue */ 8079 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 8080 8081 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 8082 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 8083 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 8084 if (r) 8085 return r; 8086 8087 ring = 
&rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 8088 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 8089 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 8090 if (r) 8091 return r; 8092 8093 r = cik_cp_resume(rdev); 8094 if (r) 8095 return r; 8096 8097 r = cik_sdma_resume(rdev); 8098 if (r) 8099 return r; 8100 8101 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 8102 if (ring->ring_size) { 8103 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 8104 RADEON_CP_PACKET2); 8105 if (!r) 8106 r = uvd_v1_0_init(rdev); 8107 if (r) 8108 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); 8109 } 8110 8111 r = -ENOENT; 8112 8113 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 8114 if (ring->ring_size) 8115 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 8116 VCE_CMD_NO_OP); 8117 8118 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 8119 if (ring->ring_size) 8120 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 8121 VCE_CMD_NO_OP); 8122 8123 if (!r) 8124 r = vce_v1_0_init(rdev); 8125 else if (r != -ENOENT) 8126 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); 8127 8128 r = radeon_ib_pool_init(rdev); 8129 if (r) { 8130 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 8131 return r; 8132 } 8133 8134 r = radeon_vm_manager_init(rdev); 8135 if (r) { 8136 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); 8137 return r; 8138 } 8139 8140 r = dce6_audio_init(rdev); 8141 if (r) 8142 return r; 8143 8144 return 0; 8145 } 8146 8147 /** 8148 * cik_resume - resume the asic to a functional state 8149 * 8150 * @rdev: radeon_device pointer 8151 * 8152 * Programs the asic to a functional state (CIK). 8153 * Called at resume. 8154 * Returns 0 for success, error for failure. 
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	/* accel_working must be set before cik_startup() so it can be
	 * cleared again if bring-up fails */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	/* teardown is ordered: stop engines before disabling irqs,
	 * writeback and the GART */
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}

/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
8223 */ 8224 int cik_init(struct radeon_device *rdev) 8225 { 8226 struct radeon_ring *ring; 8227 int r; 8228 8229 /* Read BIOS */ 8230 if (!radeon_get_bios(rdev)) { 8231 if (ASIC_IS_AVIVO(rdev)) 8232 return -EINVAL; 8233 } 8234 /* Must be an ATOMBIOS */ 8235 if (!rdev->is_atom_bios) { 8236 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n"); 8237 return -EINVAL; 8238 } 8239 r = radeon_atombios_init(rdev); 8240 if (r) 8241 return r; 8242 8243 /* Post card if necessary */ 8244 if (!radeon_card_posted(rdev)) { 8245 if (!rdev->bios) { 8246 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 8247 return -EINVAL; 8248 } 8249 DRM_INFO("GPU not posted. posting now...\n"); 8250 atom_asic_init(rdev->mode_info.atom_context); 8251 } 8252 /* init golden registers */ 8253 cik_init_golden_registers(rdev); 8254 /* Initialize scratch registers */ 8255 cik_scratch_init(rdev); 8256 /* Initialize surface registers */ 8257 radeon_surface_init(rdev); 8258 /* Initialize clocks */ 8259 radeon_get_clock_info(rdev->ddev); 8260 8261 /* Fence driver */ 8262 r = radeon_fence_driver_init(rdev); 8263 if (r) 8264 return r; 8265 8266 /* initialize memory controller */ 8267 r = cik_mc_init(rdev); 8268 if (r) 8269 return r; 8270 /* Memory manager */ 8271 r = radeon_bo_init(rdev); 8272 if (r) 8273 return r; 8274 8275 if (rdev->flags & RADEON_IS_IGP) { 8276 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 8277 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 8278 r = cik_init_microcode(rdev); 8279 if (r) { 8280 DRM_ERROR("Failed to load firmware!\n"); 8281 return r; 8282 } 8283 } 8284 } else { 8285 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 8286 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 8287 !rdev->mc_fw) { 8288 r = cik_init_microcode(rdev); 8289 if (r) { 8290 DRM_ERROR("Failed to load firmware!\n"); 8291 return r; 8292 } 8293 } 8294 } 8295 8296 /* Initialize power management */ 8297 radeon_pm_init(rdev); 8298 8299 ring = 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 8300 ring->ring_obj = NULL; 8301 r600_ring_init(rdev, ring, 1024 * 1024); 8302 8303 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 8304 ring->ring_obj = NULL; 8305 r600_ring_init(rdev, ring, 1024 * 1024); 8306 r = radeon_doorbell_get(rdev, &ring->doorbell_index); 8307 if (r) 8308 return r; 8309 8310 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 8311 ring->ring_obj = NULL; 8312 r600_ring_init(rdev, ring, 1024 * 1024); 8313 r = radeon_doorbell_get(rdev, &ring->doorbell_index); 8314 if (r) 8315 return r; 8316 8317 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 8318 ring->ring_obj = NULL; 8319 r600_ring_init(rdev, ring, 256 * 1024); 8320 8321 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 8322 ring->ring_obj = NULL; 8323 r600_ring_init(rdev, ring, 256 * 1024); 8324 8325 r = radeon_uvd_init(rdev); 8326 if (!r) { 8327 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 8328 ring->ring_obj = NULL; 8329 r600_ring_init(rdev, ring, 4096); 8330 } 8331 8332 r = radeon_vce_init(rdev); 8333 if (!r) { 8334 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 8335 ring->ring_obj = NULL; 8336 r600_ring_init(rdev, ring, 4096); 8337 8338 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 8339 ring->ring_obj = NULL; 8340 r600_ring_init(rdev, ring, 4096); 8341 } 8342 8343 rdev->ih.ring_obj = NULL; 8344 r600_ih_ring_init(rdev, 64 * 1024); 8345 8346 r = r600_pcie_gart_init(rdev); 8347 if (r) 8348 return r; 8349 8350 rdev->accel_working = true; 8351 r = cik_startup(rdev); 8352 if (r) { 8353 dev_err(rdev->dev, "disabling GPU acceleration\n"); 8354 cik_cp_fini(rdev); 8355 cik_sdma_fini(rdev); 8356 cik_irq_fini(rdev); 8357 sumo_rlc_fini(rdev); 8358 cik_mec_fini(rdev); 8359 radeon_wb_fini(rdev); 8360 radeon_ib_pool_fini(rdev); 8361 radeon_vm_manager_fini(rdev); 8362 radeon_irq_kms_fini(rdev); 8363 cik_pcie_gart_fini(rdev); 8364 rdev->accel_working = false; 8365 } 8366 8367 /* Don't start up if the MC ucode is missing. 
8368 * The default clocks and voltages before the MC ucode 8369 * is loaded are not suffient for advanced operations. 8370 */ 8371 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) { 8372 DRM_ERROR("radeon: MC ucode required for NI+.\n"); 8373 return -EINVAL; 8374 } 8375 8376 return 0; 8377 } 8378 8379 /** 8380 * cik_fini - asic specific driver and hw fini 8381 * 8382 * @rdev: radeon_device pointer 8383 * 8384 * Tear down the asic specific driver variables and program the hw 8385 * to an idle state (CIK). 8386 * Called at driver unload. 8387 */ 8388 void cik_fini(struct radeon_device *rdev) 8389 { 8390 radeon_pm_fini(rdev); 8391 cik_cp_fini(rdev); 8392 cik_sdma_fini(rdev); 8393 cik_fini_pg(rdev); 8394 cik_fini_cg(rdev); 8395 cik_irq_fini(rdev); 8396 sumo_rlc_fini(rdev); 8397 cik_mec_fini(rdev); 8398 radeon_wb_fini(rdev); 8399 radeon_vm_manager_fini(rdev); 8400 radeon_ib_pool_fini(rdev); 8401 radeon_irq_kms_fini(rdev); 8402 uvd_v1_0_fini(rdev); 8403 radeon_uvd_fini(rdev); 8404 radeon_vce_fini(rdev); 8405 cik_pcie_gart_fini(rdev); 8406 r600_vram_scratch_fini(rdev); 8407 radeon_gem_fini(rdev); 8408 radeon_fence_driver_fini(rdev); 8409 radeon_bo_fini(rdev); 8410 radeon_atombios_fini(rdev); 8411 kfree(rdev->bios); 8412 rdev->bios = NULL; 8413 } 8414 8415 void dce8_program_fmt(struct drm_encoder *encoder) 8416 { 8417 struct drm_device *dev = encoder->dev; 8418 struct radeon_device *rdev = dev->dev_private; 8419 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); 8420 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc); 8421 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); 8422 int bpc = 0; 8423 u32 tmp = 0; 8424 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE; 8425 8426 if (connector) { 8427 struct radeon_connector *radeon_connector = to_radeon_connector(connector); 8428 bpc = radeon_get_monitor_bpc(connector); 8429 dither = radeon_connector->dither; 8430 } 8431 8432 /* LVDS/eDP FMT is set up by 
 atom */
	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
		return;

	/* not needed for analog */
	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
		return;

	/* unknown monitor bpc - leave the FMT block alone */
	if (bpc == 0)
		return;

	switch (bpc) {
	case 6:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
		break;
	case 8:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_RGB_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
		break;
	case 10:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_RGB_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
		break;
	default:
		/* not needed */
		break;
	}

	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait for the hw to ack the DMIF buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
8560 * Used for display watermark bandwidth calculations 8561 * Returns the number of dram channels 8562 */ 8563 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev) 8564 { 8565 u32 tmp = RREG32(MC_SHARED_CHMAP); 8566 8567 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 8568 case 0: 8569 default: 8570 return 1; 8571 case 1: 8572 return 2; 8573 case 2: 8574 return 4; 8575 case 3: 8576 return 8; 8577 case 4: 8578 return 3; 8579 case 5: 8580 return 6; 8581 case 6: 8582 return 10; 8583 case 7: 8584 return 12; 8585 case 8: 8586 return 16; 8587 } 8588 } 8589 8590 struct dce8_wm_params { 8591 u32 dram_channels; /* number of dram channels */ 8592 u32 yclk; /* bandwidth per dram data pin in kHz */ 8593 u32 sclk; /* engine clock in kHz */ 8594 u32 disp_clk; /* display clock in kHz */ 8595 u32 src_width; /* viewport width */ 8596 u32 active_time; /* active display time in ns */ 8597 u32 blank_time; /* blank time in ns */ 8598 bool interlaced; /* mode is interlaced */ 8599 fixed20_12 vsc; /* vertical scale ratio */ 8600 u32 num_heads; /* number of active crtcs */ 8601 u32 bytes_per_pixel; /* bytes per pixel display + overlay */ 8602 u32 lb_size; /* line buffer allocated to pipe */ 8603 u32 vtaps; /* vertical scaler taps */ 8604 }; 8605 8606 /** 8607 * dce8_dram_bandwidth - get the dram bandwidth 8608 * 8609 * @wm: watermark calculation data 8610 * 8611 * Calculate the raw dram bandwidth (CIK). 
8612 * Used for display watermark bandwidth calculations 8613 * Returns the dram bandwidth in MBytes/s 8614 */ 8615 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm) 8616 { 8617 /* Calculate raw DRAM Bandwidth */ 8618 fixed20_12 dram_efficiency; /* 0.7 */ 8619 fixed20_12 yclk, dram_channels, bandwidth; 8620 fixed20_12 a; 8621 8622 a.full = dfixed_const(1000); 8623 yclk.full = dfixed_const(wm->yclk); 8624 yclk.full = dfixed_div(yclk, a); 8625 dram_channels.full = dfixed_const(wm->dram_channels * 4); 8626 a.full = dfixed_const(10); 8627 dram_efficiency.full = dfixed_const(7); 8628 dram_efficiency.full = dfixed_div(dram_efficiency, a); 8629 bandwidth.full = dfixed_mul(dram_channels, yclk); 8630 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); 8631 8632 return dfixed_trunc(bandwidth); 8633 } 8634 8635 /** 8636 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display 8637 * 8638 * @wm: watermark calculation data 8639 * 8640 * Calculate the dram bandwidth used for display (CIK). 8641 * Used for display watermark bandwidth calculations 8642 * Returns the dram bandwidth for display in MBytes/s 8643 */ 8644 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm) 8645 { 8646 /* Calculate DRAM Bandwidth and the part allocated to display. 
 */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	/* yclk is in kHz; scale down to MHz */
	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth:
	 * sclk(MHz) * 32 bytes * 0.8 efficiency */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth:
	 * disp_clk(MHz) * 32 bytes * 0.8 efficiency */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	/* the slowest link in the chain limits the burst bandwidth */
	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): available_bandwidth is used as a divisor below before
	 * the num_heads == 0 early-out; a zero bandwidth would fault here —
	 * confirm callers always supply non-zero clocks. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* how many source lines does one destination line need? */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line can be filled during active time, latency alone suffices;
	 * otherwise pad the watermark with the overrun */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	/* downscaling (vsc > 1.0) tolerates only one buffered line */
	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	/* NOTE(review): upstream radeon later bails out here when modeset
	 * init has not completed yet — confirm whether this port needs the
	 * same guard before touching the display registers. */
	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* latch the counter, then read the two halves under the lock */
	spin_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	spin_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

/* program one UVD clock (vclk or dclk) and wait for it to lock */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to ~1s for the clock to report a valid status */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r
 = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}

/* program the VCE ecclk divider; evclk is currently unused here */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* wait for the clock to be stable before touching the divider */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait for the new setting to take effect */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

/* attempt to bring the PCIe link up to gen2/gen3 speeds, including the
 * gen3 equalization retry dance on the bridge and the GPU */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_get_pciecap_ptr(root->dev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width if we
			 * are currently running narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field in LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the speed change to complete */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

/* configure PCIe ASPM (L0s/L1 power states); every register write is
 * skipped when the value is already programmed */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
#ifdef zMN_TODO
				/* CLKREQ detection not wired up in this port yet */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}