1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* Required microcode image sizes, in dwords. */
/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect registers accessor
 */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 *
 * @rdev: radeon_device pointer
 * @reg: indirect register offset
 *
 * Writes the offset to PCIE_INDEX, reads PCIE_INDEX back
 * (presumably to post the index write before the data access --
 * matches the pattern used by the write accessor below), then
 * returns the value read from PCIE_DATA.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

/**
 * cik_pciep_wreg - write a PCIE port indirect register
 *
 * @rdev: radeon_device pointer
 * @reg: indirect register offset
 * @v: value to write
 *
 * Writes @v to the indirect register @reg via the
 * PCIE_INDEX/PCIE_DATA pair, reading each index/data register
 * back after writing it.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

/*
 * "Golden" register tables below are {offset, and_mask, or_value}
 * triples consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

/**
 * cik_init_golden_registers - program the per-family golden settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the clock-gating init table, golden registers, common
 * registers and SPM registers for the detected ASIC family.
 * Unknown families are left untouched.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).  On IGPs the reference is halved when
 * GPU_COUNTER_CLK is set; on dGPUs it is quartered when
 * XTALIN_DIVIDE is set.
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
555 */ 556 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset) 557 { 558 if (offset < rdev->doorbell.size) { 559 return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset); 560 } else { 561 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset); 562 return 0; 563 } 564 } 565 566 /** 567 * cik_mm_wdoorbell - write a doorbell dword 568 * 569 * @rdev: radeon_device pointer 570 * @offset: byte offset into the aperture 571 * @v: value to write 572 * 573 * Writes @v to the doorbell aperture at the 574 * requested offset (CIK). 575 */ 576 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v) 577 { 578 if (offset < rdev->doorbell.size) { 579 writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset); 580 } else { 581 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset); 582 } 583 } 584 585 #define BONAIRE_IO_MC_REGS_SIZE 36 586 587 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] = 588 { 589 {0x00000070, 0x04400000}, 590 {0x00000071, 0x80c01803}, 591 {0x00000072, 0x00004004}, 592 {0x00000073, 0x00000100}, 593 {0x00000074, 0x00ff0000}, 594 {0x00000075, 0x34000000}, 595 {0x00000076, 0x08000014}, 596 {0x00000077, 0x00cc08ec}, 597 {0x00000078, 0x00000400}, 598 {0x00000079, 0x00000000}, 599 {0x0000007a, 0x04090000}, 600 {0x0000007c, 0x00000000}, 601 {0x0000007e, 0x4408a8e8}, 602 {0x0000007f, 0x00000304}, 603 {0x00000080, 0x00000000}, 604 {0x00000082, 0x00000001}, 605 {0x00000083, 0x00000002}, 606 {0x00000084, 0xf3e4f400}, 607 {0x00000085, 0x052024e3}, 608 {0x00000087, 0x00000000}, 609 {0x00000088, 0x01000000}, 610 {0x0000008a, 0x1c0a0000}, 611 {0x0000008b, 0xff010000}, 612 {0x0000008d, 0xffffefff}, 613 {0x0000008e, 0xfff3efff}, 614 {0x0000008f, 0xfff3efbf}, 615 {0x00000092, 0xf7ffffff}, 616 {0x00000093, 0xffffff7f}, 617 {0x00000095, 0x00101101}, 618 {0x00000096, 0x00000fff}, 619 {0x00000097, 0x00116fff}, 620 {0x00000098, 0x60010000}, 621 {0x00000099, 0x10010000}, 622 {0x0000009a, 0x00006000}, 623 {0x0000009b, 
0x00001000}, 624 {0x0000009f, 0x00b48000} 625 }; 626 627 /** 628 * cik_srbm_select - select specific register instances 629 * 630 * @rdev: radeon_device pointer 631 * @me: selected ME (micro engine) 632 * @pipe: pipe 633 * @queue: queue 634 * @vmid: VMID 635 * 636 * Switches the currently active registers instances. Some 637 * registers are instanced per VMID, others are instanced per 638 * me/pipe/queue combination. 639 */ 640 static void cik_srbm_select(struct radeon_device *rdev, 641 u32 me, u32 pipe, u32 queue, u32 vmid) 642 { 643 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) | 644 MEID(me & 0x3) | 645 VMID(vmid & 0xf) | 646 QUEUEID(queue & 0x7)); 647 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl); 648 } 649 650 /* ucode loading */ 651 /** 652 * ci_mc_load_microcode - load MC ucode into the hw 653 * 654 * @rdev: radeon_device pointer 655 * 656 * Load the GDDR MC ucode into the hw (CIK). 657 * Returns 0 on success, error on failure. 658 */ 659 static __unused int ci_mc_load_microcode(struct radeon_device *rdev) 660 { 661 const __be32 *fw_data; 662 u32 running, blackout = 0; 663 u32 *io_mc_regs; 664 int i, ucode_size, regs_size; 665 666 if (!rdev->mc_fw) 667 return -EINVAL; 668 669 switch (rdev->family) { 670 case CHIP_BONAIRE: 671 default: 672 io_mc_regs = (u32 *)&bonaire_io_mc_regs; 673 ucode_size = CIK_MC_UCODE_SIZE; 674 regs_size = BONAIRE_IO_MC_REGS_SIZE; 675 break; 676 } 677 678 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK; 679 680 if (running == 0) { 681 if (running) { 682 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL); 683 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1); 684 } 685 686 /* reset the engine and set to writable */ 687 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 688 WREG32(MC_SEQ_SUP_CNTL, 0x00000010); 689 690 /* load mc io regs */ 691 for (i = 0; i < regs_size; i++) { 692 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]); 693 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]); 694 } 695 /* load the MC ucode */ 696 fw_data = (const __be32 *)rdev->mc_fw->data; 697 
for (i = 0; i < ucode_size; i++) 698 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++)); 699 700 /* put the engine back into the active state */ 701 WREG32(MC_SEQ_SUP_CNTL, 0x00000008); 702 WREG32(MC_SEQ_SUP_CNTL, 0x00000004); 703 WREG32(MC_SEQ_SUP_CNTL, 0x00000001); 704 705 /* wait for training to complete */ 706 for (i = 0; i < rdev->usec_timeout; i++) { 707 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0) 708 break; 709 udelay(1); 710 } 711 for (i = 0; i < rdev->usec_timeout; i++) { 712 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1) 713 break; 714 udelay(1); 715 } 716 717 if (running) 718 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout); 719 } 720 721 return 0; 722 } 723 724 /** 725 * cik_init_microcode - load ucode images from disk 726 * 727 * @rdev: radeon_device pointer 728 * 729 * Use the firmware interface to load the ucode images into 730 * the driver (not loaded into hw). 731 * Returns 0 on success, error on failure. 732 */ 733 static int cik_init_microcode(struct radeon_device *rdev) 734 { 735 const char *chip_name; 736 size_t pfp_req_size, me_req_size, ce_req_size, 737 mec_req_size, rlc_req_size, mc_req_size, 738 sdma_req_size; 739 char fw_name[30]; 740 int err; 741 742 DRM_DEBUG("\n"); 743 744 switch (rdev->family) { 745 case CHIP_BONAIRE: 746 chip_name = "BONAIRE"; 747 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 748 me_req_size = CIK_ME_UCODE_SIZE * 4; 749 ce_req_size = CIK_CE_UCODE_SIZE * 4; 750 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 751 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4; 752 mc_req_size = CIK_MC_UCODE_SIZE * 4; 753 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 754 break; 755 case CHIP_KAVERI: 756 chip_name = "KAVERI"; 757 pfp_req_size = CIK_PFP_UCODE_SIZE * 4; 758 me_req_size = CIK_ME_UCODE_SIZE * 4; 759 ce_req_size = CIK_CE_UCODE_SIZE * 4; 760 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 761 rlc_req_size = KV_RLC_UCODE_SIZE * 4; 762 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 763 break; 764 case CHIP_KABINI: 765 chip_name = "KABINI"; 766 pfp_req_size = 
CIK_PFP_UCODE_SIZE * 4; 767 me_req_size = CIK_ME_UCODE_SIZE * 4; 768 ce_req_size = CIK_CE_UCODE_SIZE * 4; 769 mec_req_size = CIK_MEC_UCODE_SIZE * 4; 770 rlc_req_size = KB_RLC_UCODE_SIZE * 4; 771 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4; 772 break; 773 default: BUG(); 774 } 775 776 DRM_INFO("Loading %s Microcode\n", chip_name); 777 778 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name); 779 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev); 780 if (err) 781 goto out; 782 if (rdev->pfp_fw->datasize != pfp_req_size) { 783 printk(KERN_ERR 784 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 785 rdev->pfp_fw->datasize, fw_name); 786 err = -EINVAL; 787 goto out; 788 } 789 790 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name); 791 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); 792 if (err) 793 goto out; 794 if (rdev->me_fw->datasize != me_req_size) { 795 printk(KERN_ERR 796 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 797 rdev->me_fw->datasize, fw_name); 798 err = -EINVAL; 799 } 800 801 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name); 802 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev); 803 if (err) 804 goto out; 805 if (rdev->ce_fw->datasize != ce_req_size) { 806 printk(KERN_ERR 807 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 808 rdev->ce_fw->datasize, fw_name); 809 err = -EINVAL; 810 } 811 812 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name); 813 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev); 814 if (err) 815 goto out; 816 if (rdev->mec_fw->datasize != mec_req_size) { 817 printk(KERN_ERR 818 "cik_cp: Bogus length %zu in firmware \"%s\"\n", 819 rdev->mec_fw->datasize, fw_name); 820 err = -EINVAL; 821 } 822 823 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name); 824 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev); 825 if (err) 826 goto out; 827 if (rdev->rlc_fw->datasize != rlc_req_size) { 828 printk(KERN_ERR 829 "cik_rlc: 
Bogus length %zu in firmware \"%s\"\n", 830 rdev->rlc_fw->datasize, fw_name); 831 err = -EINVAL; 832 } 833 834 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name); 835 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev); 836 if (err) 837 goto out; 838 if (rdev->sdma_fw->datasize != sdma_req_size) { 839 printk(KERN_ERR 840 "cik_sdma: Bogus length %zu in firmware \"%s\"\n", 841 rdev->sdma_fw->datasize, fw_name); 842 err = -EINVAL; 843 } 844 845 /* No MC ucode on APUs */ 846 if (!(rdev->flags & RADEON_IS_IGP)) { 847 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name); 848 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); 849 if (err) 850 goto out; 851 if (rdev->mc_fw->datasize != mc_req_size) { 852 printk(KERN_ERR 853 "cik_mc: Bogus length %zu in firmware \"%s\"\n", 854 rdev->mc_fw->datasize, fw_name); 855 err = -EINVAL; 856 } 857 } 858 859 out: 860 if (err) { 861 if (err != -EINVAL) 862 printk(KERN_ERR 863 "cik_cp: Failed to load firmware \"%s\"\n", 864 fw_name); 865 release_firmware(rdev->pfp_fw); 866 rdev->pfp_fw = NULL; 867 release_firmware(rdev->me_fw); 868 rdev->me_fw = NULL; 869 release_firmware(rdev->ce_fw); 870 rdev->ce_fw = NULL; 871 release_firmware(rdev->mec_fw); 872 rdev->mec_fw = NULL; 873 release_firmware(rdev->rlc_fw); 874 rdev->rlc_fw = NULL; 875 release_firmware(rdev->sdma_fw); 876 rdev->sdma_fw = NULL; 877 release_firmware(rdev->mc_fw); 878 rdev->mc_fw = NULL; 879 } 880 return err; 881 } 882 883 /* 884 * Core functions 885 */ 886 /** 887 * cik_tiling_mode_table_init - init the hw tiling table 888 * 889 * @rdev: radeon_device pointer 890 * 891 * Starting with SI, the tiling setup is done globally in a 892 * set of 32 tiling modes. Rather than selecting each set of 893 * parameters per surface as on older asics, we just select 894 * which index in the tiling table we want to use, and the 895 * surface uses those parameters (CIK). 
896 */ 897 static void cik_tiling_mode_table_init(struct radeon_device *rdev) 898 { 899 const u32 num_tile_mode_states = 32; 900 const u32 num_secondary_tile_mode_states = 16; 901 u32 reg_offset, gb_tile_moden, split_equal_to_row_size; 902 u32 num_pipe_configs; 903 u32 num_rbs = rdev->config.cik.max_backends_per_se * 904 rdev->config.cik.max_shader_engines; 905 906 switch (rdev->config.cik.mem_row_size_in_kb) { 907 case 1: 908 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB; 909 break; 910 case 2: 911 default: 912 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB; 913 break; 914 case 4: 915 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB; 916 break; 917 } 918 919 num_pipe_configs = rdev->config.cik.max_tile_pipes; 920 if (num_pipe_configs > 8) 921 num_pipe_configs = 8; /* ??? */ 922 923 if (num_pipe_configs == 8) { 924 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 925 switch (reg_offset) { 926 case 0: 927 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 931 break; 932 case 1: 933 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 934 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 935 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 936 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 937 break; 938 case 2: 939 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 942 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 943 break; 944 case 3: 945 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 946 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 947 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 948 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 949 break; 950 case 4: 951 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 952 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 954 
TILE_SPLIT(split_equal_to_row_size)); 955 break; 956 case 5: 957 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 958 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 959 break; 960 case 6: 961 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 965 break; 966 case 7: 967 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 970 TILE_SPLIT(split_equal_to_row_size)); 971 break; 972 case 8: 973 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 975 break; 976 case 9: 977 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 978 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 979 break; 980 case 10: 981 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 982 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 985 break; 986 case 11: 987 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 988 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 989 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 991 break; 992 case 12: 993 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 997 break; 998 case 13: 999 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1000 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1001 break; 1002 case 14: 1003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1004 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1007 break; 1008 case 16: 1009 gb_tile_moden = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1010 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1011 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1013 break; 1014 case 17: 1015 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1019 break; 1020 case 27: 1021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1022 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1023 break; 1024 case 28: 1025 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1026 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1029 break; 1030 case 29: 1031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1032 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1035 break; 1036 case 30: 1037 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1038 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 1040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1041 break; 1042 default: 1043 gb_tile_moden = 0; 1044 break; 1045 } 1046 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 1047 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1048 } 1049 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 1050 switch (reg_offset) { 1051 case 0: 1052 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1055 NUM_BANKS(ADDR_SURF_16_BANK)); 1056 break; 1057 case 1: 1058 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1061 NUM_BANKS(ADDR_SURF_16_BANK)); 
1062 break; 1063 case 2: 1064 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1067 NUM_BANKS(ADDR_SURF_16_BANK)); 1068 break; 1069 case 3: 1070 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1071 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1072 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1073 NUM_BANKS(ADDR_SURF_16_BANK)); 1074 break; 1075 case 4: 1076 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1079 NUM_BANKS(ADDR_SURF_8_BANK)); 1080 break; 1081 case 5: 1082 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1085 NUM_BANKS(ADDR_SURF_4_BANK)); 1086 break; 1087 case 6: 1088 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1091 NUM_BANKS(ADDR_SURF_2_BANK)); 1092 break; 1093 case 8: 1094 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 1096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1097 NUM_BANKS(ADDR_SURF_16_BANK)); 1098 break; 1099 case 9: 1100 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1101 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1102 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1103 NUM_BANKS(ADDR_SURF_16_BANK)); 1104 break; 1105 case 10: 1106 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1109 NUM_BANKS(ADDR_SURF_16_BANK)); 1110 break; 1111 case 11: 1112 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1115 NUM_BANKS(ADDR_SURF_16_BANK)); 1116 break; 1117 case 12: 1118 gb_tile_moden = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1121 NUM_BANKS(ADDR_SURF_8_BANK)); 1122 break; 1123 case 13: 1124 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1127 NUM_BANKS(ADDR_SURF_4_BANK)); 1128 break; 1129 case 14: 1130 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1133 NUM_BANKS(ADDR_SURF_2_BANK)); 1134 break; 1135 default: 1136 gb_tile_moden = 0; 1137 break; 1138 } 1139 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1140 } 1141 } else if (num_pipe_configs == 4) { 1142 if (num_rbs == 4) { 1143 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1144 switch (reg_offset) { 1145 case 0: 1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1148 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1150 break; 1151 case 1: 1152 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1153 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1154 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 1156 break; 1157 case 2: 1158 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1160 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1162 break; 1163 case 3: 1164 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1165 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1166 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 1168 break; 1169 case 4: 1170 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1172 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1173 
TILE_SPLIT(split_equal_to_row_size)); 1174 break; 1175 case 5: 1176 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1177 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1178 break; 1179 case 6: 1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1181 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1182 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1184 break; 1185 case 7: 1186 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1188 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1189 TILE_SPLIT(split_equal_to_row_size)); 1190 break; 1191 case 8: 1192 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1193 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 1194 break; 1195 case 9: 1196 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1198 break; 1199 case 10: 1200 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1201 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1202 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1204 break; 1205 case 11: 1206 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1207 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1208 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1210 break; 1211 case 12: 1212 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1214 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1216 break; 1217 case 13: 1218 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1219 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1220 break; 1221 case 14: 1222 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1223 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1224 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1226 break; 1227 case 16: 1228 gb_tile_moden = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1230 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1232 break; 1233 case 17: 1234 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1235 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1236 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1238 break; 1239 case 27: 1240 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1241 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1242 break; 1243 case 28: 1244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1245 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1246 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1248 break; 1249 case 29: 1250 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1251 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1252 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1254 break; 1255 case 30: 1256 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1258 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1260 break; 1261 default: 1262 gb_tile_moden = 0; 1263 break; 1264 } 1265 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 1266 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1267 } 1268 } else if (num_rbs < 4) { 1269 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1270 switch (reg_offset) { 1271 case 0: 1272 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1273 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1274 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1276 break; 1277 case 1: 1278 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1280 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1281 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 1282 break; 1283 case 2: 1284 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1286 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1287 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1288 break; 1289 case 3: 1290 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1292 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 1294 break; 1295 case 4: 1296 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1298 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1299 TILE_SPLIT(split_equal_to_row_size)); 1300 break; 1301 case 5: 1302 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1304 break; 1305 case 6: 1306 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1308 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1310 break; 1311 case 7: 1312 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1314 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1315 TILE_SPLIT(split_equal_to_row_size)); 1316 break; 1317 case 8: 1318 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1319 PIPE_CONFIG(ADDR_SURF_P4_8x16)); 1320 break; 1321 case 9: 1322 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1324 break; 1325 case 10: 1326 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1327 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1328 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1330 break; 1331 case 11: 1332 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1333 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1334 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1335 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1336 break; 1337 case 12: 1338 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1340 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1342 break; 1343 case 13: 1344 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1346 break; 1347 case 14: 1348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1350 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1352 break; 1353 case 16: 1354 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1355 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1356 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1358 break; 1359 case 17: 1360 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1362 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1364 break; 1365 case 27: 1366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1368 break; 1369 case 28: 1370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1371 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1372 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1374 break; 1375 case 29: 1376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1377 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1378 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1380 break; 1381 case 30: 1382 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1384 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 1385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1386 break; 1387 default: 1388 gb_tile_moden = 0; 1389 break; 1390 } 1391 
rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 1392 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1393 } 1394 } 1395 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { 1396 switch (reg_offset) { 1397 case 0: 1398 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1401 NUM_BANKS(ADDR_SURF_16_BANK)); 1402 break; 1403 case 1: 1404 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1407 NUM_BANKS(ADDR_SURF_16_BANK)); 1408 break; 1409 case 2: 1410 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1413 NUM_BANKS(ADDR_SURF_16_BANK)); 1414 break; 1415 case 3: 1416 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1419 NUM_BANKS(ADDR_SURF_16_BANK)); 1420 break; 1421 case 4: 1422 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1425 NUM_BANKS(ADDR_SURF_16_BANK)); 1426 break; 1427 case 5: 1428 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1431 NUM_BANKS(ADDR_SURF_8_BANK)); 1432 break; 1433 case 6: 1434 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1437 NUM_BANKS(ADDR_SURF_4_BANK)); 1438 break; 1439 case 8: 1440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 1442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1443 NUM_BANKS(ADDR_SURF_16_BANK)); 1444 break; 1445 case 9: 1446 
gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1449 NUM_BANKS(ADDR_SURF_16_BANK)); 1450 break; 1451 case 10: 1452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1455 NUM_BANKS(ADDR_SURF_16_BANK)); 1456 break; 1457 case 11: 1458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1461 NUM_BANKS(ADDR_SURF_16_BANK)); 1462 break; 1463 case 12: 1464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1467 NUM_BANKS(ADDR_SURF_16_BANK)); 1468 break; 1469 case 13: 1470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1473 NUM_BANKS(ADDR_SURF_8_BANK)); 1474 break; 1475 case 14: 1476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1479 NUM_BANKS(ADDR_SURF_4_BANK)); 1480 break; 1481 default: 1482 gb_tile_moden = 0; 1483 break; 1484 } 1485 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1486 } 1487 } else if (num_pipe_configs == 2) { 1488 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1489 switch (reg_offset) { 1490 case 0: 1491 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1492 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1493 PIPE_CONFIG(ADDR_SURF_P2) | 1494 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B)); 1495 break; 1496 case 1: 1497 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1498 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1499 PIPE_CONFIG(ADDR_SURF_P2) | 1500 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B)); 1501 break; 1502 case 2: 
1503 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1504 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1505 PIPE_CONFIG(ADDR_SURF_P2) | 1506 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1507 break; 1508 case 3: 1509 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1510 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1511 PIPE_CONFIG(ADDR_SURF_P2) | 1512 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B)); 1513 break; 1514 case 4: 1515 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1517 PIPE_CONFIG(ADDR_SURF_P2) | 1518 TILE_SPLIT(split_equal_to_row_size)); 1519 break; 1520 case 5: 1521 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1523 break; 1524 case 6: 1525 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1527 PIPE_CONFIG(ADDR_SURF_P2) | 1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B)); 1529 break; 1530 case 7: 1531 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1533 PIPE_CONFIG(ADDR_SURF_P2) | 1534 TILE_SPLIT(split_equal_to_row_size)); 1535 break; 1536 case 8: 1537 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); 1538 break; 1539 case 9: 1540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1541 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1542 break; 1543 case 10: 1544 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1545 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1546 PIPE_CONFIG(ADDR_SURF_P2) | 1547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1548 break; 1549 case 11: 1550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1551 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1552 PIPE_CONFIG(ADDR_SURF_P2) | 1553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1554 break; 1555 case 12: 1556 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1557 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1558 PIPE_CONFIG(ADDR_SURF_P2) | 1559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1560 break; 1561 case 13: 1562 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1564 break; 1565 case 14: 1566 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1568 PIPE_CONFIG(ADDR_SURF_P2) | 1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1570 break; 1571 case 16: 1572 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1574 PIPE_CONFIG(ADDR_SURF_P2) | 1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1576 break; 1577 case 17: 1578 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1580 PIPE_CONFIG(ADDR_SURF_P2) | 1581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1582 break; 1583 case 27: 1584 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1585 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1586 break; 1587 case 28: 1588 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1589 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1590 PIPE_CONFIG(ADDR_SURF_P2) | 1591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1592 break; 1593 case 29: 1594 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1595 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1596 PIPE_CONFIG(ADDR_SURF_P2) | 1597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1598 break; 1599 case 30: 1600 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1601 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1602 PIPE_CONFIG(ADDR_SURF_P2) | 1603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1604 break; 1605 default: 1606 gb_tile_moden = 0; 1607 break; 1608 } 1609 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden; 1610 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1611 } 1612 for (reg_offset = 0; reg_offset < 
num_secondary_tile_mode_states; reg_offset++) { 1613 switch (reg_offset) { 1614 case 0: 1615 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1616 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1617 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1618 NUM_BANKS(ADDR_SURF_16_BANK)); 1619 break; 1620 case 1: 1621 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1624 NUM_BANKS(ADDR_SURF_16_BANK)); 1625 break; 1626 case 2: 1627 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1630 NUM_BANKS(ADDR_SURF_16_BANK)); 1631 break; 1632 case 3: 1633 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1636 NUM_BANKS(ADDR_SURF_16_BANK)); 1637 break; 1638 case 4: 1639 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1642 NUM_BANKS(ADDR_SURF_16_BANK)); 1643 break; 1644 case 5: 1645 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1648 NUM_BANKS(ADDR_SURF_16_BANK)); 1649 break; 1650 case 6: 1651 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1654 NUM_BANKS(ADDR_SURF_8_BANK)); 1655 break; 1656 case 8: 1657 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 1658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 1659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1660 NUM_BANKS(ADDR_SURF_16_BANK)); 1661 break; 1662 case 9: 1663 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 1664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1666 NUM_BANKS(ADDR_SURF_16_BANK)); 
1667 break; 1668 case 10: 1669 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1672 NUM_BANKS(ADDR_SURF_16_BANK)); 1673 break; 1674 case 11: 1675 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1678 NUM_BANKS(ADDR_SURF_16_BANK)); 1679 break; 1680 case 12: 1681 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1684 NUM_BANKS(ADDR_SURF_16_BANK)); 1685 break; 1686 case 13: 1687 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1690 NUM_BANKS(ADDR_SURF_16_BANK)); 1691 break; 1692 case 14: 1693 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1696 NUM_BANKS(ADDR_SURF_8_BANK)); 1697 break; 1698 default: 1699 gb_tile_moden = 0; 1700 break; 1701 } 1702 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 1703 } 1704 } else 1705 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs); 1706 } 1707 1708 /** 1709 * cik_select_se_sh - select which SE, SH to address 1710 * 1711 * @rdev: radeon_device pointer 1712 * @se_num: shader engine to address 1713 * @sh_num: sh block to address 1714 * 1715 * Select which SE, SH combinations to address. Certain 1716 * registers are instanced per SE or SH. 0xffffffff means 1717 * broadcast to all SEs or SHs (CIK). 
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	/* INSTANCE_BROADCAST_WRITES is always set; SE/SH are either
	 * broadcast (0xffffffff) or indexed individually.
	 */
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	/* Build a mask with the low bit_width bits set,
	 * e.g. bit_width == 3 -> 0x7.
	 */
	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}

/**
 * cik_get_rb_disabled - get the disabled render backends
 *
 * (NOTE(review): this kernel-doc was mistitled "cik_select_se_sh";
 * it documents cik_get_rb_disabled below.)
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	/* Hard (fuse) disable bits; bit 0 appears to gate whether the
	 * fused value is valid — TODO confirm against register spec.
	 */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* Merge in the user/driver-requested disable bits. */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* Only max_rb_num / se_num / sh_per_se RBs belong to the
	 * currently selected SE/SH (caller points GRBM_GFX_INDEX at it
	 * via cik_select_se_sh before calling).
	 */
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the per-SE/SH disabled-RB bits into one global bitmap,
	 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert into an enabled-RB bitmap over max_rb_num bits. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program PA_SC_RASTER_CONFIG per SE, consuming two
	 * enabled-RB bits per SH.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static __unused void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader/tiling limits and the golden GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same addressing config is mirrored to every block that
	 * walks memory: HDP, display (DMIF), SDMA and UVD */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change: latches the HDP host path config */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static __unused void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	/* 7 scratch regs starting at SCRATCH_REG0, all initially free */
	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg, then ask the CP to overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll until the CP has written the magic value (or we time out) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/* emit a semaphore wait or signal packet on the given ring */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits an DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.
 * IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record where the rptr will be after this IB */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same, but via a WRITE_DATA to the writeback buffer */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* length in dwords plus the VM id this IB executes under */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}

/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
2283 * Provides a basic gfx ring test to verify that IBs are working. 2284 * Returns 0 on success, error on failure. 2285 */ 2286 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 2287 { 2288 struct radeon_ib ib; 2289 uint32_t scratch; 2290 uint32_t tmp = 0; 2291 unsigned i; 2292 int r; 2293 2294 r = radeon_scratch_get(rdev, &scratch); 2295 if (r) { 2296 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r); 2297 return r; 2298 } 2299 WREG32(scratch, 0xCAFEDEAD); 2300 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 2301 if (r) { 2302 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 2303 return r; 2304 } 2305 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2306 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); 2307 ib.ptr[2] = 0xDEADBEEF; 2308 ib.length_dw = 3; 2309 r = radeon_ib_schedule(rdev, &ib, NULL); 2310 if (r) { 2311 radeon_scratch_free(rdev, scratch); 2312 radeon_ib_free(rdev, &ib); 2313 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 2314 return r; 2315 } 2316 r = radeon_fence_wait(ib.fence, false); 2317 if (r) { 2318 DRM_ERROR("radeon: fence wait failed (%d).\n", r); 2319 return r; 2320 } 2321 for (i = 0; i < rdev->usec_timeout; i++) { 2322 tmp = RREG32(scratch); 2323 if (tmp == 0xDEADBEEF) 2324 break; 2325 DRM_UDELAY(1); 2326 } 2327 if (i < rdev->usec_timeout) { 2328 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 2329 } else { 2330 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", 2331 scratch, tmp); 2332 r = -EINVAL; 2333 } 2334 radeon_scratch_free(rdev, scratch); 2335 radeon_ib_free(rdev, &ib); 2336 return r; 2337 } 2338 2339 /* 2340 * CP. 2341 * On CIK, gfx and compute now have independant command processors. 2342 * 2343 * GFX 2344 * Gfx consists of a single ring and can process both gfx jobs and 2345 * compute jobs. 
The gfx CP consists of three microengines (ME): 2346 * PFP - Pre-Fetch Parser 2347 * ME - Micro Engine 2348 * CE - Constant Engine 2349 * The PFP and ME make up what is considered the Drawing Engine (DE). 2350 * The CE is an asynchronous engine used for updating buffer desciptors 2351 * used by the DE so that they can be loaded into cache in parallel 2352 * while the DE is processing state update packets. 2353 * 2354 * Compute 2355 * The compute CP consists of two microengines (ME): 2356 * MEC1 - Compute MicroEngine 1 2357 * MEC2 - Compute MicroEngine 2 2358 * Each MEC supports 4 compute pipes and each pipe supports 8 queues. 2359 * The queues are exposed to userspace and are programmed directly 2360 * by the compute runtime. 2361 */ 2362 /** 2363 * cik_cp_gfx_enable - enable/disable the gfx CP MEs 2364 * 2365 * @rdev: radeon_device pointer 2366 * @enable: enable or disable the MEs 2367 * 2368 * Halts or unhalts the gfx MEs. 2369 */ 2370 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable) 2371 { 2372 if (enable) 2373 WREG32(CP_ME_CNTL, 0); 2374 else { 2375 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 2376 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 2377 } 2378 udelay(50); 2379 } 2380 2381 /** 2382 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode 2383 * 2384 * @rdev: radeon_device pointer 2385 * 2386 * Loads the gfx PFP, ME, and CE ucode. 2387 * Returns 0 for success, -EINVAL if the ucode is not available. 
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* reset all ucode write/read addresses */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* default state plus 17 dwords of fixed init packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* clears RB_RPTR_WR_ENA again (tmp never had it set) */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}

/* fetch the compute ring's read pointer: from the writeback buffer when
 * enabled, otherwise from the HQD register via an SRBM-selected read */
u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}
	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

	return rptr;
}

/* fetch the compute ring's write pointer, same scheme as the rptr above */
u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}
	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

	return wptr;
}

/* publish the compute ring's write pointer to the WB shadow and ring the
 * queue's doorbell so the MEC picks up the new work */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;

	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
	WDOORBELL32(ring->doorbell_offset, wptr);
}

/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
	udelay(50);
}

/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 - only KV has a second MEC; same image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}

/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	/* free the MQD bo of both compute rings */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}

/* free the HPD EOP buffer object used by the compute pipes */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

/* allocate, pin and clear the per-pipe HPD EOP buffer for the MECs */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}

/* CPU-side shadow of the per-queue HQD registers stored in the MQD */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

/* Memory Queue Descriptor layout for Bonaire-class compute queues */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32
dimensions[3]; 2845 u32 start_idx[3]; 2846 u32 num_threads[3]; 2847 u32 pipeline_stat_enable; 2848 u32 perf_counter_enable; 2849 u32 pgm[2]; 2850 u32 tba[2]; 2851 u32 tma[2]; 2852 u32 pgm_rsrc[2]; 2853 u32 vmid; 2854 u32 resource_limits; 2855 u32 static_thread_mgmt01[2]; 2856 u32 tmp_ring_size; 2857 u32 static_thread_mgmt23[2]; 2858 u32 restart[3]; 2859 u32 thread_trace_enable; 2860 u32 reserved1; 2861 u32 user_data[16]; 2862 u32 vgtcs_invoke_count[2]; 2863 struct hqd_registers queue_state; 2864 u32 dequeue_cntr; 2865 u32 interrupt_queue[64]; 2866 }; 2867 2868 /** 2869 * cik_cp_compute_resume - setup the compute queue registers 2870 * 2871 * @rdev: radeon_device pointer 2872 * 2873 * Program the compute queues and test them to make sure they 2874 * are working. 2875 * Returns 0 for success, error for failure. 2876 */ 2877 static int cik_cp_compute_resume(struct radeon_device *rdev) 2878 { 2879 int r, i, idx; 2880 u32 tmp; 2881 bool use_doorbell = true; 2882 u64 hqd_gpu_addr; 2883 u64 mqd_gpu_addr; 2884 u64 eop_gpu_addr; 2885 u64 wb_gpu_addr; 2886 u32 *buf; 2887 struct bonaire_mqd *mqd; 2888 2889 r = cik_cp_compute_start(rdev); 2890 if (r) 2891 return r; 2892 2893 /* fix up chicken bits */ 2894 tmp = RREG32(CP_CPF_DEBUG); 2895 tmp |= (1 << 23); 2896 WREG32(CP_CPF_DEBUG, tmp); 2897 2898 /* init the pipes */ 2899 spin_lock(&rdev->srbm_mutex); 2900 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { 2901 int me = (i < 4) ? 1 : 2; 2902 int pipe = (i < 4) ? 
i : (i - 4); 2903 2904 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); 2905 2906 cik_srbm_select(rdev, me, pipe, 0, 0); 2907 2908 /* write the EOP addr */ 2909 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); 2910 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); 2911 2912 /* set the VMID assigned */ 2913 WREG32(CP_HPD_EOP_VMID, 0); 2914 2915 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2916 tmp = RREG32(CP_HPD_EOP_CONTROL); 2917 tmp &= ~EOP_SIZE_MASK; 2918 tmp |= drm_order(MEC_HPD_SIZE / 8); 2919 WREG32(CP_HPD_EOP_CONTROL, tmp); 2920 } 2921 cik_srbm_select(rdev, 0, 0, 0, 0); 2922 spin_unlock(&rdev->srbm_mutex); 2923 2924 /* init the queues. Just two for now. */ 2925 for (i = 0; i < 2; i++) { 2926 if (i == 0) 2927 idx = CAYMAN_RING_TYPE_CP1_INDEX; 2928 else 2929 idx = CAYMAN_RING_TYPE_CP2_INDEX; 2930 2931 if (rdev->ring[idx].mqd_obj == NULL) { 2932 r = radeon_bo_create(rdev, 2933 sizeof(struct bonaire_mqd), 2934 PAGE_SIZE, true, 2935 RADEON_GEM_DOMAIN_GTT, NULL, 2936 &rdev->ring[idx].mqd_obj); 2937 if (r) { 2938 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); 2939 return r; 2940 } 2941 } 2942 2943 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false); 2944 if (unlikely(r != 0)) { 2945 cik_cp_compute_fini(rdev); 2946 return r; 2947 } 2948 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT, 2949 &mqd_gpu_addr); 2950 if (r) { 2951 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r); 2952 cik_cp_compute_fini(rdev); 2953 return r; 2954 } 2955 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf); 2956 if (r) { 2957 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r); 2958 cik_cp_compute_fini(rdev); 2959 return r; 2960 } 2961 2962 /* doorbell offset */ 2963 rdev->ring[idx].doorbell_offset = 2964 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; 2965 2966 /* init the mqd struct */ 2967 memset(buf, 0, sizeof(struct bonaire_mqd)); 2968 2969 mqd = (struct bonaire_mqd *)buf; 2970 mqd->header = 
0xC0310800; 2971 mqd->static_thread_mgmt01[0] = 0xffffffff; 2972 mqd->static_thread_mgmt01[1] = 0xffffffff; 2973 mqd->static_thread_mgmt23[0] = 0xffffffff; 2974 mqd->static_thread_mgmt23[1] = 0xffffffff; 2975 2976 spin_lock(&rdev->srbm_mutex); 2977 cik_srbm_select(rdev, rdev->ring[idx].me, 2978 rdev->ring[idx].pipe, 2979 rdev->ring[idx].queue, 0); 2980 2981 /* disable wptr polling */ 2982 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); 2983 tmp &= ~WPTR_POLL_EN; 2984 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); 2985 2986 /* enable doorbell? */ 2987 mqd->queue_state.cp_hqd_pq_doorbell_control = 2988 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 2989 if (use_doorbell) 2990 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 2991 else 2992 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN; 2993 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 2994 mqd->queue_state.cp_hqd_pq_doorbell_control); 2995 2996 /* disable the queue if it's active */ 2997 mqd->queue_state.cp_hqd_dequeue_request = 0; 2998 mqd->queue_state.cp_hqd_pq_rptr = 0; 2999 mqd->queue_state.cp_hqd_pq_wptr= 0; 3000 if (RREG32(CP_HQD_ACTIVE) & 1) { 3001 WREG32(CP_HQD_DEQUEUE_REQUEST, 1); 3002 for (i = 0; i < rdev->usec_timeout; i++) { 3003 if (!(RREG32(CP_HQD_ACTIVE) & 1)) 3004 break; 3005 udelay(1); 3006 } 3007 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); 3008 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); 3009 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3010 } 3011 3012 /* set the pointer to the MQD */ 3013 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; 3014 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 3015 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); 3016 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); 3017 /* set MQD vmid to 0 */ 3018 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL); 3019 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK; 3020 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); 
3021 3022 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3023 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8; 3024 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; 3025 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3026 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); 3027 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); 3028 3029 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3030 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL); 3031 mqd->queue_state.cp_hqd_pq_control &= 3032 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK); 3033 3034 mqd->queue_state.cp_hqd_pq_control |= 3035 drm_order(rdev->ring[idx].ring_size / 8); 3036 mqd->queue_state.cp_hqd_pq_control |= 3037 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8); 3038 #ifdef __BIG_ENDIAN 3039 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT; 3040 #endif 3041 mqd->queue_state.cp_hqd_pq_control &= 3042 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE); 3043 mqd->queue_state.cp_hqd_pq_control |= 3044 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */ 3045 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); 3046 3047 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */ 3048 if (i == 0) 3049 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET; 3050 else 3051 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET; 3052 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 3053 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3054 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); 3055 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI, 3056 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); 3057 3058 /* set the wb address wether it's enabled or not */ 3059 if (i == 0) 3060 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET; 3061 else 3062 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET; 3063 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr 
& 0xfffffffc; 3064 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = 3065 upper_32_bits(wb_gpu_addr) & 0xffff; 3066 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR, 3067 mqd->queue_state.cp_hqd_pq_rptr_report_addr); 3068 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3069 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); 3070 3071 /* enable the doorbell if requested */ 3072 if (use_doorbell) { 3073 mqd->queue_state.cp_hqd_pq_doorbell_control = 3074 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 3075 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 3076 mqd->queue_state.cp_hqd_pq_doorbell_control |= 3077 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 3078 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 3079 mqd->queue_state.cp_hqd_pq_doorbell_control &= 3080 ~(DOORBELL_SOURCE | DOORBELL_HIT); 3081 3082 } else { 3083 mqd->queue_state.cp_hqd_pq_doorbell_control = 0; 3084 } 3085 WREG32(CP_HQD_PQ_DOORBELL_CONTROL, 3086 mqd->queue_state.cp_hqd_pq_doorbell_control); 3087 3088 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3089 rdev->ring[idx].wptr = 0; 3090 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 3091 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 3092 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 3093 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 3094 3095 /* set the vmid for the queue */ 3096 mqd->queue_state.cp_hqd_vmid = 0; 3097 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); 3098 3099 /* activate the queue */ 3100 mqd->queue_state.cp_hqd_active = 1; 3101 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); 3102 3103 cik_srbm_select(rdev, 0, 0, 0, 0); 3104 spin_unlock(&rdev->srbm_mutex); 3105 3106 radeon_bo_kunmap(rdev->ring[idx].mqd_obj); 3107 radeon_bo_unreserve(rdev->ring[idx].mqd_obj); 3108 3109 rdev->ring[idx].ready = true; 3110 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]); 3111 if (r) 3112 rdev->ring[idx].ready = false; 3113 } 3114 3115 return 0; 3116 } 3117 3118 static void cik_cp_enable(struct 
radeon_device *rdev, bool enable) 3119 { 3120 cik_cp_gfx_enable(rdev, enable); 3121 cik_cp_compute_enable(rdev, enable); 3122 } 3123 3124 static int cik_cp_load_microcode(struct radeon_device *rdev) 3125 { 3126 int r; 3127 3128 r = cik_cp_gfx_load_microcode(rdev); 3129 if (r) 3130 return r; 3131 r = cik_cp_compute_load_microcode(rdev); 3132 if (r) 3133 return r; 3134 3135 return 0; 3136 } 3137 3138 static void cik_cp_fini(struct radeon_device *rdev) 3139 { 3140 cik_cp_gfx_fini(rdev); 3141 cik_cp_compute_fini(rdev); 3142 } 3143 3144 static int cik_cp_resume(struct radeon_device *rdev) 3145 { 3146 int r; 3147 3148 /* Reset all cp blocks */ 3149 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); 3150 RREG32(GRBM_SOFT_RESET); 3151 mdelay(15); 3152 WREG32(GRBM_SOFT_RESET, 0); 3153 RREG32(GRBM_SOFT_RESET); 3154 3155 r = cik_cp_load_microcode(rdev); 3156 if (r) 3157 return r; 3158 3159 r = cik_cp_gfx_resume(rdev); 3160 if (r) 3161 return r; 3162 r = cik_cp_compute_resume(rdev); 3163 if (r) 3164 return r; 3165 3166 return 0; 3167 } 3168 3169 /* 3170 * sDMA - System DMA 3171 * Starting with CIK, the GPU has new asynchronous 3172 * DMA engines. These engines are used for compute 3173 * and gfx. There are two DMA engines (SDMA0, SDMA1) 3174 * and each one supports 1 ring buffer used for gfx 3175 * and 2 queues used for compute. 3176 * 3177 * The programming model is very similar to the CP 3178 * (ring buffer, IBs, etc.), but sDMA has it's own 3179 * packet format that is different from the PM4 format 3180 * used by the CP. sDMA supports copying data, writing 3181 * embedded data, solid fills, and a number of other 3182 * things. It also has support for tiling/detiling of 3183 * buffers. 3184 */ 3185 /** 3186 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine 3187 * 3188 * @rdev: radeon_device pointer 3189 * @ib: IB object to schedule 3190 * 3191 * Schedule an IB in the DMA ring (CIK). 
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VMID (0-15) of the VM the IB belongs to goes in the packet flags */
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		/* emit a 1-DW write of the new rptr to the writeback slot;
		 * the value written accounts for the NOP padding the IB
		 * packet below will add (align to 8 DWs, offset 4) */
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on a 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);

}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	/* POLL_REG_MEM with OP=1/FUNC=3: poll memory for equality */
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	/* pick the HDP flush done/req bit for the engine this ring is on */
	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	/* S bit set = signal, clear = wait */
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8); /* 8-byte aligned address */
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	/* shrink active VRAM back to the CPU-visible window while DMA is down */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* disable the ring buffer and IB execution on both engines */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	/* toggle the HALT bit in each engine's ME_CNTL */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	/* DMA engines up: expose all of VRAM for buffer moves again */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
3454 */ 3455 static int cik_sdma_load_microcode(struct radeon_device *rdev) 3456 { 3457 const __be32 *fw_data; 3458 int i; 3459 3460 if (!rdev->sdma_fw) 3461 return -EINVAL; 3462 3463 /* stop the gfx rings and rlc compute queues */ 3464 cik_sdma_gfx_stop(rdev); 3465 cik_sdma_rlc_stop(rdev); 3466 3467 /* halt the MEs */ 3468 cik_sdma_enable(rdev, false); 3469 3470 /* sdma0 */ 3471 fw_data = (const __be32 *)rdev->sdma_fw->data; 3472 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); 3473 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) 3474 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++)); 3475 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); 3476 3477 /* sdma1 */ 3478 fw_data = (const __be32 *)rdev->sdma_fw->data; 3479 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); 3480 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++) 3481 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++)); 3482 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION); 3483 3484 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0); 3485 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0); 3486 return 0; 3487 } 3488 3489 /** 3490 * cik_sdma_resume - setup and start the async dma engines 3491 * 3492 * @rdev: radeon_device pointer 3493 * 3494 * Set up the DMA engines and enable them (CIK). 3495 * Returns 0 for success, error for failure. 
3496 */ 3497 static __unused int cik_sdma_resume(struct radeon_device *rdev) 3498 { 3499 int r; 3500 3501 /* Reset dma */ 3502 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); 3503 RREG32(SRBM_SOFT_RESET); 3504 udelay(50); 3505 WREG32(SRBM_SOFT_RESET, 0); 3506 RREG32(SRBM_SOFT_RESET); 3507 3508 r = cik_sdma_load_microcode(rdev); 3509 if (r) 3510 return r; 3511 3512 /* unhalt the MEs */ 3513 cik_sdma_enable(rdev, true); 3514 3515 /* start the gfx rings and rlc compute queues */ 3516 r = cik_sdma_gfx_resume(rdev); 3517 if (r) 3518 return r; 3519 r = cik_sdma_rlc_resume(rdev); 3520 if (r) 3521 return r; 3522 3523 return 0; 3524 } 3525 3526 /** 3527 * cik_sdma_fini - tear down the async dma engines 3528 * 3529 * @rdev: radeon_device pointer 3530 * 3531 * Stop the async dma engines and free the rings (CIK). 3532 */ 3533 static __unused void cik_sdma_fini(struct radeon_device *rdev) 3534 { 3535 /* stop the gfx rings and rlc compute queues */ 3536 cik_sdma_gfx_stop(rdev); 3537 cik_sdma_rlc_stop(rdev); 3538 /* halt the MEs */ 3539 cik_sdma_enable(rdev, false); 3540 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); 3541 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); 3542 /* XXX - compute dma queue tear down */ 3543 } 3544 3545 /** 3546 * cik_copy_dma - copy pages using the DMA engine 3547 * 3548 * @rdev: radeon_device pointer 3549 * @src_offset: src GPU address 3550 * @dst_offset: dst GPU address 3551 * @num_gpu_pages: number of GPU pages to xfer 3552 * @fence: radeon fence object 3553 * 3554 * Copy GPU paging using the DMA engine (CIK). 3555 * Used by the radeon ttm implementation to move pages if 3556 * registered as the asic copy callback. 
3557 */ 3558 int cik_copy_dma(struct radeon_device *rdev, 3559 uint64_t src_offset, uint64_t dst_offset, 3560 unsigned num_gpu_pages, 3561 struct radeon_fence **fence) 3562 { 3563 struct radeon_semaphore *sem = NULL; 3564 int ring_index = rdev->asic->copy.dma_ring_index; 3565 struct radeon_ring *ring = &rdev->ring[ring_index]; 3566 u32 size_in_bytes, cur_size_in_bytes; 3567 int i, num_loops; 3568 int r = 0; 3569 3570 r = radeon_semaphore_create(rdev, &sem); 3571 if (r) { 3572 DRM_ERROR("radeon: moving bo (%d).\n", r); 3573 return r; 3574 } 3575 3576 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); 3577 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); 3578 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); 3579 if (r) { 3580 DRM_ERROR("radeon: moving bo (%d).\n", r); 3581 radeon_semaphore_free(rdev, &sem, NULL); 3582 return r; 3583 } 3584 3585 if (radeon_fence_need_sync(*fence, ring->idx)) { 3586 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 3587 ring->idx); 3588 radeon_fence_note_sync(*fence, ring->idx); 3589 } else { 3590 radeon_semaphore_free(rdev, &sem, NULL); 3591 } 3592 3593 for (i = 0; i < num_loops; i++) { 3594 cur_size_in_bytes = size_in_bytes; 3595 if (cur_size_in_bytes > 0x1fffff) 3596 cur_size_in_bytes = 0x1fffff; 3597 size_in_bytes -= cur_size_in_bytes; 3598 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); 3599 radeon_ring_write(ring, cur_size_in_bytes); 3600 radeon_ring_write(ring, 0); /* src/dst endian swap */ 3601 radeon_ring_write(ring, src_offset & 0xffffffff); 3602 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff); 3603 radeon_ring_write(ring, dst_offset & 0xfffffffc); 3604 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff); 3605 src_offset += cur_size_in_bytes; 3606 dst_offset += cur_size_in_bytes; 3607 } 3608 3609 r = radeon_fence_emit(rdev, fence, ring->idx); 3610 if (r) { 3611 radeon_ring_unlock_undo(rdev, ring); 3612 return r; 3613 } 3614 3615 
radeon_ring_unlock_commit(rdev, ring); 3616 radeon_semaphore_free(rdev, &sem, *fence); 3617 3618 return r; 3619 } 3620 3621 /** 3622 * cik_sdma_ring_test - simple async dma engine test 3623 * 3624 * @rdev: radeon_device pointer 3625 * @ring: radeon_ring structure holding ring information 3626 * 3627 * Test the DMA engine by writing using it to write an 3628 * value to memory. (CIK). 3629 * Returns 0 for success, error for failure. 3630 */ 3631 int cik_sdma_ring_test(struct radeon_device *rdev, 3632 struct radeon_ring *ring) 3633 { 3634 unsigned i; 3635 int r; 3636 volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr; 3637 u32 tmp; 3638 3639 if (!ptr) { 3640 DRM_ERROR("invalid vram scratch pointer\n"); 3641 return -EINVAL; 3642 } 3643 3644 tmp = 0xCAFEDEAD; 3645 writel(tmp, ptr); 3646 3647 r = radeon_ring_lock(rdev, ring, 4); 3648 if (r) { 3649 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); 3650 return r; 3651 } 3652 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); 3653 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); 3654 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff); 3655 radeon_ring_write(ring, 1); /* number of DWs to follow */ 3656 radeon_ring_write(ring, 0xDEADBEEF); 3657 radeon_ring_unlock_commit(rdev, ring); 3658 3659 for (i = 0; i < rdev->usec_timeout; i++) { 3660 tmp = readl(ptr); 3661 if (tmp == 0xDEADBEEF) 3662 break; 3663 DRM_UDELAY(1); 3664 } 3665 3666 if (i < rdev->usec_timeout) { 3667 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); 3668 } else { 3669 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", 3670 ring->idx, tmp); 3671 r = -EINVAL; 3672 } 3673 return r; 3674 } 3675 3676 /** 3677 * cik_sdma_ib_test - test an IB on the DMA engine 3678 * 3679 * @rdev: radeon_device pointer 3680 * @ring: radeon_ring structure holding ring information 3681 * 3682 * Test a simple IB in the DMA ring (CIK). 
3683 * Returns 0 on success, error on failure. 3684 */ 3685 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3686 { 3687 struct radeon_ib ib; 3688 unsigned i; 3689 int r; 3690 volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr; 3691 u32 tmp = 0; 3692 3693 if (!ptr) { 3694 DRM_ERROR("invalid vram scratch pointer\n"); 3695 return -EINVAL; 3696 } 3697 3698 tmp = 0xCAFEDEAD; 3699 writel(tmp, ptr); 3700 3701 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 3702 if (r) { 3703 DRM_ERROR("radeon: failed to get ib (%d).\n", r); 3704 return r; 3705 } 3706 3707 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 3708 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; 3709 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff; 3710 ib.ptr[3] = 1; 3711 ib.ptr[4] = 0xDEADBEEF; 3712 ib.length_dw = 5; 3713 3714 r = radeon_ib_schedule(rdev, &ib, NULL); 3715 if (r) { 3716 radeon_ib_free(rdev, &ib); 3717 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3718 return r; 3719 } 3720 r = radeon_fence_wait(ib.fence, false); 3721 if (r) { 3722 DRM_ERROR("radeon: fence wait failed (%d).\n", r); 3723 return r; 3724 } 3725 for (i = 0; i < rdev->usec_timeout; i++) { 3726 tmp = readl(ptr); 3727 if (tmp == 0xDEADBEEF) 3728 break; 3729 DRM_UDELAY(1); 3730 } 3731 if (i < rdev->usec_timeout) { 3732 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 3733 } else { 3734 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); 3735 r = -EINVAL; 3736 } 3737 radeon_ib_free(rdev, &ib); 3738 return r; 3739 } 3740 3741 3742 static void cik_print_gpu_status_regs(struct radeon_device *rdev) 3743 { 3744 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 3745 RREG32(GRBM_STATUS)); 3746 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n", 3747 RREG32(GRBM_STATUS2)); 3748 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 3749 RREG32(GRBM_STATUS_SE0)); 3750 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n", 3751 
RREG32(GRBM_STATUS_SE1)); 3752 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n", 3753 RREG32(GRBM_STATUS_SE2)); 3754 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n", 3755 RREG32(GRBM_STATUS_SE3)); 3756 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 3757 RREG32(SRBM_STATUS)); 3758 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n", 3759 RREG32(SRBM_STATUS2)); 3760 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n", 3761 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); 3762 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", 3763 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); 3764 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT)); 3765 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n", 3766 RREG32(CP_STALLED_STAT1)); 3767 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n", 3768 RREG32(CP_STALLED_STAT2)); 3769 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n", 3770 RREG32(CP_STALLED_STAT3)); 3771 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", 3772 RREG32(CP_CPF_BUSY_STAT)); 3773 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", 3774 RREG32(CP_CPF_STALLED_STAT1)); 3775 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS)); 3776 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT)); 3777 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", 3778 RREG32(CP_CPC_STALLED_STAT1)); 3779 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS)); 3780 } 3781 3782 /** 3783 * cik_gpu_check_soft_reset - check which blocks are busy 3784 * 3785 * @rdev: radeon_device pointer 3786 * 3787 * Check which blocks are busy and return the relevant reset 3788 * mask to be used by cik_gpu_soft_reset(). 3789 * Returns a mask of the blocks to be reset. 
3790 */ 3791 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev) 3792 { 3793 u32 reset_mask = 0; 3794 u32 tmp; 3795 3796 /* GRBM_STATUS */ 3797 tmp = RREG32(GRBM_STATUS); 3798 if (tmp & (PA_BUSY | SC_BUSY | 3799 BCI_BUSY | SX_BUSY | 3800 TA_BUSY | VGT_BUSY | 3801 DB_BUSY | CB_BUSY | 3802 GDS_BUSY | SPI_BUSY | 3803 IA_BUSY | IA_BUSY_NO_DMA)) 3804 reset_mask |= RADEON_RESET_GFX; 3805 3806 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY)) 3807 reset_mask |= RADEON_RESET_CP; 3808 3809 /* GRBM_STATUS2 */ 3810 tmp = RREG32(GRBM_STATUS2); 3811 if (tmp & RLC_BUSY) 3812 reset_mask |= RADEON_RESET_RLC; 3813 3814 /* SDMA0_STATUS_REG */ 3815 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); 3816 if (!(tmp & SDMA_IDLE)) 3817 reset_mask |= RADEON_RESET_DMA; 3818 3819 /* SDMA1_STATUS_REG */ 3820 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); 3821 if (!(tmp & SDMA_IDLE)) 3822 reset_mask |= RADEON_RESET_DMA1; 3823 3824 /* SRBM_STATUS2 */ 3825 tmp = RREG32(SRBM_STATUS2); 3826 if (tmp & SDMA_BUSY) 3827 reset_mask |= RADEON_RESET_DMA; 3828 3829 if (tmp & SDMA1_BUSY) 3830 reset_mask |= RADEON_RESET_DMA1; 3831 3832 /* SRBM_STATUS */ 3833 tmp = RREG32(SRBM_STATUS); 3834 3835 if (tmp & IH_BUSY) 3836 reset_mask |= RADEON_RESET_IH; 3837 3838 if (tmp & SEM_BUSY) 3839 reset_mask |= RADEON_RESET_SEM; 3840 3841 if (tmp & GRBM_RQ_PENDING) 3842 reset_mask |= RADEON_RESET_GRBM; 3843 3844 if (tmp & VMC_BUSY) 3845 reset_mask |= RADEON_RESET_VMC; 3846 3847 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY | 3848 MCC_BUSY | MCD_BUSY)) 3849 reset_mask |= RADEON_RESET_MC; 3850 3851 if (evergreen_is_display_hung(rdev)) 3852 reset_mask |= RADEON_RESET_DISPLAY; 3853 3854 /* Skip MC reset as it's mostly likely not hung, just busy */ 3855 if (reset_mask & RADEON_RESET_MC) { 3856 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); 3857 reset_mask &= ~RADEON_RESET_MC; 3858 } 3859 3860 return reset_mask; 3861 } 3862 3863 /** 3864 * cik_gpu_soft_reset - soft reset GPU 3865 * 3866 * @rdev: 
radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt any sdma engine that is about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on discrete parts, never on IGPs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, settle, then deassert each reset register */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
3998 */ 3999 int cik_asic_reset(struct radeon_device *rdev) 4000 { 4001 u32 reset_mask; 4002 4003 reset_mask = cik_gpu_check_soft_reset(rdev); 4004 4005 if (reset_mask) 4006 r600_set_bios_scratch_engine_hung(rdev, true); 4007 4008 cik_gpu_soft_reset(rdev, reset_mask); 4009 4010 reset_mask = cik_gpu_check_soft_reset(rdev); 4011 4012 if (!reset_mask) 4013 r600_set_bios_scratch_engine_hung(rdev, false); 4014 4015 return 0; 4016 } 4017 4018 /** 4019 * cik_gfx_is_lockup - check if the 3D engine is locked up 4020 * 4021 * @rdev: radeon_device pointer 4022 * @ring: radeon_ring structure holding ring information 4023 * 4024 * Check if the 3D engine is locked up (CIK). 4025 * Returns true if the engine is locked, false if not. 4026 */ 4027 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 4028 { 4029 u32 reset_mask = cik_gpu_check_soft_reset(rdev); 4030 4031 if (!(reset_mask & (RADEON_RESET_GFX | 4032 RADEON_RESET_COMPUTE | 4033 RADEON_RESET_CP))) { 4034 radeon_ring_lockup_update(ring); 4035 return false; 4036 } 4037 /* force CP activities */ 4038 radeon_ring_force_activity(rdev, ring); 4039 return radeon_ring_test_lockup(rdev, ring); 4040 } 4041 4042 /** 4043 * cik_sdma_is_lockup - Check if the DMA engine is locked up 4044 * 4045 * @rdev: radeon_device pointer 4046 * @ring: radeon_ring structure holding ring information 4047 * 4048 * Check if the async DMA engine is locked up (CIK). 4049 * Returns true if the engine appears to be locked up, false if not. 
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	/* each sDMA engine has its own reset bit; pick the one for this ring */
	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static __unused void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* halt display accesses to the MC before reprogramming it */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the top (bits 31:16) and base (bits 15:0)
	 * of the FB aperture in 16MB (>> 24) units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* park the AGP aperture registers; base == top/bot leaves no usable
	 * AGP window */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static __unused int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	/* decode the channel-count field; note several encodings map to
	 * non-power-of-two channel counts (3, 6, 10, 12) */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ?
 */
	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
	/* size in MB on si */
	/* CONFIG_MEMSIZE reports VRAM in MB; convert to bytes */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 is invalidated */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static __unused int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented magic registers — intent unknown,
	 * carried over as-is; confirm against the register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 have their PT base registers in two
	 * separate banks; point them all at the VMID0 table for now */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	/* srbm_mutex serializes cik_srbm_select() against other users of the
	 * per-VMID SRBM register window */
	spin_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	spin_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache; note ENABLE_L2_CACHE is deliberately left out here */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static __unused void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
4402 */ 4403 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 4404 { 4405 return 0; 4406 } 4407 4408 /* 4409 * vm 4410 * VMID 0 is the physical GPU addresses as used by the kernel. 4411 * VMIDs 1-15 are used for userspace clients and are handled 4412 * by the radeon vm/hsa code. 4413 */ 4414 /** 4415 * cik_vm_init - cik vm init callback 4416 * 4417 * @rdev: radeon_device pointer 4418 * 4419 * Inits cik specific vm parameters (number of VMs, base of vram for 4420 * VMIDs 1-15) (CIK). 4421 * Returns 0 for success. 4422 */ 4423 int cik_vm_init(struct radeon_device *rdev) 4424 { 4425 /* number of VMs */ 4426 rdev->vm_manager.nvm = 16; 4427 /* base offset of vram pages */ 4428 if (rdev->flags & RADEON_IS_IGP) { 4429 u64 tmp = RREG32(MC_VM_FB_OFFSET); 4430 tmp <<= 22; 4431 rdev->vm_manager.vram_base_offset = tmp; 4432 } else 4433 rdev->vm_manager.vram_base_offset = 0; 4434 4435 return 0; 4436 } 4437 4438 /** 4439 * cik_vm_fini - cik vm fini callback 4440 * 4441 * @rdev: radeon_device pointer 4442 * 4443 * Tear down any asic specific VM setup (CIK). 4444 */ 4445 void cik_vm_fini(struct radeon_device *rdev) 4446 { 4447 } 4448 4449 /** 4450 * cik_vm_decode_fault - print human readable fault info 4451 * 4452 * @rdev: radeon_device pointer 4453 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 4454 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value 4455 * 4456 * Print human readable fault information (CIK). 4457 */ 4458 static void cik_vm_decode_fault(struct radeon_device *rdev, 4459 u32 status, u32 addr, u32 mc_client) 4460 { 4461 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT; 4462 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT; 4463 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT; 4464 char *block = (char *)&mc_client; 4465 4466 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n", 4467 protections, vmid, addr, 4468 (status & MEMORY_CLIENT_RW_MASK) ? 
	       "write" : "read",
	       block, mc_id);
}

/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update the page table base for this VMID; contexts 0-7 and 8-15
	 * use two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID's SRBM register window */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM window back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}

/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 header dwords + 2 dwords per 64-bit PTE,
			 * capped at the WRITE_DATA packet limit */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* translate through the GART for system pages */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				/* 2 dwords per PTE, capped at the sDMA
				 * linear-write packet limit */
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				/* here ndw counts PTEs, not dwords */
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		/* pad the sDMA IB to a multiple of 8 dwords with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}

/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	/* pick the HDP flush done/req bit for this sDMA engine */
	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* update the page table base; contexts 0-7 and 8-15 live in two
	 * separate register banks */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID's SRBM register window */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	/* switch the SRBM window back to VMID 0 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}

/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	int i, j, k;
	u32 mask, tmp;

	/* mask the context busy/empty interrupts while the RLC is down */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	/* repeated reads act as a settling delay before touching CGCG */
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);

	/* clear the low two (coarse-grain clock/light-sleep enable) bits */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	WREG32(RLC_CNTL, 0);

	/* wait for the CU master to go idle on every SE/SH */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_CNTL, RLC_ENABLE);

	/* re-enable the context interrupts masked in cik_rlc_stop() */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	udelay(50);
}

/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static __unused int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size;
	u32 clear_state_info[3];
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	/* ucode size (in dwords) differs per family */
	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* pulse a soft reset of the RLC block */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
	RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);
	udelay(50);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware image is big-endian dwords; stream it into the GPM */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX */
	/* clear-state buffer is not wired up yet; zeros disable it */
	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
	clear_state_info[2] = 0;//cik_default_size;
	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
	for (i = 0; i < 3; i++)
		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
	WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}

/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written. When the
 * pointers are equal, the ring is idle. When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr. When there is an interrupt, the host then starts
 * fetching commands and processing them until the pointers are
 * equal again at which point it updates the rptr.
 */

/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	/* preserve only the polarity bit; clear the HPD interrupt enables */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static __unused int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}

/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from current hw state with the enable bits cleared */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) &
~TIME_STAMP_INT_ENABLE; 5119 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5120 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5121 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5122 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5123 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5124 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; 5125 5126 /* enable CP interrupts on all rings */ 5127 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 5128 DRM_DEBUG("cik_irq_set: sw int gfx\n"); 5129 cp_int_cntl |= TIME_STAMP_INT_ENABLE; 5130 } 5131 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { 5132 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 5133 DRM_DEBUG("si_irq_set: sw int cp1\n"); 5134 if (ring->me == 1) { 5135 switch (ring->pipe) { 5136 case 0: 5137 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 5138 break; 5139 case 1: 5140 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 5141 break; 5142 case 2: 5143 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5144 break; 5145 case 3: 5146 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5147 break; 5148 default: 5149 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 5150 break; 5151 } 5152 } else if (ring->me == 2) { 5153 switch (ring->pipe) { 5154 case 0: 5155 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 5156 break; 5157 case 1: 5158 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 5159 break; 5160 case 2: 5161 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5162 break; 5163 case 3: 5164 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5165 break; 5166 default: 5167 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); 5168 break; 5169 } 5170 } else { 5171 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me); 5172 } 5173 } 5174 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { 5175 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 5176 DRM_DEBUG("si_irq_set: sw int cp2\n"); 5177 if (ring->me == 
1) { 5178 switch (ring->pipe) { 5179 case 0: 5180 cp_m1p0 |= TIME_STAMP_INT_ENABLE; 5181 break; 5182 case 1: 5183 cp_m1p1 |= TIME_STAMP_INT_ENABLE; 5184 break; 5185 case 2: 5186 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5187 break; 5188 case 3: 5189 cp_m1p2 |= TIME_STAMP_INT_ENABLE; 5190 break; 5191 default: 5192 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 5193 break; 5194 } 5195 } else if (ring->me == 2) { 5196 switch (ring->pipe) { 5197 case 0: 5198 cp_m2p0 |= TIME_STAMP_INT_ENABLE; 5199 break; 5200 case 1: 5201 cp_m2p1 |= TIME_STAMP_INT_ENABLE; 5202 break; 5203 case 2: 5204 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5205 break; 5206 case 3: 5207 cp_m2p2 |= TIME_STAMP_INT_ENABLE; 5208 break; 5209 default: 5210 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); 5211 break; 5212 } 5213 } else { 5214 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me); 5215 } 5216 } 5217 5218 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) { 5219 DRM_DEBUG("cik_irq_set: sw int dma\n"); 5220 dma_cntl |= TRAP_ENABLE; 5221 } 5222 5223 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) { 5224 DRM_DEBUG("cik_irq_set: sw int dma1\n"); 5225 dma_cntl1 |= TRAP_ENABLE; 5226 } 5227 5228 if (rdev->irq.crtc_vblank_int[0] || 5229 atomic_read(&rdev->irq.pflip[0])) { 5230 DRM_DEBUG("cik_irq_set: vblank 0\n"); 5231 crtc1 |= VBLANK_INTERRUPT_MASK; 5232 } 5233 if (rdev->irq.crtc_vblank_int[1] || 5234 atomic_read(&rdev->irq.pflip[1])) { 5235 DRM_DEBUG("cik_irq_set: vblank 1\n"); 5236 crtc2 |= VBLANK_INTERRUPT_MASK; 5237 } 5238 if (rdev->irq.crtc_vblank_int[2] || 5239 atomic_read(&rdev->irq.pflip[2])) { 5240 DRM_DEBUG("cik_irq_set: vblank 2\n"); 5241 crtc3 |= VBLANK_INTERRUPT_MASK; 5242 } 5243 if (rdev->irq.crtc_vblank_int[3] || 5244 atomic_read(&rdev->irq.pflip[3])) { 5245 DRM_DEBUG("cik_irq_set: vblank 3\n"); 5246 crtc4 |= VBLANK_INTERRUPT_MASK; 5247 } 5248 if (rdev->irq.crtc_vblank_int[4] || 5249 atomic_read(&rdev->irq.pflip[4])) { 5250 
DRM_DEBUG("cik_irq_set: vblank 4\n"); 5251 crtc5 |= VBLANK_INTERRUPT_MASK; 5252 } 5253 if (rdev->irq.crtc_vblank_int[5] || 5254 atomic_read(&rdev->irq.pflip[5])) { 5255 DRM_DEBUG("cik_irq_set: vblank 5\n"); 5256 crtc6 |= VBLANK_INTERRUPT_MASK; 5257 } 5258 if (rdev->irq.hpd[0]) { 5259 DRM_DEBUG("cik_irq_set: hpd 1\n"); 5260 hpd1 |= DC_HPDx_INT_EN; 5261 } 5262 if (rdev->irq.hpd[1]) { 5263 DRM_DEBUG("cik_irq_set: hpd 2\n"); 5264 hpd2 |= DC_HPDx_INT_EN; 5265 } 5266 if (rdev->irq.hpd[2]) { 5267 DRM_DEBUG("cik_irq_set: hpd 3\n"); 5268 hpd3 |= DC_HPDx_INT_EN; 5269 } 5270 if (rdev->irq.hpd[3]) { 5271 DRM_DEBUG("cik_irq_set: hpd 4\n"); 5272 hpd4 |= DC_HPDx_INT_EN; 5273 } 5274 if (rdev->irq.hpd[4]) { 5275 DRM_DEBUG("cik_irq_set: hpd 5\n"); 5276 hpd5 |= DC_HPDx_INT_EN; 5277 } 5278 if (rdev->irq.hpd[5]) { 5279 DRM_DEBUG("cik_irq_set: hpd 6\n"); 5280 hpd6 |= DC_HPDx_INT_EN; 5281 } 5282 5283 WREG32(CP_INT_CNTL_RING0, cp_int_cntl); 5284 5285 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl); 5286 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1); 5287 5288 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); 5289 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1); 5290 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2); 5291 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3); 5292 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0); 5293 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1); 5294 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2); 5295 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3); 5296 5297 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 5298 5299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 5300 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2); 5301 if (rdev->num_crtc >= 4) { 5302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3); 5303 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4); 5304 } 5305 if (rdev->num_crtc >= 6) { 5306 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5); 5307 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); 5308 } 5309 5310 
WREG32(DC_HPD1_INT_CONTROL, hpd1); 5311 WREG32(DC_HPD2_INT_CONTROL, hpd2); 5312 WREG32(DC_HPD3_INT_CONTROL, hpd3); 5313 WREG32(DC_HPD4_INT_CONTROL, hpd4); 5314 WREG32(DC_HPD5_INT_CONTROL, hpd5); 5315 WREG32(DC_HPD6_INT_CONTROL, hpd6); 5316 5317 return 0; 5318 } 5319 5320 /** 5321 * cik_irq_ack - ack interrupt sources 5322 * 5323 * @rdev: radeon_device pointer 5324 * 5325 * Ack interrupt sources on the GPU (vblanks, hpd, 5326 * etc.) (CIK). Certain interrupts sources are sw 5327 * generated and do not require an explicit ack. 5328 */ 5329 static inline void cik_irq_ack(struct radeon_device *rdev) 5330 { 5331 u32 tmp; 5332 5333 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS); 5334 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); 5335 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); 5336 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); 5337 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); 5338 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); 5339 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); 5340 5341 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) 5342 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); 5343 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) 5344 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); 5345 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) 5346 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); 5347 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) 5348 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); 5349 5350 if (rdev->num_crtc >= 4) { 5351 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) 5352 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); 
5353 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) 5354 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); 5355 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) 5356 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); 5357 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) 5358 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); 5359 } 5360 5361 if (rdev->num_crtc >= 6) { 5362 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) 5363 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); 5364 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) 5365 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); 5366 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) 5367 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); 5368 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) 5369 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); 5370 } 5371 5372 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 5373 tmp = RREG32(DC_HPD1_INT_CONTROL); 5374 tmp |= DC_HPDx_INT_ACK; 5375 WREG32(DC_HPD1_INT_CONTROL, tmp); 5376 } 5377 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 5378 tmp = RREG32(DC_HPD2_INT_CONTROL); 5379 tmp |= DC_HPDx_INT_ACK; 5380 WREG32(DC_HPD2_INT_CONTROL, tmp); 5381 } 5382 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 5383 tmp = RREG32(DC_HPD3_INT_CONTROL); 5384 tmp |= DC_HPDx_INT_ACK; 5385 WREG32(DC_HPD3_INT_CONTROL, tmp); 5386 } 5387 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 5388 tmp = RREG32(DC_HPD4_INT_CONTROL); 5389 tmp |= DC_HPDx_INT_ACK; 5390 WREG32(DC_HPD4_INT_CONTROL, tmp); 5391 } 5392 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 5393 tmp = RREG32(DC_HPD5_INT_CONTROL); 5394 tmp |= DC_HPDx_INT_ACK; 5395 
WREG32(DC_HPD5_INT_CONTROL, tmp); 5396 } 5397 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 5398 tmp = RREG32(DC_HPD5_INT_CONTROL); 5399 tmp |= DC_HPDx_INT_ACK; 5400 WREG32(DC_HPD6_INT_CONTROL, tmp); 5401 } 5402 } 5403 5404 /** 5405 * cik_irq_disable - disable interrupts 5406 * 5407 * @rdev: radeon_device pointer 5408 * 5409 * Disable interrupts on the hw (CIK). 5410 */ 5411 static void cik_irq_disable(struct radeon_device *rdev) 5412 { 5413 cik_disable_interrupts(rdev); 5414 /* Wait and acknowledge irq */ 5415 mdelay(1); 5416 cik_irq_ack(rdev); 5417 cik_disable_interrupt_state(rdev); 5418 } 5419 5420 /** 5421 * cik_irq_disable - disable interrupts for suspend 5422 * 5423 * @rdev: radeon_device pointer 5424 * 5425 * Disable interrupts and stop the RLC (CIK). 5426 * Used for suspend. 5427 */ 5428 static void cik_irq_suspend(struct radeon_device *rdev) 5429 { 5430 cik_irq_disable(rdev); 5431 cik_rlc_stop(rdev); 5432 } 5433 5434 /** 5435 * cik_irq_fini - tear down interrupt support 5436 * 5437 * @rdev: radeon_device pointer 5438 * 5439 * Disable interrupts on the hw and free the IH ring 5440 * buffer (CIK). 5441 * Used for driver unload. 5442 */ 5443 static __unused void cik_irq_fini(struct radeon_device *rdev) 5444 { 5445 cik_irq_suspend(rdev); 5446 r600_ih_ring_fini(rdev); 5447 } 5448 5449 /** 5450 * cik_get_ih_wptr - get the IH ring buffer wptr 5451 * 5452 * @rdev: radeon_device pointer 5453 * 5454 * Get the IH ring buffer wptr from either the register 5455 * or the writeback memory buffer (CIK). Also check for 5456 * ring buffer overflow and deal with it. 5457 * Used by cik_irq_process(). 5458 * Returns the value of the wptr. 
5459 */ 5460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) 5461 { 5462 u32 wptr, tmp; 5463 5464 if (rdev->wb.enabled) 5465 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); 5466 else 5467 wptr = RREG32(IH_RB_WPTR); 5468 5469 if (wptr & RB_OVERFLOW) { 5470 /* When a ring buffer overflow happen start parsing interrupt 5471 * from the last not overwritten vector (wptr + 16). Hopefully 5472 * this should allow us to catchup. 5473 */ 5474 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n", 5475 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask); 5476 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask; 5477 tmp = RREG32(IH_RB_CNTL); 5478 tmp |= IH_WPTR_OVERFLOW_CLEAR; 5479 WREG32(IH_RB_CNTL, tmp); 5480 } 5481 return (wptr & rdev->ih.ptr_mask); 5482 } 5483 5484 /* CIK IV Ring 5485 * Each IV ring entry is 128 bits: 5486 * [7:0] - interrupt source id 5487 * [31:8] - reserved 5488 * [59:32] - interrupt source data 5489 * [63:60] - reserved 5490 * [71:64] - RINGID 5491 * CP: 5492 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0] 5493 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher 5494 * - for gfx, hw shader state (0=PS...5=LS, 6=CS) 5495 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes 5496 * PIPE_ID - ME0 0=3D 5497 * - ME1&2 compute dispatcher (4 pipes each) 5498 * SDMA: 5499 * INSTANCE_ID [1:0], QUEUE_ID[1:0] 5500 * INSTANCE_ID - 0 = sdma0, 1 = sdma1 5501 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1 5502 * [79:72] - VMID 5503 * [95:80] - PASID 5504 * [127:96] - reserved 5505 */ 5506 /** 5507 * cik_irq_process - interrupt handler 5508 * 5509 * @rdev: radeon_device pointer 5510 * 5511 * Interrupt hander (CIK). Walk the IH ring, 5512 * ack interrupts and schedule work to handle 5513 * interrupt events. 5514 * Returns irq process return code. 
5515 */ 5516 irqreturn_t cik_irq_process(struct radeon_device *rdev) 5517 { 5518 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 5519 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 5520 u32 wptr; 5521 u32 rptr; 5522 u32 src_id, src_data, ring_id; 5523 u8 me_id, pipe_id, queue_id; 5524 u32 ring_index; 5525 bool queue_hotplug = false; 5526 bool queue_reset = false; 5527 u32 addr, status, mc_client; 5528 5529 if (!rdev->ih.enabled || rdev->shutdown) 5530 return IRQ_NONE; 5531 5532 wptr = cik_get_ih_wptr(rdev); 5533 5534 restart_ih: 5535 /* is somebody else already processing irqs? */ 5536 if (atomic_xchg(&rdev->ih.lock, 1)) 5537 return IRQ_NONE; 5538 5539 rptr = rdev->ih.rptr; 5540 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr); 5541 5542 /* Order reading of wptr vs. reading of IH ring data */ 5543 rmb(); 5544 5545 /* display interrupts */ 5546 cik_irq_ack(rdev); 5547 5548 while (rptr != wptr) { 5549 /* wptr/rptr are in bytes! 
*/ 5550 ring_index = rptr / 4; 5551 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; 5552 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; 5553 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; 5554 5555 switch (src_id) { 5556 case 1: /* D1 vblank/vline */ 5557 switch (src_data) { 5558 case 0: /* D1 vblank */ 5559 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { 5560 if (rdev->irq.crtc_vblank_int[0]) { 5561 drm_handle_vblank(rdev->ddev, 0); 5562 rdev->pm.vblank_sync = true; 5563 wake_up(&rdev->irq.vblank_queue); 5564 } 5565 if (atomic_read(&rdev->irq.pflip[0])) 5566 radeon_crtc_handle_flip(rdev, 0); 5567 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; 5568 DRM_DEBUG("IH: D1 vblank\n"); 5569 } 5570 break; 5571 case 1: /* D1 vline */ 5572 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { 5573 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; 5574 DRM_DEBUG("IH: D1 vline\n"); 5575 } 5576 break; 5577 default: 5578 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5579 break; 5580 } 5581 break; 5582 case 2: /* D2 vblank/vline */ 5583 switch (src_data) { 5584 case 0: /* D2 vblank */ 5585 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { 5586 if (rdev->irq.crtc_vblank_int[1]) { 5587 drm_handle_vblank(rdev->ddev, 1); 5588 rdev->pm.vblank_sync = true; 5589 wake_up(&rdev->irq.vblank_queue); 5590 } 5591 if (atomic_read(&rdev->irq.pflip[1])) 5592 radeon_crtc_handle_flip(rdev, 1); 5593 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; 5594 DRM_DEBUG("IH: D2 vblank\n"); 5595 } 5596 break; 5597 case 1: /* D2 vline */ 5598 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { 5599 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; 5600 DRM_DEBUG("IH: D2 vline\n"); 5601 } 5602 break; 5603 default: 5604 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5605 break; 5606 } 5607 break; 5608 case 3: /* D3 
vblank/vline */ 5609 switch (src_data) { 5610 case 0: /* D3 vblank */ 5611 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { 5612 if (rdev->irq.crtc_vblank_int[2]) { 5613 drm_handle_vblank(rdev->ddev, 2); 5614 rdev->pm.vblank_sync = true; 5615 wake_up(&rdev->irq.vblank_queue); 5616 } 5617 if (atomic_read(&rdev->irq.pflip[2])) 5618 radeon_crtc_handle_flip(rdev, 2); 5619 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; 5620 DRM_DEBUG("IH: D3 vblank\n"); 5621 } 5622 break; 5623 case 1: /* D3 vline */ 5624 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { 5625 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; 5626 DRM_DEBUG("IH: D3 vline\n"); 5627 } 5628 break; 5629 default: 5630 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5631 break; 5632 } 5633 break; 5634 case 4: /* D4 vblank/vline */ 5635 switch (src_data) { 5636 case 0: /* D4 vblank */ 5637 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { 5638 if (rdev->irq.crtc_vblank_int[3]) { 5639 drm_handle_vblank(rdev->ddev, 3); 5640 rdev->pm.vblank_sync = true; 5641 wake_up(&rdev->irq.vblank_queue); 5642 } 5643 if (atomic_read(&rdev->irq.pflip[3])) 5644 radeon_crtc_handle_flip(rdev, 3); 5645 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; 5646 DRM_DEBUG("IH: D4 vblank\n"); 5647 } 5648 break; 5649 case 1: /* D4 vline */ 5650 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { 5651 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; 5652 DRM_DEBUG("IH: D4 vline\n"); 5653 } 5654 break; 5655 default: 5656 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5657 break; 5658 } 5659 break; 5660 case 5: /* D5 vblank/vline */ 5661 switch (src_data) { 5662 case 0: /* D5 vblank */ 5663 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { 5664 if (rdev->irq.crtc_vblank_int[4]) { 5665 drm_handle_vblank(rdev->ddev, 4); 5666 rdev->pm.vblank_sync = 
true; 5667 wake_up(&rdev->irq.vblank_queue); 5668 } 5669 if (atomic_read(&rdev->irq.pflip[4])) 5670 radeon_crtc_handle_flip(rdev, 4); 5671 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; 5672 DRM_DEBUG("IH: D5 vblank\n"); 5673 } 5674 break; 5675 case 1: /* D5 vline */ 5676 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { 5677 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; 5678 DRM_DEBUG("IH: D5 vline\n"); 5679 } 5680 break; 5681 default: 5682 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5683 break; 5684 } 5685 break; 5686 case 6: /* D6 vblank/vline */ 5687 switch (src_data) { 5688 case 0: /* D6 vblank */ 5689 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { 5690 if (rdev->irq.crtc_vblank_int[5]) { 5691 drm_handle_vblank(rdev->ddev, 5); 5692 rdev->pm.vblank_sync = true; 5693 wake_up(&rdev->irq.vblank_queue); 5694 } 5695 if (atomic_read(&rdev->irq.pflip[5])) 5696 radeon_crtc_handle_flip(rdev, 5); 5697 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; 5698 DRM_DEBUG("IH: D6 vblank\n"); 5699 } 5700 break; 5701 case 1: /* D6 vline */ 5702 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { 5703 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; 5704 DRM_DEBUG("IH: D6 vline\n"); 5705 } 5706 break; 5707 default: 5708 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5709 break; 5710 } 5711 break; 5712 case 42: /* HPD hotplug */ 5713 switch (src_data) { 5714 case 0: 5715 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { 5716 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; 5717 queue_hotplug = true; 5718 DRM_DEBUG("IH: HPD1\n"); 5719 } 5720 break; 5721 case 1: 5722 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { 5723 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; 5724 queue_hotplug = true; 5725 DRM_DEBUG("IH: HPD2\n"); 5726 } 5727 break; 5728 case 2: 5729 if 
(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { 5730 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; 5731 queue_hotplug = true; 5732 DRM_DEBUG("IH: HPD3\n"); 5733 } 5734 break; 5735 case 3: 5736 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { 5737 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; 5738 queue_hotplug = true; 5739 DRM_DEBUG("IH: HPD4\n"); 5740 } 5741 break; 5742 case 4: 5743 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { 5744 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; 5745 queue_hotplug = true; 5746 DRM_DEBUG("IH: HPD5\n"); 5747 } 5748 break; 5749 case 5: 5750 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { 5751 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; 5752 queue_hotplug = true; 5753 DRM_DEBUG("IH: HPD6\n"); 5754 } 5755 break; 5756 default: 5757 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5758 break; 5759 } 5760 break; 5761 case 146: 5762 case 147: 5763 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR); 5764 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS); 5765 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); 5766 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); 5767 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 5768 addr); 5769 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 5770 status); 5771 cik_vm_decode_fault(rdev, status, addr, mc_client); 5772 /* reset addr and status */ 5773 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); 5774 break; 5775 case 176: /* GFX RB CP_INT */ 5776 case 177: /* GFX IB CP_INT */ 5777 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 5778 break; 5779 case 181: /* CP EOP event */ 5780 DRM_DEBUG("IH: CP EOP\n"); 5781 /* XXX check the bitfield order! 
*/ 5782 me_id = (ring_id & 0x60) >> 5; 5783 pipe_id = (ring_id & 0x18) >> 3; 5784 queue_id = (ring_id & 0x7) >> 0; 5785 switch (me_id) { 5786 case 0: 5787 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 5788 break; 5789 case 1: 5790 case 2: 5791 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id)) 5792 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 5793 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id)) 5794 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 5795 break; 5796 } 5797 break; 5798 case 184: /* CP Privileged reg access */ 5799 DRM_ERROR("Illegal register access in command stream\n"); 5800 /* XXX check the bitfield order! */ 5801 me_id = (ring_id & 0x60) >> 5; 5802 pipe_id = (ring_id & 0x18) >> 3; 5803 queue_id = (ring_id & 0x7) >> 0; 5804 switch (me_id) { 5805 case 0: 5806 /* This results in a full GPU reset, but all we need to do is soft 5807 * reset the CP for gfx 5808 */ 5809 queue_reset = true; 5810 break; 5811 case 1: 5812 /* XXX compute */ 5813 queue_reset = true; 5814 break; 5815 case 2: 5816 /* XXX compute */ 5817 queue_reset = true; 5818 break; 5819 } 5820 break; 5821 case 185: /* CP Privileged inst */ 5822 DRM_ERROR("Illegal instruction in command stream\n"); 5823 /* XXX check the bitfield order! */ 5824 me_id = (ring_id & 0x60) >> 5; 5825 pipe_id = (ring_id & 0x18) >> 3; 5826 queue_id = (ring_id & 0x7) >> 0; 5827 switch (me_id) { 5828 case 0: 5829 /* This results in a full GPU reset, but all we need to do is soft 5830 * reset the CP for gfx 5831 */ 5832 queue_reset = true; 5833 break; 5834 case 1: 5835 /* XXX compute */ 5836 queue_reset = true; 5837 break; 5838 case 2: 5839 /* XXX compute */ 5840 queue_reset = true; 5841 break; 5842 } 5843 break; 5844 case 224: /* SDMA trap event */ 5845 /* XXX check the bitfield order! 
*/ 5846 me_id = (ring_id & 0x3) >> 0; 5847 queue_id = (ring_id & 0xc) >> 2; 5848 DRM_DEBUG("IH: SDMA trap\n"); 5849 switch (me_id) { 5850 case 0: 5851 switch (queue_id) { 5852 case 0: 5853 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX); 5854 break; 5855 case 1: 5856 /* XXX compute */ 5857 break; 5858 case 2: 5859 /* XXX compute */ 5860 break; 5861 } 5862 break; 5863 case 1: 5864 switch (queue_id) { 5865 case 0: 5866 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 5867 break; 5868 case 1: 5869 /* XXX compute */ 5870 break; 5871 case 2: 5872 /* XXX compute */ 5873 break; 5874 } 5875 break; 5876 } 5877 break; 5878 case 241: /* SDMA Privileged inst */ 5879 case 247: /* SDMA Privileged inst */ 5880 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 5881 /* XXX check the bitfield order! */ 5882 me_id = (ring_id & 0x3) >> 0; 5883 queue_id = (ring_id & 0xc) >> 2; 5884 switch (me_id) { 5885 case 0: 5886 switch (queue_id) { 5887 case 0: 5888 queue_reset = true; 5889 break; 5890 case 1: 5891 /* XXX compute */ 5892 queue_reset = true; 5893 break; 5894 case 2: 5895 /* XXX compute */ 5896 queue_reset = true; 5897 break; 5898 } 5899 break; 5900 case 1: 5901 switch (queue_id) { 5902 case 0: 5903 queue_reset = true; 5904 break; 5905 case 1: 5906 /* XXX compute */ 5907 queue_reset = true; 5908 break; 5909 case 2: 5910 /* XXX compute */ 5911 queue_reset = true; 5912 break; 5913 } 5914 break; 5915 } 5916 break; 5917 case 233: /* GUI IDLE */ 5918 DRM_DEBUG("IH: GUI idle\n"); 5919 break; 5920 default: 5921 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 5922 break; 5923 } 5924 5925 /* wptr/rptr are in bytes! 
*/ 5926 rptr += 16; 5927 rptr &= rdev->ih.ptr_mask; 5928 } 5929 if (queue_hotplug) 5930 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work); 5931 if (queue_reset) 5932 taskqueue_enqueue(rdev->tq, &rdev->reset_work); 5933 rdev->ih.rptr = rptr; 5934 WREG32(IH_RB_RPTR, rdev->ih.rptr); 5935 atomic_set(&rdev->ih.lock, 0); 5936 5937 /* make sure wptr hasn't changed while processing */ 5938 wptr = cik_get_ih_wptr(rdev); 5939 if (wptr != rptr) 5940 goto restart_ih; 5941 5942 return IRQ_HANDLED; 5943 } 5944 5945 /* 5946 * startup/shutdown callbacks 5947 */ 5948 /** 5949 * cik_startup - program the asic to a functional state 5950 * 5951 * @rdev: radeon_device pointer 5952 * 5953 * Programs the asic to a functional state (CIK). 5954 * Called by cik_init() and cik_resume(). 5955 * Returns 0 for success, error for failure. 5956 */ 5957 static int cik_startup(struct radeon_device *rdev) 5958 { 5959 struct radeon_ring *ring; 5960 int r; 5961 5962 cik_mc_program(rdev); 5963 5964 if (rdev->flags & RADEON_IS_IGP) { 5965 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 5966 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) { 5967 r = cik_init_microcode(rdev); 5968 if (r) { 5969 DRM_ERROR("Failed to load firmware!\n"); 5970 return r; 5971 } 5972 } 5973 } else { 5974 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw || 5975 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw || 5976 !rdev->mc_fw) { 5977 r = cik_init_microcode(rdev); 5978 if (r) { 5979 DRM_ERROR("Failed to load firmware!\n"); 5980 return r; 5981 } 5982 } 5983 5984 r = ci_mc_load_microcode(rdev); 5985 if (r) { 5986 DRM_ERROR("Failed to load MC firmware!\n"); 5987 return r; 5988 } 5989 } 5990 5991 r = r600_vram_scratch_init(rdev); 5992 if (r) 5993 return r; 5994 5995 r = cik_pcie_gart_enable(rdev); 5996 if (r) 5997 return r; 5998 cik_gpu_init(rdev); 5999 6000 /* allocate rlc buffers */ 6001 r = si_rlc_init(rdev); 6002 if (r) { 6003 DRM_ERROR("Failed to init rlc BOs!\n"); 6004 return r; 6005 } 6006 6007 /* allocate wb buffer 
*/ 6008 r = radeon_wb_init(rdev); 6009 if (r) 6010 return r; 6011 6012 /* allocate mec buffers */ 6013 r = cik_mec_init(rdev); 6014 if (r) { 6015 DRM_ERROR("Failed to init MEC BOs!\n"); 6016 return r; 6017 } 6018 6019 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); 6020 if (r) { 6021 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 6022 return r; 6023 } 6024 6025 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); 6026 if (r) { 6027 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 6028 return r; 6029 } 6030 6031 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX); 6032 if (r) { 6033 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); 6034 return r; 6035 } 6036 6037 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); 6038 if (r) { 6039 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 6040 return r; 6041 } 6042 6043 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX); 6044 if (r) { 6045 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r); 6046 return r; 6047 } 6048 6049 r = cik_uvd_resume(rdev); 6050 if (!r) { 6051 r = radeon_fence_driver_start_ring(rdev, 6052 R600_RING_TYPE_UVD_INDEX); 6053 if (r) 6054 dev_err(rdev->dev, "UVD fences init error (%d).\n", r); 6055 } 6056 if (r) 6057 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 6058 6059 /* Enable IRQ */ 6060 if (!rdev->irq.installed) { 6061 r = radeon_irq_kms_init(rdev); 6062 if (r) 6063 return r; 6064 } 6065 6066 r = cik_irq_init(rdev); 6067 if (r) { 6068 DRM_ERROR("radeon: IH init failed (%d).\n", r); 6069 radeon_irq_kms_fini(rdev); 6070 return r; 6071 } 6072 cik_irq_set(rdev); 6073 6074 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 6075 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 6076 CP_RB0_RPTR, CP_RB0_WPTR, 6077 0, 0xfffff, RADEON_CP_PACKET2); 6078 if (r) 6079 return r; 6080 6081 /* set up the compute queues */ 
6082 /* type-2 packets are deprecated on MEC, use type-3 instead */ 6083 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6084 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET, 6085 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 6086 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF)); 6087 if (r) 6088 return r; 6089 ring->me = 1; /* first MEC */ 6090 ring->pipe = 0; /* first pipe */ 6091 ring->queue = 0; /* first queue */ 6092 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET; 6093 6094 /* type-2 packets are deprecated on MEC, use type-3 instead */ 6095 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 6096 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET, 6097 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR, 6098 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF)); 6099 if (r) 6100 return r; 6101 /* dGPU only have 1 MEC */ 6102 ring->me = 1; /* first MEC */ 6103 ring->pipe = 0; /* first pipe */ 6104 ring->queue = 1; /* second queue */ 6105 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET; 6106 6107 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 6108 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, 6109 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET, 6110 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET, 6111 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 6112 if (r) 6113 return r; 6114 6115 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 6116 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET, 6117 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET, 6118 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET, 6119 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); 6120 if (r) 6121 return r; 6122 6123 r = cik_cp_resume(rdev); 6124 if (r) 6125 return r; 6126 6127 r = cik_sdma_resume(rdev); 6128 if (r) 6129 return r; 6130 6131 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 6132 if (ring->ring_size) { 6133 r = radeon_ring_init(rdev, ring, ring->ring_size, 6134 R600_WB_UVD_RPTR_OFFSET, 6135 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, 6136 0, 0xfffff, RADEON_CP_PACKET2); 
		if (!r)
			r = r600_uvd_init(rdev);
		/* UVD failure is not fatal: log it and keep the rest of the
		 * GPU functional
		 */
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}

/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	/* r is 0 here */
	return r;

}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop accel users first, then quiesce the engines and
	 * finally tear down writeback and the GART
	 */
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}

/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
6216 */ 6217 /** 6218 * cik_init - asic specific driver and hw init 6219 * 6220 * @rdev: radeon_device pointer 6221 * 6222 * Setup asic specific driver variables and program the hw 6223 * to a functional state (CIK). 6224 * Called at driver startup. 6225 * Returns 0 for success, errors for failure. 6226 */ 6227 int cik_init(struct radeon_device *rdev) 6228 { 6229 struct radeon_ring *ring; 6230 int r; 6231 6232 /* Read BIOS */ 6233 if (!radeon_get_bios(rdev)) { 6234 if (ASIC_IS_AVIVO(rdev)) 6235 return -EINVAL; 6236 } 6237 /* Must be an ATOMBIOS */ 6238 if (!rdev->is_atom_bios) { 6239 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n"); 6240 return -EINVAL; 6241 } 6242 r = radeon_atombios_init(rdev); 6243 if (r) 6244 return r; 6245 6246 /* Post card if necessary */ 6247 if (!radeon_card_posted(rdev)) { 6248 if (!rdev->bios) { 6249 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 6250 return -EINVAL; 6251 } 6252 DRM_INFO("GPU not posted. posting now...\n"); 6253 atom_asic_init(rdev->mode_info.atom_context); 6254 } 6255 /* init golden registers */ 6256 cik_init_golden_registers(rdev); 6257 /* Initialize scratch registers */ 6258 cik_scratch_init(rdev); 6259 /* Initialize surface registers */ 6260 radeon_surface_init(rdev); 6261 /* Initialize clocks */ 6262 radeon_get_clock_info(rdev->ddev); 6263 6264 /* Fence driver */ 6265 r = radeon_fence_driver_init(rdev); 6266 if (r) 6267 return r; 6268 6269 /* initialize memory controller */ 6270 r = cik_mc_init(rdev); 6271 if (r) 6272 return r; 6273 /* Memory manager */ 6274 r = radeon_bo_init(rdev); 6275 if (r) 6276 return r; 6277 6278 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 6279 ring->ring_obj = NULL; 6280 r600_ring_init(rdev, ring, 1024 * 1024); 6281 6282 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 6283 ring->ring_obj = NULL; 6284 r600_ring_init(rdev, ring, 1024 * 1024); 6285 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 6286 if (r) 6287 return r; 6288 6289 ring = 
&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 6290 ring->ring_obj = NULL; 6291 r600_ring_init(rdev, ring, 1024 * 1024); 6292 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 6293 if (r) 6294 return r; 6295 6296 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 6297 ring->ring_obj = NULL; 6298 r600_ring_init(rdev, ring, 256 * 1024); 6299 6300 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 6301 ring->ring_obj = NULL; 6302 r600_ring_init(rdev, ring, 256 * 1024); 6303 6304 r = radeon_uvd_init(rdev); 6305 if (!r) { 6306 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; 6307 ring->ring_obj = NULL; 6308 r600_ring_init(rdev, ring, 4096); 6309 } 6310 6311 rdev->ih.ring_obj = NULL; 6312 r600_ih_ring_init(rdev, 64 * 1024); 6313 6314 r = r600_pcie_gart_init(rdev); 6315 if (r) 6316 return r; 6317 6318 rdev->accel_working = true; 6319 r = cik_startup(rdev); 6320 if (r) { 6321 dev_err(rdev->dev, "disabling GPU acceleration\n"); 6322 cik_cp_fini(rdev); 6323 cik_sdma_fini(rdev); 6324 cik_irq_fini(rdev); 6325 si_rlc_fini(rdev); 6326 cik_mec_fini(rdev); 6327 radeon_wb_fini(rdev); 6328 radeon_ib_pool_fini(rdev); 6329 radeon_vm_manager_fini(rdev); 6330 radeon_irq_kms_fini(rdev); 6331 cik_pcie_gart_fini(rdev); 6332 rdev->accel_working = false; 6333 } 6334 6335 /* Don't start up if the MC ucode is missing. 6336 * The default clocks and voltages before the MC ucode 6337 * is loaded are not suffient for advanced operations. 6338 */ 6339 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) { 6340 DRM_ERROR("radeon: MC ucode required for NI+.\n"); 6341 return -EINVAL; 6342 } 6343 6344 return 0; 6345 } 6346 6347 /** 6348 * cik_fini - asic specific driver and hw fini 6349 * 6350 * @rdev: radeon_device pointer 6351 * 6352 * Tear down the asic specific driver variables and program the hw 6353 * to an idle state (CIK). 6354 * Called at driver unload. 
 */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down roughly in the reverse order of cik_init()/cik_startup() */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	si_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp;

	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width. For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB_MEMORY_CONFIG partitioning mode */
		if (mode->crtc_hdisplay < 1920)
			tmp = 1;
		else if (mode->crtc_hdisplay < 2560)
			tmp = 2;
		else if (mode->crtc_hdisplay < 4096)
			tmp = 0;
		else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
		}
	} else
		tmp = 1;

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	/* report the pixel capacity matching the chosen config */
	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	/* decode the NOOFCHAN field; the encoding is not linear */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

/* input parameters for the dce8 watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};

/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth:
	 * yclk (MHz) * 4 bytes per channel * channels * 0.7 efficiency
	 */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a); /* kHz -> MHz */
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a); /* 7/10 */
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a); /* kHz -> MHz */
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth:
	 * sclk (MHz) * 32 bytes * 0.8 efficiency
	 */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a); /* kHz -> MHz */
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a); /* 8/10 */
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
6584 * Used for display watermark bandwidth calculations 6585 * Returns the dmif bandwidth in MBytes/s 6586 */ 6587 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm) 6588 { 6589 /* Calculate the DMIF Request Bandwidth */ 6590 fixed20_12 disp_clk_request_efficiency; /* 0.8 */ 6591 fixed20_12 disp_clk, bandwidth; 6592 fixed20_12 a, b; 6593 6594 a.full = dfixed_const(1000); 6595 disp_clk.full = dfixed_const(wm->disp_clk); 6596 disp_clk.full = dfixed_div(disp_clk, a); 6597 a.full = dfixed_const(32); 6598 b.full = dfixed_mul(a, disp_clk); 6599 6600 a.full = dfixed_const(10); 6601 disp_clk_request_efficiency.full = dfixed_const(8); 6602 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); 6603 6604 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency); 6605 6606 return dfixed_trunc(bandwidth); 6607 } 6608 6609 /** 6610 * dce8_available_bandwidth - get the min available bandwidth 6611 * 6612 * @wm: watermark calculation data 6613 * 6614 * Calculate the min available bandwidth used for display (CIK). 6615 * Used for display watermark bandwidth calculations 6616 * Returns the min available bandwidth in MBytes/s 6617 */ 6618 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm) 6619 { 6620 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */ 6621 u32 dram_bandwidth = dce8_dram_bandwidth(wm); 6622 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm); 6623 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm); 6624 6625 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth)); 6626 } 6627 6628 /** 6629 * dce8_average_bandwidth - get the average available bandwidth 6630 * 6631 * @wm: watermark calculation data 6632 * 6633 * Calculate the average available bandwidth used for display (CIK). 
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a); /* ns -> us */
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc); /* scale by vertical ratio */
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): the divisions below run before the num_heads == 0
	 * early-out; they assume available_bandwidth and disp_clk are
	 * non-zero -- confirm with callers.
	 */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling ratio, tap count and interlace determine how many source
	 * lines feed one destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is capped by both limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the watermark when a line takes longer to fill than the
	 * active display time
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	/* number of whole viewport lines the allocated line buffer can hold */
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	/* vertical scale ratio > 1: only one line of slack */
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is in kHz, so this is the pixel period in ns */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		/* NOTE(review): wm.yclk/wm.sclk are not actually lowered here,
		 * so watermark b is computed with the same high-clock values
		 * as watermark a -- confirm whether low-clock data is available.
		 */
		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce8_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
}

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
6896 */ 6897 void dce8_bandwidth_update(struct radeon_device *rdev) 6898 { 6899 struct drm_display_mode *mode = NULL; 6900 u32 num_heads = 0, lb_size; 6901 int i; 6902 6903 radeon_update_display_priority(rdev); 6904 6905 for (i = 0; i < rdev->num_crtc; i++) { 6906 if (rdev->mode_info.crtcs[i]->base.enabled) 6907 num_heads++; 6908 } 6909 for (i = 0; i < rdev->num_crtc; i++) { 6910 mode = &rdev->mode_info.crtcs[i]->base.mode; 6911 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode); 6912 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads); 6913 } 6914 } 6915 6916 /** 6917 * cik_get_gpu_clock_counter - return GPU clock counter snapshot 6918 * 6919 * @rdev: radeon_device pointer 6920 * 6921 * Fetches a GPU clock counter snapshot (SI). 6922 * Returns the 64 bit clock counter snapshot. 6923 */ 6924 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev) 6925 { 6926 uint64_t clock; 6927 6928 spin_lock(&rdev->gpu_clock_mutex); 6929 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); 6930 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | 6931 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 6932 spin_unlock(&rdev->gpu_clock_mutex); 6933 return clock; 6934 } 6935 6936 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock, 6937 u32 cntl_reg, u32 status_reg) 6938 { 6939 int r, i; 6940 struct atom_clock_dividers dividers; 6941 uint32_t tmp; 6942 6943 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, 6944 clock, false, ÷rs); 6945 if (r) 6946 return r; 6947 6948 tmp = RREG32_SMC(cntl_reg); 6949 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK); 6950 tmp |= dividers.post_divider; 6951 WREG32_SMC(cntl_reg, tmp); 6952 6953 for (i = 0; i < 100; i++) { 6954 if (RREG32_SMC(status_reg) & DCLK_STATUS) 6955 break; 6956 mdelay(10); 6957 } 6958 if (i == 100) 6959 return -ETIMEDOUT; 6960 6961 return 0; 6962 } 6963 6964 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) 6965 { 6966 int r 
= 0; 6967 6968 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS); 6969 if (r) 6970 return r; 6971 6972 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS); 6973 return r; 6974 } 6975 6976 int cik_uvd_resume(struct radeon_device *rdev) 6977 { 6978 uint64_t addr; 6979 uint32_t size; 6980 int r; 6981 6982 r = radeon_uvd_resume(rdev); 6983 if (r) 6984 return r; 6985 6986 /* programm the VCPU memory controller bits 0-27 */ 6987 addr = rdev->uvd.gpu_addr >> 3; 6988 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3; 6989 WREG32(UVD_VCPU_CACHE_OFFSET0, addr); 6990 WREG32(UVD_VCPU_CACHE_SIZE0, size); 6991 6992 addr += size; 6993 size = RADEON_UVD_STACK_SIZE >> 3; 6994 WREG32(UVD_VCPU_CACHE_OFFSET1, addr); 6995 WREG32(UVD_VCPU_CACHE_SIZE1, size); 6996 6997 addr += size; 6998 size = RADEON_UVD_HEAP_SIZE >> 3; 6999 WREG32(UVD_VCPU_CACHE_OFFSET2, addr); 7000 WREG32(UVD_VCPU_CACHE_SIZE2, size); 7001 7002 /* bits 28-31 */ 7003 addr = (rdev->uvd.gpu_addr >> 28) & 0xF; 7004 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); 7005 7006 /* bits 32-39 */ 7007 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; 7008 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); 7009 7010 return 0; 7011 } 7012