1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 31 32 unsigned int dml32_dscceComputeDelay( 33 unsigned int bpc, 34 double BPP, 35 unsigned int sliceWidth, 36 unsigned int numSlices, 37 enum output_format_class pixelFormat, 38 enum output_encoder_class Output) 39 { 40 // valid bpc = source bits per component in the set of {8, 10, 12} 41 // valid bpp = increments of 1/16 of a bit 42 // min = 6/7/8 in N420/N422/444, respectively 43 // max = such that compression is 1:1 44 //valid sliceWidth = number of pixels per slice line, 45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 48 49 // fixed value 50 unsigned int rcModelSize = 8192; 51 52 // N422/N420 operate at 2 pixels per clock 53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 54 Delay, pixels; 55 56 if (pixelFormat == dm_420) 57 pixelsPerClock = 2; 58 else if (pixelFormat == dm_n422) 59 pixelsPerClock = 2; 60 // #all other modes operate at 1 pixel per clock 61 else 62 pixelsPerClock = 1; 63 64 //initial transmit delay as per PPS 65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 66 67 //compute ssm delay 68 if (bpc == 8) 69 D = 81; 70 else if (bpc == 10) 71 D = 89; 72 else 73 D = 113; 74 75 //divide by pixel per cycle to compute slice width as seen by DSC 76 w = sliceWidth / pixelsPerClock; 77 78 //422 mode has an additional cycle of delay 79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 80 s = 0; 81 else 82 s = 1; 83 84 //main calculation for the dscce 85 ix = initalXmitDelay + 45; 86 wx = (w + 2) / 3; 87 p = 
3 * wx - w; 88 l0 = ix / w; 89 a = ix + p * l0; 90 ax = (a + 2) / 3 + D + 6 + 1; 91 L = (ax + wx - 1) / wx; 92 if ((ix % w) == 0 && p != 0) 93 lstall = 1; 94 else 95 lstall = 0; 96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 97 98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 99 pixels = Delay * 3 * pixelsPerClock; 100 101 #ifdef __DML_VBA_DEBUG__ 102 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 103 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 107 dml_print("DML::%s: Output: %d\n", __func__, Output); 108 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 109 #endif 110 111 return pixels; 112 } 113 114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 115 { 116 unsigned int Delay = 0; 117 118 if (pixelFormat == dm_420) { 119 // sfr 120 Delay = Delay + 2; 121 // dsccif 122 Delay = Delay + 0; 123 // dscc - input deserializer 124 Delay = Delay + 3; 125 // dscc gets pixels every other cycle 126 Delay = Delay + 2; 127 // dscc - input cdc fifo 128 Delay = Delay + 12; 129 // dscc gets pixels every other cycle 130 Delay = Delay + 13; 131 // dscc - cdc uncertainty 132 Delay = Delay + 2; 133 // dscc - output cdc fifo 134 Delay = Delay + 7; 135 // dscc gets pixels every other cycle 136 Delay = Delay + 3; 137 // dscc - cdc uncertainty 138 Delay = Delay + 2; 139 // dscc - output serializer 140 Delay = Delay + 1; 141 // sft 142 Delay = Delay + 1; 143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 144 // sfr 145 Delay = Delay + 2; 146 // dsccif 147 Delay = Delay + 1; 148 // dscc - input deserializer 149 Delay = Delay + 5; 150 // dscc - input cdc fifo 151 Delay = Delay + 25; 152 // dscc - cdc uncertainty 153 Delay = Delay + 2; 154 // dscc - output cdc 
fifo 155 Delay = Delay + 10; 156 // dscc - cdc uncertainty 157 Delay = Delay + 2; 158 // dscc - output serializer 159 Delay = Delay + 1; 160 // sft 161 Delay = Delay + 1; 162 } else { 163 // sfr 164 Delay = Delay + 2; 165 // dsccif 166 Delay = Delay + 0; 167 // dscc - input deserializer 168 Delay = Delay + 3; 169 // dscc - input cdc fifo 170 Delay = Delay + 12; 171 // dscc - cdc uncertainty 172 Delay = Delay + 2; 173 // dscc - output cdc fifo 174 Delay = Delay + 7; 175 // dscc - output serializer 176 Delay = Delay + 1; 177 // dscc - cdc uncertainty 178 Delay = Delay + 2; 179 // sft 180 Delay = Delay + 1; 181 } 182 183 return Delay; 184 } 185 186 187 bool IsVertical(enum dm_rotation_angle Scan) 188 { 189 bool is_vert = false; 190 191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 192 is_vert = true; 193 else 194 is_vert = false; 195 return is_vert; 196 } 197 198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 199 double HRatio, 200 double HRatioChroma, 201 double VRatio, 202 double VRatioChroma, 203 double MaxDCHUBToPSCLThroughput, 204 double MaxPSCLToLBThroughput, 205 double PixelClock, 206 enum source_format_class SourcePixelFormat, 207 unsigned int HTaps, 208 unsigned int HTapsChroma, 209 unsigned int VTaps, 210 unsigned int VTapsChroma, 211 212 /* output */ 213 double *PSCL_THROUGHPUT, 214 double *PSCL_THROUGHPUT_CHROMA, 215 double *DPPCLKUsingSingleDPP) 216 { 217 double DPPCLKUsingSingleDPPLuma; 218 double DPPCLKUsingSingleDPPChroma; 219 220 if (HRatio > 1) { 221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 222 dml_ceil((double) HTaps / 6.0, 1.0)); 223 } else { 224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 225 } 226 227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 228 *PSCL_THROUGHPUT, 1); 229 230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * 
PixelClock) 231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 232 233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 234 SourcePixelFormat != dm_rgbe_alpha)) { 235 *PSCL_THROUGHPUT_CHROMA = 0; 236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 237 } else { 238 if (HRatioChroma > 1) { 239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 241 } else { 242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 243 } 244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 249 } 250 } 251 252 void dml32_CalculateBytePerPixelAndBlockSizes( 253 enum source_format_class SourcePixelFormat, 254 enum dm_swizzle_mode SurfaceTiling, 255 256 /* Output */ 257 unsigned int *BytePerPixelY, 258 unsigned int *BytePerPixelC, 259 double *BytePerPixelDETY, 260 double *BytePerPixelDETC, 261 unsigned int *BlockHeight256BytesY, 262 unsigned int *BlockHeight256BytesC, 263 unsigned int *BlockWidth256BytesY, 264 unsigned int *BlockWidth256BytesC, 265 unsigned int *MacroTileHeightY, 266 unsigned int *MacroTileHeightC, 267 unsigned int *MacroTileWidthY, 268 unsigned int *MacroTileWidthC) 269 { 270 if (SourcePixelFormat == dm_444_64) { 271 *BytePerPixelDETY = 8; 272 *BytePerPixelDETC = 0; 273 *BytePerPixelY = 8; 274 *BytePerPixelC = 0; 275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 276 *BytePerPixelDETY = 4; 277 *BytePerPixelDETC = 0; 278 *BytePerPixelY = 4; 279 *BytePerPixelC = 0; 280 } else if (SourcePixelFormat == dm_444_16) { 281 
*BytePerPixelDETY = 2; 282 *BytePerPixelDETC = 0; 283 *BytePerPixelY = 2; 284 *BytePerPixelC = 0; 285 } else if (SourcePixelFormat == dm_444_8) { 286 *BytePerPixelDETY = 1; 287 *BytePerPixelDETC = 0; 288 *BytePerPixelY = 1; 289 *BytePerPixelC = 0; 290 } else if (SourcePixelFormat == dm_rgbe_alpha) { 291 *BytePerPixelDETY = 4; 292 *BytePerPixelDETC = 1; 293 *BytePerPixelY = 4; 294 *BytePerPixelC = 1; 295 } else if (SourcePixelFormat == dm_420_8) { 296 *BytePerPixelDETY = 1; 297 *BytePerPixelDETC = 2; 298 *BytePerPixelY = 1; 299 *BytePerPixelC = 2; 300 } else if (SourcePixelFormat == dm_420_12) { 301 *BytePerPixelDETY = 2; 302 *BytePerPixelDETC = 4; 303 *BytePerPixelY = 2; 304 *BytePerPixelC = 4; 305 } else { 306 *BytePerPixelDETY = 4.0 / 3; 307 *BytePerPixelDETC = 8.0 / 3; 308 *BytePerPixelY = 2; 309 *BytePerPixelC = 4; 310 } 311 #ifdef __DML_VBA_DEBUG__ 312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 317 #endif 318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 319 || SourcePixelFormat == dm_444_16 320 || SourcePixelFormat == dm_444_8 321 || SourcePixelFormat == dm_mono_16 322 || SourcePixelFormat == dm_mono_8 323 || SourcePixelFormat == dm_rgbe)) { 324 if (SurfaceTiling == dm_sw_linear) 325 *BlockHeight256BytesY = 1; 326 else if (SourcePixelFormat == dm_444_64) 327 *BlockHeight256BytesY = 4; 328 else if (SourcePixelFormat == dm_444_8) 329 *BlockHeight256BytesY = 16; 330 else 331 *BlockHeight256BytesY = 8; 332 333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 334 *BlockHeight256BytesC = 0; 335 *BlockWidth256BytesC = 0; 336 } else { 337 if (SurfaceTiling == dm_sw_linear) { 338 
*BlockHeight256BytesY = 1; 339 *BlockHeight256BytesC = 1; 340 } else if (SourcePixelFormat == dm_rgbe_alpha) { 341 *BlockHeight256BytesY = 8; 342 *BlockHeight256BytesC = 16; 343 } else if (SourcePixelFormat == dm_420_8) { 344 *BlockHeight256BytesY = 16; 345 *BlockHeight256BytesC = 8; 346 } else { 347 *BlockHeight256BytesY = 8; 348 *BlockHeight256BytesC = 8; 349 } 350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 352 } 353 #ifdef __DML_VBA_DEBUG__ 354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 358 #endif 359 360 if (SurfaceTiling == dm_sw_linear) { 361 *MacroTileHeightY = *BlockHeight256BytesY; 362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 363 *MacroTileHeightC = *BlockHeight256BytesC; 364 if (*MacroTileHeightC == 0) 365 *MacroTileWidthC = 0; 366 else 367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 370 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 372 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 373 if (*MacroTileHeightC == 0) 374 *MacroTileWidthC = 0; 375 else 376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 377 } else { 378 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 380 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 381 if (*MacroTileHeightC == 0) 382 *MacroTileWidthC = 0; 383 else 384 *MacroTileWidthC = 65536 * 4 / 
*BytePerPixelC / *MacroTileHeightC; 385 } 386 387 #ifdef __DML_VBA_DEBUG__ 388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 392 #endif 393 } // CalculateBytePerPixelAndBlockSizes 394 395 void dml32_CalculateSwathAndDETConfiguration( 396 unsigned int DETSizeOverride[], 397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 398 unsigned int ConfigReturnBufferSizeInKByte, 399 unsigned int MaxTotalDETInKByte, 400 unsigned int MinCompressedBufferSizeInKByte, 401 double ForceSingleDPP, 402 unsigned int NumberOfActiveSurfaces, 403 unsigned int nomDETInKByte, 404 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 406 unsigned int PixelChunkSizeKBytes, 407 unsigned int ROBSizeKBytes, 408 unsigned int CompressedBufferSegmentSizeInkByteFinal, 409 enum output_encoder_class Output[], 410 double ReadBandwidthLuma[], 411 double ReadBandwidthChroma[], 412 double MaximumSwathWidthLuma[], 413 double MaximumSwathWidthChroma[], 414 enum dm_rotation_angle SourceRotation[], 415 bool ViewportStationary[], 416 enum source_format_class SourcePixelFormat[], 417 enum dm_swizzle_mode SurfaceTiling[], 418 unsigned int ViewportWidth[], 419 unsigned int ViewportHeight[], 420 unsigned int ViewportXStart[], 421 unsigned int ViewportYStart[], 422 unsigned int ViewportXStartC[], 423 unsigned int ViewportYStartC[], 424 unsigned int SurfaceWidthY[], 425 unsigned int SurfaceWidthC[], 426 unsigned int SurfaceHeightY[], 427 unsigned int SurfaceHeightC[], 428 unsigned int Read256BytesBlockHeightY[], 429 unsigned int Read256BytesBlockHeightC[], 430 unsigned int Read256BytesBlockWidthY[], 431 unsigned int Read256BytesBlockWidthC[], 432 enum 
odm_combine_mode ODMMode[], 433 unsigned int BlendingAndTiming[], 434 unsigned int BytePerPixY[], 435 unsigned int BytePerPixC[], 436 double BytePerPixDETY[], 437 double BytePerPixDETC[], 438 unsigned int HActive[], 439 double HRatio[], 440 double HRatioChroma[], 441 unsigned int DPPPerSurface[], 442 443 /* Output */ 444 unsigned int swath_width_luma_ub[], 445 unsigned int swath_width_chroma_ub[], 446 double SwathWidth[], 447 double SwathWidthChroma[], 448 unsigned int SwathHeightY[], 449 unsigned int SwathHeightC[], 450 unsigned int DETBufferSizeInKByte[], 451 unsigned int DETBufferSizeY[], 452 unsigned int DETBufferSizeC[], 453 bool *UnboundedRequestEnabled, 454 unsigned int *CompressedBufferSizeInkByte, 455 unsigned int *CompBufReservedSpaceKBytes, 456 bool *CompBufReservedSpaceNeedAdjustment, 457 bool ViewportSizeSupportPerSurface[], 458 bool *ViewportSizeSupport) 459 { 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 464 unsigned int RoundedUpSwathSizeBytesY; 465 unsigned int RoundedUpSwathSizeBytesC; 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 468 unsigned int k; 469 unsigned int TotalActiveDPP = 0; 470 bool NoChromaSurfaces = true; 471 unsigned int DETBufferSizeInKByteForSwathCalculation; 472 473 #ifdef __DML_VBA_DEBUG__ 474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 477 #endif 478 dml32_CalculateSwathWidth(ForceSingleDPP, 479 NumberOfActiveSurfaces, 480 SourcePixelFormat, 481 SourceRotation, 482 ViewportStationary, 483 ViewportWidth, 484 ViewportHeight, 485 ViewportXStart, 486 ViewportYStart, 487 ViewportXStartC, 488 
ViewportYStartC, 489 SurfaceWidthY, 490 SurfaceWidthC, 491 SurfaceHeightY, 492 SurfaceHeightC, 493 ODMMode, 494 BytePerPixY, 495 BytePerPixC, 496 Read256BytesBlockHeightY, 497 Read256BytesBlockHeightC, 498 Read256BytesBlockWidthY, 499 Read256BytesBlockWidthC, 500 BlendingAndTiming, 501 HActive, 502 HRatio, 503 DPPPerSurface, 504 505 /* Output */ 506 SwathWidthdoubleDPP, 507 SwathWidthdoubleDPPChroma, 508 SwathWidth, 509 SwathWidthChroma, 510 MaximumSwathHeightY, 511 MaximumSwathHeightC, 512 swath_width_luma_ub, 513 swath_width_chroma_ub); 514 515 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 518 #ifdef __DML_VBA_DEBUG__ 519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 524 RoundedUpMaxSwathSizeBytesY[k]); 525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 529 RoundedUpMaxSwathSizeBytesC[k]); 530 #endif 531 532 if (SourcePixelFormat[k] == dm_420_10) { 533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 535 } 536 } 
537 538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 542 NoChromaSurfaces = false; 543 } 544 } 545 546 // By default, just set the reserved space to 2 pixel chunks size 547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 548 549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 553 554 if (*CompBufReservedSpaceNeedAdjustment == 1) { 555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 556 } 557 558 #ifdef __DML_VBA_DEBUG__ 559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 561 #endif 562 563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 564 565 dml32_CalculateDETBufferSize(DETSizeOverride, 566 UseMALLForPStateChange, 567 ForceSingleDPP, 568 NumberOfActiveSurfaces, 569 *UnboundedRequestEnabled, 570 nomDETInKByte, 571 MaxTotalDETInKByte, 572 ConfigReturnBufferSizeInKByte, 573 MinCompressedBufferSizeInKByte, 574 CompressedBufferSegmentSizeInkByteFinal, 575 SourcePixelFormat, 576 ReadBandwidthLuma, 577 
ReadBandwidthChroma, 578 RoundedUpMaxSwathSizeBytesY, 579 RoundedUpMaxSwathSizeBytesC, 580 DPPPerSurface, 581 582 /* Output */ 583 DETBufferSizeInKByte, // per hubp pipe 584 CompressedBufferSizeInkByte); 585 586 #ifdef __DML_VBA_DEBUG__ 587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 593 #endif 594 595 *ViewportSizeSupport = true; 596 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 597 598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 599 dm_use_mall_pstate_change_phantom_pipe ? 
1024 : DETBufferSizeInKByte[k]); 600 #ifdef __DML_VBA_DEBUG__ 601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 602 DETBufferSizeInKByteForSwathCalculation); 603 #endif 604 605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 607 SwathHeightY[k] = MaximumSwathHeightY[k]; 608 SwathHeightC[k] = MaximumSwathHeightC[k]; 609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 615 SwathHeightC[k] = MaximumSwathHeightC[k]; 616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 621 SwathHeightY[k] = MaximumSwathHeightY[k]; 622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 625 } else { 626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 630 } 631 632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 636 
*ViewportSizeSupport = false; 637 ViewportSizeSupportPerSurface[k] = false; 638 } else { 639 ViewportSizeSupportPerSurface[k] = true; 640 } 641 642 if (SwathHeightC[k] == 0) { 643 #ifdef __DML_VBA_DEBUG__ 644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 645 #endif 646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 647 DETBufferSizeC[k] = 0; 648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 649 #ifdef __DML_VBA_DEBUG__ 650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 651 #endif 652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 654 } else { 655 #ifdef __DML_VBA_DEBUG__ 656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 657 #endif 658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 660 } 661 662 #ifdef __DML_VBA_DEBUG__ 663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesY[k]); 667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 668 k, RoundedUpMaxSwathSizeBytesC[k]); 669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 675 
ViewportSizeSupportPerSurface[k]); 676 #endif 677 678 } 679 } // CalculateSwathAndDETConfiguration 680 681 void dml32_CalculateSwathWidth( 682 bool ForceSingleDPP, 683 unsigned int NumberOfActiveSurfaces, 684 enum source_format_class SourcePixelFormat[], 685 enum dm_rotation_angle SourceRotation[], 686 bool ViewportStationary[], 687 unsigned int ViewportWidth[], 688 unsigned int ViewportHeight[], 689 unsigned int ViewportXStart[], 690 unsigned int ViewportYStart[], 691 unsigned int ViewportXStartC[], 692 unsigned int ViewportYStartC[], 693 unsigned int SurfaceWidthY[], 694 unsigned int SurfaceWidthC[], 695 unsigned int SurfaceHeightY[], 696 unsigned int SurfaceHeightC[], 697 enum odm_combine_mode ODMMode[], 698 unsigned int BytePerPixY[], 699 unsigned int BytePerPixC[], 700 unsigned int Read256BytesBlockHeightY[], 701 unsigned int Read256BytesBlockHeightC[], 702 unsigned int Read256BytesBlockWidthY[], 703 unsigned int Read256BytesBlockWidthC[], 704 unsigned int BlendingAndTiming[], 705 unsigned int HActive[], 706 double HRatio[], 707 unsigned int DPPPerSurface[], 708 709 /* Output */ 710 double SwathWidthdoubleDPPY[], 711 double SwathWidthdoubleDPPC[], 712 double SwathWidthY[], // per-pipe 713 double SwathWidthC[], // per-pipe 714 unsigned int MaximumSwathHeightY[], 715 unsigned int MaximumSwathHeightC[], 716 unsigned int swath_width_luma_ub[], // per-pipe 717 unsigned int swath_width_chroma_ub[]) // per-pipe 718 { 719 unsigned int k, j; 720 enum odm_combine_mode MainSurfaceODMMode; 721 722 unsigned int surface_width_ub_l; 723 unsigned int surface_height_ub_l; 724 unsigned int surface_width_ub_c = 0; 725 unsigned int surface_height_ub_c = 0; 726 727 #ifdef __DML_VBA_DEBUG__ 728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 730 #endif 731 732 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 733 if (!IsVertical(SourceRotation[k])) 734 
SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 735 else 736 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 737 738 #ifdef __DML_VBA_DEBUG__ 739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 741 #endif 742 743 MainSurfaceODMMode = ODMMode[k]; 744 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 745 if (BlendingAndTiming[k] == j) 746 MainSurfaceODMMode = ODMMode[j]; 747 } 748 749 if (ForceSingleDPP) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 751 } else { 752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 754 dml_round(HActive[k] / 4.0 * HRatio[k])); 755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 757 dml_round(HActive[k] / 2.0 * HRatio[k])); 758 } else if (DPPPerSurface[k] == 2) { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 760 } else { 761 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 762 } 763 } 764 765 #ifdef __DML_VBA_DEBUG__ 766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 771 #endif 772 773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 774 SourcePixelFormat[k] == dm_420_12) { 775 SwathWidthC[k] = SwathWidthY[k] / 2; 776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 777 } else { 778 SwathWidthC[k] = SwathWidthY[k]; 779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 780 } 781 782 if (ForceSingleDPP == true) { 783 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 784 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 785 } 786 787 surface_width_ub_l 
= dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 789 790 if (!IsVertical(SourceRotation[k])) { 791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 795 dml_floor(ViewportXStart[k] + 796 SwathWidthY[k] + 797 Read256BytesBlockWidthY[k] - 1, 798 Read256BytesBlockWidthY[k]) - 799 dml_floor(ViewportXStart[k], 800 Read256BytesBlockWidthY[k])); 801 } else { 802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 803 dml_ceil(SwathWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) + 805 Read256BytesBlockWidthY[k]); 806 } 807 if (BytePerPixC[k] > 0) { 808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 812 Read256BytesBlockWidthC[k] - 1, 813 Read256BytesBlockWidthC[k]) - 814 dml_floor(ViewportXStartC[k], 815 Read256BytesBlockWidthC[k])); 816 } else { 817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 818 dml_ceil(SwathWidthC[k] - 1, 819 Read256BytesBlockWidthC[k]) + 820 Read256BytesBlockWidthC[k]); 821 } 822 } else { 823 swath_width_chroma_ub[k] = 0; 824 } 825 } else { 826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 828 829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 832 Read256BytesBlockHeightY[k]) - 833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 834 } else { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 836 Read256BytesBlockHeightY[k]) 
+ Read256BytesBlockHeightY[k]); 837 } 838 if (BytePerPixC[k] > 0) { 839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 843 Read256BytesBlockHeightC[k] - 1, 844 Read256BytesBlockHeightC[k]) - 845 dml_floor(ViewportYStartC[k], 846 Read256BytesBlockHeightC[k])); 847 } else { 848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 850 Read256BytesBlockHeightC[k]); 851 } 852 } else { 853 swath_width_chroma_ub[k] = 0; 854 } 855 } 856 857 #ifdef __DML_VBA_DEBUG__ 858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 871 
dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 872 #endif 873 874 } 875 } // CalculateSwathWidth 876 877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 878 unsigned int TotalNumberOfActiveDPP, 879 bool NoChroma, 880 enum output_encoder_class Output, 881 enum dm_swizzle_mode SurfaceTiling, 882 bool CompBufReservedSpaceNeedAdjustment, 883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 884 { 885 bool ret_val = false; 886 887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 888 TotalNumberOfActiveDPP == 1 && NoChroma); 889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 890 ret_val = false; 891 892 if (SurfaceTiling == dm_sw_linear) 893 ret_val = false; 894 895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 896 ret_val = false; 897 898 #ifdef __DML_VBA_DEBUG__ 899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 902 #endif 903 904 return (ret_val); 905 } 906 907 void dml32_CalculateDETBufferSize( 908 unsigned int DETSizeOverride[], 909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 910 bool ForceSingleDPP, 911 unsigned int NumberOfActiveSurfaces, 912 bool UnboundedRequestEnabled, 913 unsigned int nomDETInKByte, 914 unsigned int MaxTotalDETInKByte, 915 unsigned int ConfigReturnBufferSizeInKByte, 916 unsigned int MinCompressedBufferSizeInKByte, 917 unsigned int CompressedBufferSegmentSizeInkByteFinal, 918 enum source_format_class SourcePixelFormat[], 919 double ReadBandwidthLuma[], 920 double ReadBandwidthChroma[], 921 unsigned int 
RoundedUpMaxSwathSizeBytesY[], 922 unsigned int RoundedUpMaxSwathSizeBytesC[], 923 unsigned int DPPPerSurface[], 924 /* Output */ 925 unsigned int DETBufferSizeInKByte[], 926 unsigned int *CompressedBufferSizeInkByte) 927 { 928 unsigned int DETBufferSizePoolInKByte; 929 unsigned int NextDETBufferPieceInKByte; 930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 931 bool NextPotentialSurfaceToAssignDETPieceFound; 932 unsigned int NextSurfaceToAssignDETPiece; 933 double TotalBandwidth; 934 double BandwidthOfSurfacesNotAssignedDETPiece; 935 unsigned int max_minDET; 936 unsigned int minDET; 937 unsigned int minDET_pipe; 938 unsigned int j, k; 939 940 #ifdef __DML_VBA_DEBUG__ 941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 949 CompressedBufferSegmentSizeInkByteFinal); 950 #endif 951 952 // Note: Will use default det size if that fits 2 swaths 953 if (UnboundedRequestEnabled) { 954 if (DETSizeOverride[0] > 0) { 955 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 956 } else { 957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 958 ((double) RoundedUpMaxSwathSizeBytesY[0] + 959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 960 } 961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 962 } else { 963 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 964 
for (k = 0; k < NumberOfActiveSurfaces; ++k) { 965 DETBufferSizeInKByte[k] = nomDETInKByte; 966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 967 SourcePixelFormat[k] == dm_420_12) { 968 max_minDET = nomDETInKByte - 64; 969 } else { 970 max_minDET = nomDETInKByte; 971 } 972 minDET = 128; 973 minDET_pipe = 0; 974 975 // add DET resource until can hold 2 full swaths 976 while (minDET <= max_minDET && minDET_pipe == 0) { 977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 979 minDET_pipe = minDET; 980 minDET = minDET + 64; 981 } 982 983 #ifdef __DML_VBA_DEBUG__ 984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 988 RoundedUpMaxSwathSizeBytesY[k]); 989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 990 RoundedUpMaxSwathSizeBytesC[k]); 991 #endif 992 993 if (minDET_pipe == 0) { 994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 998 __func__, k, minDET_pipe); 999 #endif 1000 } 1001 1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1003 DETBufferSizeInKByte[k] = 0; 1004 } else if (DETSizeOverride[k] > 0) { 1005 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1008 } else if ((ForceSingleDPP ? 
1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1009 DETBufferSizeInKByte[k] = minDET_pipe; 1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; 1012 } 1013 1014 #ifdef __DML_VBA_DEBUG__ 1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1019 #endif 1020 } 1021 1022 TotalBandwidth = 0; 1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1026 } 1027 #ifdef __DML_VBA_DEBUG__ 1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1033 #endif 1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1036 1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1038 DETPieceAssignedToThisSurfaceAlready[k] = true; 1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * 1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1042 DETPieceAssignedToThisSurfaceAlready[k] = true; 1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1045 } else { 1046 DETPieceAssignedToThisSurfaceAlready[k] = false; 1047 } 1048 #ifdef __DML_VBA_DEBUG__ 1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1050 DETPieceAssignedToThisSurfaceAlready[k]); 1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1052 BandwidthOfSurfacesNotAssignedDETPiece); 1053 #endif 1054 } 1055 1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1057 NextPotentialSurfaceToAssignDETPieceFound = false; 1058 NextSurfaceToAssignDETPiece = 0; 1059 1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1061 #ifdef __DML_VBA_DEBUG__ 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1063 ReadBandwidthLuma[k]); 1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1065 ReadBandwidthChroma[k]); 1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1071 NextSurfaceToAssignDETPiece); 1072 #endif 1073 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1074 (!NextPotentialSurfaceToAssignDETPieceFound || 1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1078 NextSurfaceToAssignDETPiece = k; 1079 NextPotentialSurfaceToAssignDETPieceFound = 
true; 1080 } 1081 #ifdef __DML_VBA_DEBUG__ 1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1086 #endif 1087 } 1088 1089 if (NextPotentialSurfaceToAssignDETPieceFound) { 1090 // Note: To show the banker's rounding behavior in VBA and also the fact 1091 // that the DET buffer size varies due to precision issue 1092 // 1093 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1096 // BandwidthOfSurfacesNotAssignedDETPiece / 1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1101 //BandwidthOfSurfacesNotAssignedDETPiece / 1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1103 // 1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1106 1107 NextDETBufferPieceInKByte = dml_min( 1108 dml_round((double) DETBufferSizePoolInKByte * 1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1111 BandwidthOfSurfacesNotAssignedDETPiece / 1112 ((ForceSingleDPP ? 1 : 1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1114 (ForceSingleDPP ? 1 : 1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1116 dml_floor((double) DETBufferSizePoolInKByte, 1117 (ForceSingleDPP ? 1 : 1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1119 1120 // Above calculation can assign the entire DET buffer allocation to a single pipe. 
1121 // We should limit the per-pipe DET size to the nominal / max per pipe. 1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1127 } else { 1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1129 // already has the max per-pipe value 1130 NextDETBufferPieceInKByte = 0; 1131 } 1132 } 1133 1134 #ifdef __DML_VBA_DEBUG__ 1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1136 DETBufferSizePoolInKByte); 1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1138 NextSurfaceToAssignDETPiece); 1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1146 NextDETBufferPieceInKByte); 1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1148 __func__, j, NextSurfaceToAssignDETPiece, 1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1150 #endif 1151 1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1154 + NextDETBufferPieceInKByte 1155 / (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1156 #ifdef __DML_VBA_DEBUG__ 1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1158 #endif 1159 1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1165 } 1166 } 1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1168 } 1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1170 1171 #ifdef __DML_VBA_DEBUG__ 1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1177 } 1178 #endif 1179 } // CalculateDETBufferSize 1180 1181 void dml32_CalculateODMMode( 1182 unsigned int MaximumPixelsPerLinePerDSCUnit, 1183 unsigned int HActive, 1184 enum output_format_class OutFormat, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 unsigned int NumberOfDSCSlices, 1197 1198 /* Output */ 1199 bool *TotalAvailablePipesSupport, 1200 unsigned int *NumberOfDPP, 1201 enum odm_combine_mode *ODMMode, 1202 double *RequiredDISPCLKPerSurface) 1203 { 1204 1205 double 
SurfaceRequiredDISPCLKWithoutODMCombine; 1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1208 1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1211 MaxDispclk); 1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1214 MaxDispclk); 1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1217 MaxDispclk); 1218 *TotalAvailablePipesSupport = true; 1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1220 1221 if (ODMUse == dm_odm_combine_policy_none) 1222 *ODMMode = dm_odm_combine_mode_disabled; 1223 1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1225 *NumberOfDPP = 0; 1226 1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1229 1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) 1233 || NumberOfDSCSlices > 8)))) { 1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1235 *ODMMode = dm_odm_combine_mode_4to1; 1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1237 *NumberOfDPP = 4; 1238 } else { 1239 *TotalAvailablePipesSupport = false; 1240 } 1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) 1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { 1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1247 *ODMMode = dm_odm_combine_mode_2to1; 1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1249 *NumberOfDPP = 2; 1250 } else { 1251 *TotalAvailablePipesSupport = false; 1252 } 1253 } else { 1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1255 *NumberOfDPP = 1; 1256 else 1257 *TotalAvailablePipesSupport = false; 1258 } 1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && 1260 ODMUse != dm_odm_combine_policy_4to1) { 1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { 1262 *ODMMode = dm_odm_combine_mode_disabled; 1263 *NumberOfDPP = 0; 1264 *TotalAvailablePipesSupport = false; 1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || 1266 *ODMMode == dm_odm_combine_mode_4to1) { 1267 *ODMMode = dm_odm_combine_mode_4to1; 1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1269 *NumberOfDPP = 4; 1270 } else { 1271 *ODMMode = dm_odm_combine_mode_2to1; 1272 *RequiredDISPCLKPerSurface = 
SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1273 *NumberOfDPP = 2; 1274 } 1275 } 1276 if (Output == dm_hdmi && OutFormat == dm_420 && 1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { 1278 *ODMMode = dm_odm_combine_mode_disabled; 1279 *NumberOfDPP = 0; 1280 *TotalAvailablePipesSupport = false; 1281 } 1282 } 1283 1284 double dml32_CalculateRequiredDispclk( 1285 enum odm_combine_mode ODMMode, 1286 double PixelClock, 1287 double DISPCLKDPPCLKDSCCLKDownSpreading, 1288 double DISPCLKRampingMargin, 1289 double DISPCLKDPPCLKVCOSpeed, 1290 double MaxDispclk) 1291 { 1292 double RequiredDispclk = 0.; 1293 double PixelClockAfterODM; 1294 double DISPCLKWithRampingRoundedToDFSGranularity; 1295 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1296 double MaxDispclkRoundedDownToDFSGranularity; 1297 1298 if (ODMMode == dm_odm_combine_mode_4to1) 1299 PixelClockAfterODM = PixelClock / 4; 1300 else if (ODMMode == dm_odm_combine_mode_2to1) 1301 PixelClockAfterODM = PixelClock / 2; 1302 else 1303 PixelClockAfterODM = PixelClock; 1304 1305 1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1309 1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1312 1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1314 1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1319 else 1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1321 1322 return RequiredDispclk; 
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (4 * VCOSpeed) / N with N a whole number.
 *
 * round_up selects N = floor(4 * VCOSpeed / Clock), giving a result at or
 * above Clock; otherwise N = ceil(4 * VCOSpeed / Clock), giving a result at or
 * below Clock. A Clock of zero or less returns 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double vco_x4;
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	vco_x4 = VCOSpeed * 4.0;

	if (round_up)
		divider = dml_floor(vco_x4 / Clock, 1.0);
	else
		divider = dml_ceil(vco_x4 / Clock, 1.0);

	return vco_x4 / divider;
}
else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { 1386 if (DSCEnable == true) { 1387 *RequiresDSC = true; 1388 LinkDSCEnable = true; 1389 if (Output == dm_dp || Output == dm_dp2p0) 1390 *RequiresFEC = true; 1391 else 1392 *RequiresFEC = false; 1393 } else { 1394 *RequiresDSC = false; 1395 LinkDSCEnable = false; 1396 if (Output == dm_dp2p0) 1397 *RequiresFEC = true; 1398 else 1399 *RequiresFEC = false; 1400 } 1401 if (Output == dm_dp2p0) { 1402 *OutBpp = 0; 1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && 1404 PHYCLKD32PerState >= 10000 / 32) { 1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && 1411 ForcedOutputLinkBPP == 0) { 1412 *RequiresDSC = true; 1413 LinkDSCEnable = true; 1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1416 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1417 OutputFormat, DSCInputBitPerComponent, 1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1420 } 1421 //OutputTypeAndRate = Output & " UHBR10"; 1422 *OutputType = dm_output_type_dp2p0; 1423 *OutputRate = dm_output_rate_dp_rate_uhbr10; 1424 } 1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && 1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { 1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1431 
AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1432 1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && 1434 ForcedOutputLinkBPP == 0) { 1435 *RequiresDSC = true; 1436 LinkDSCEnable = true; 1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1439 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1440 OutputFormat, DSCInputBitPerComponent, 1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1443 } 1444 //OutputTypeAndRate = Output & " UHBR13p5"; 1445 *OutputType = dm_output_type_dp2p0; 1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5; 1447 } 1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && 1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { 1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1456 *RequiresDSC = true; 1457 LinkDSCEnable = true; 1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1460 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1461 OutputFormat, DSCInputBitPerComponent, 1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1464 } 1465 //OutputTypeAndRate = Output & " UHBR20"; 1466 *OutputType = dm_output_type_dp2p0; 1467 *OutputRate = dm_output_rate_dp_rate_uhbr20; 1468 } 1469 } else { 1470 *OutBpp = 0; 1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && 1472 PHYCLKPerState >= 270) { 1473 *OutBpp = dml32_TruncToValidBPP((1 - 
Downspreading / 100) * 2700, 1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && 1479 ForcedOutputLinkBPP == 0) { 1480 *RequiresDSC = true; 1481 LinkDSCEnable = true; 1482 if (Output == dm_dp) 1483 *RequiresFEC = true; 1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1486 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1487 OutputFormat, DSCInputBitPerComponent, 1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1490 } 1491 //OutputTypeAndRate = Output & " HBR"; 1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1493 *OutputRate = dm_output_rate_dp_rate_hbr; 1494 } 1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && 1496 *OutBpp == 0 && PHYCLKPerState >= 540) { 1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1502 1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && 1504 ForcedOutputLinkBPP == 0) { 1505 *RequiresDSC = true; 1506 LinkDSCEnable = true; 1507 if (Output == dm_dp) 1508 *RequiresFEC = true; 1509 1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1512 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1513 OutputFormat, DSCInputBitPerComponent, 1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1515 ODMModeNoDSC, ODMModeDSC, 
RequiredSlots); 1516 } 1517 //OutputTypeAndRate = Output & " HBR2"; 1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1519 *OutputRate = dm_output_rate_dp_rate_hbr2; 1520 } 1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { 1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1524 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, 1527 RequiredSlots); 1528 1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1530 *RequiresDSC = true; 1531 LinkDSCEnable = true; 1532 if (Output == dm_dp) 1533 *RequiresFEC = true; 1534 1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1537 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1538 OutputFormat, DSCInputBitPerComponent, 1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1541 } 1542 //OutputTypeAndRate = Output & " HBR3"; 1543 *OutputType = (Output == dm_dp) ? 
/*
 * dml32_CalculateDPPCLK - derive per-surface DPPCLK values and the global
 * DPPCLK they are based on.
 *
 * Each surface's single-DPP clock is divided by its DPP count and scaled by
 * the down-spreading percentage. The largest of these, rounded up by
 * dml32_RoundToDFSGranularity(), becomes *GlobalDPPCLK. Every Dppclk[k] is
 * then rounded up to a whole multiple of *GlobalDPPCLK / 255.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double spread_factor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double highest_dppclk = 0;
	double dppclk_step;
	unsigned int surf;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * spread_factor;
		highest_dppclk = dml_max(highest_dppclk, Dppclk[surf]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(highest_dppclk, 1, DISPCLKDPPCLKVCOSpeed);

	/* Per-surface clocks are expressed in 1/255 steps of the global clock. */
	dppclk_step = *GlobalDPPCLK / 255;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		Dppclk[surf] = dppclk_step * dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);
}
DSCInputBitPerComponent - 1.0 / 16; 1611 } else { 1612 if (Output == dm_hdmi) { 1613 NonDSCBPP0 = 24; 1614 NonDSCBPP1 = 24; 1615 NonDSCBPP2 = 24; 1616 } else { 1617 NonDSCBPP0 = 16; 1618 NonDSCBPP1 = 20; 1619 NonDSCBPP2 = 24; 1620 } 1621 if (Format == dm_n422) { 1622 MinDSCBPP = 7; 1623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1624 } else { 1625 MinDSCBPP = 8; 1626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1627 } 1628 } 1629 if (Output == dm_dp2p0) { 1630 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1631 } else if (DSCEnable && Output == dm_dp) { 1632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1633 } else { 1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1635 } 1636 1637 if (DSCEnable) { 1638 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1639 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1640 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1641 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1642 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1643 MaxLinkBPP = 2 * MaxLinkBPP; 1644 } else { 1645 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1646 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1647 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1648 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1649 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1650 MaxLinkBPP = 2 * MaxLinkBPP; 1651 } 1652 1653 if (DesiredBPP == 0) { 1654 if (DSCEnable) { 1655 if (MaxLinkBPP < MinDSCBPP) 1656 return BPP_INVALID; 1657 else if (MaxLinkBPP >= MaxDSCBPP) 1658 return MaxDSCBPP; 1659 else 1660 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1661 } else { 1662 if (MaxLinkBPP >= NonDSCBPP2) 1663 return NonDSCBPP2; 1664 else if (MaxLinkBPP >= NonDSCBPP1) 1665 return NonDSCBPP1; 1666 else if (MaxLinkBPP >= NonDSCBPP0) 1667 return 16.0; 1668 else 1669 return BPP_INVALID; 1670 } 1671 } else { 1672 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1673 DesiredBPP <= NonDSCBPP0)) 
|| 1674 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1675 return BPP_INVALID; 1676 else 1677 return DesiredBPP; 1678 } 1679 1680 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1681 1682 return BPP_INVALID; 1683 } // TruncToValidBPP 1684 1685 double dml32_RequiredDTBCLK( 1686 bool DSCEnable, 1687 double PixelClock, 1688 enum output_format_class OutputFormat, 1689 double OutputBpp, 1690 unsigned int DSCSlices, 1691 unsigned int HTotal, 1692 unsigned int HActive, 1693 unsigned int AudioRate, 1694 unsigned int AudioLayout) 1695 { 1696 double PixelWordRate; 1697 double HCActive; 1698 double HCBlank; 1699 double AverageTribyteRate; 1700 double HActiveTribyteRate; 1701 1702 if (DSCEnable != true) 1703 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1704 1705 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1706 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1707 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1708 HCBlank = 64 + 32 * 1709 dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1710 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1711 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1712 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1713 } 1714 1715 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1716 enum odm_combine_mode ODMMode, 1717 unsigned int DSCInputBitPerComponent, 1718 double OutputBpp, 1719 unsigned int HActive, 1720 unsigned int HTotal, 1721 unsigned int NumberOfDSCSlices, 1722 enum output_format_class OutputFormat, 1723 enum output_encoder_class Output, 1724 double PixelClock, 1725 double PixelClockBackEnd, 1726 double dsc_delay_factor_wa) 1727 { 1728 unsigned int DSCDelayRequirement_val; 1729 1730 if (DSCEnabled == true && OutputBpp != 0) { 1731 if (ODMMode == dm_odm_combine_mode_4to1) { 1732 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1733 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1734 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1735 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1736 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1739 } else { 1740 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1743 } 1744 1745 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1746 dml_ceil((double)DSCDelayRequirement_val / HActive, 1); 1747 1748 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1749 1750 } else { 1751 DSCDelayRequirement_val = 0; 1752 } 1753 1754 #ifdef __DML_VBA_DEBUG__ 
1755 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); 1756 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1757 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1758 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1759 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1760 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1761 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1762 #endif 1763 1764 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); 1765 } 1766 1767 void dml32_CalculateSurfaceSizeInMall( 1768 unsigned int NumberOfActiveSurfaces, 1769 unsigned int MALLAllocatedForDCN, 1770 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1771 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], 1772 bool DCCEnable[], 1773 bool ViewportStationary[], 1774 unsigned int ViewportXStartY[], 1775 unsigned int ViewportYStartY[], 1776 unsigned int ViewportXStartC[], 1777 unsigned int ViewportYStartC[], 1778 unsigned int ViewportWidthY[], 1779 unsigned int ViewportHeightY[], 1780 unsigned int BytesPerPixelY[], 1781 unsigned int ViewportWidthC[], 1782 unsigned int ViewportHeightC[], 1783 unsigned int BytesPerPixelC[], 1784 unsigned int SurfaceWidthY[], 1785 unsigned int SurfaceWidthC[], 1786 unsigned int SurfaceHeightY[], 1787 unsigned int SurfaceHeightC[], 1788 unsigned int Read256BytesBlockWidthY[], 1789 unsigned int Read256BytesBlockWidthC[], 1790 unsigned int Read256BytesBlockHeightY[], 1791 unsigned int Read256BytesBlockHeightC[], 1792 unsigned int ReadBlockWidthY[], 1793 unsigned int ReadBlockWidthC[], 1794 unsigned int ReadBlockHeightY[], 1795 unsigned int ReadBlockHeightC[], 1796 unsigned int DCCMetaPitchY[], 1797 unsigned int DCCMetaPitchC[], 1798 1799 /* Output */ 1800 unsigned int SurfaceSizeInMALL[], 1801 bool *ExceededMALLSize) 1802 { 1803 unsigned int k; 1804 
unsigned int TotalSurfaceSizeInMALLForSS = 0; 1805 unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 1806 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; 1807 1808 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1809 if (ViewportStationary[k]) { 1810 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 1811 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1812 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1813 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1814 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1815 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1816 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1817 1818 if (ReadBlockWidthC[k] > 0) { 1819 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1820 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1821 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1822 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1823 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1824 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1825 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1826 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1827 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1828 BytesPerPixelC[k]; 1829 } 1830 if (DCCEnable[k] == true) { 1831 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1832 (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]), 1833 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1834 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1835 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1836 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1837 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1838 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1839 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * 1840 Read256BytesBlockHeightY[k])) * 
BytesPerPixelY[k] / 256) + (64 * 1024); 1841 if (Read256BytesBlockWidthC[k] > 0) { 1842 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1843 dml_min(dml_ceil(DCCMetaPitchC[k], 8 * 1844 Read256BytesBlockWidthC[k]), 1845 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1846 * Read256BytesBlockWidthC[k] - 1, 8 * 1847 Read256BytesBlockWidthC[k]) - 1848 dml_floor(ViewportXStartC[k], 8 * 1849 Read256BytesBlockWidthC[k])) * 1850 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1851 Read256BytesBlockHeightC[k]), 1852 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1853 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1854 Read256BytesBlockHeightC[k]) - 1855 dml_floor(ViewportYStartC[k], 8 * 1856 Read256BytesBlockHeightC[k])) * 1857 BytesPerPixelC[k] / 256; 1858 } 1859 } 1860 } else { 1861 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1862 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1863 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1864 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1865 BytesPerPixelY[k]; 1866 if (ReadBlockWidthC[k] > 0) { 1867 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1868 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1869 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1870 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1871 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1872 BytesPerPixelC[k]; 1873 } 1874 if (DCCEnable[k] == true) { 1875 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1876 (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 * 1877 Read256BytesBlockWidthY[k] - 1), 8 * 1878 Read256BytesBlockWidthY[k]) * 1879 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1880 Read256BytesBlockHeightY[k] - 1), 8 * 1881 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024); 1882 1883 if (Read256BytesBlockWidthC[k] > 0) { 1884 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1885 dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 * 1886 
Read256BytesBlockWidthC[k] - 1), 8 * 1887 Read256BytesBlockWidthC[k]) * 1888 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1889 Read256BytesBlockHeightC[k] - 1), 8 * 1890 Read256BytesBlockHeightC[k]) * 1891 BytesPerPixelC[k] / 256; 1892 } 1893 } 1894 } 1895 } 1896 1897 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1898 /* SS and Subvp counted separate as they are never used at the same time */ 1899 if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) 1900 TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k]; 1901 else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) 1902 TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k]; 1903 } 1904 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || 1905 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); 1906 } // CalculateSurfaceSizeInMall 1907 1908 void dml32_CalculateVMRowAndSwath( 1909 unsigned int NumberOfActiveSurfaces, 1910 DmlPipe myPipe[], 1911 unsigned int SurfaceSizeInMALL[], 1912 unsigned int PTEBufferSizeInRequestsLuma, 1913 unsigned int PTEBufferSizeInRequestsChroma, 1914 unsigned int DCCMetaBufferSizeBytes, 1915 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1917 unsigned int MALLAllocatedForDCN, 1918 double SwathWidthY[], 1919 double SwathWidthC[], 1920 bool GPUVMEnable, 1921 bool HostVMEnable, 1922 unsigned int HostVMMaxNonCachedPageTableLevels, 1923 unsigned int GPUVMMaxPageTableLevels, 1924 unsigned int GPUVMMinPageSizeKBytes[], 1925 unsigned int HostVMMinPageSize, 1926 1927 /* Output */ 1928 bool PTEBufferSizeNotExceeded[], 1929 bool DCCMetaBufferSizeNotExceeded[], 1930 unsigned int dpte_row_width_luma_ub[], 1931 unsigned int dpte_row_width_chroma_ub[], 1932 unsigned int dpte_row_height_luma[], 1933 unsigned int dpte_row_height_chroma[], 1934 unsigned int 
dpte_row_height_linear_luma[], // VBA_DELTA 1935 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1936 unsigned int meta_req_width[], 1937 unsigned int meta_req_width_chroma[], 1938 unsigned int meta_req_height[], 1939 unsigned int meta_req_height_chroma[], 1940 unsigned int meta_row_width[], 1941 unsigned int meta_row_width_chroma[], 1942 unsigned int meta_row_height[], 1943 unsigned int meta_row_height_chroma[], 1944 unsigned int vm_group_bytes[], 1945 unsigned int dpte_group_bytes[], 1946 unsigned int PixelPTEReqWidthY[], 1947 unsigned int PixelPTEReqHeightY[], 1948 unsigned int PTERequestSizeY[], 1949 unsigned int PixelPTEReqWidthC[], 1950 unsigned int PixelPTEReqHeightC[], 1951 unsigned int PTERequestSizeC[], 1952 unsigned int dpde0_bytes_per_frame_ub_l[], 1953 unsigned int meta_pte_bytes_per_frame_ub_l[], 1954 unsigned int dpde0_bytes_per_frame_ub_c[], 1955 unsigned int meta_pte_bytes_per_frame_ub_c[], 1956 double PrefetchSourceLinesY[], 1957 double PrefetchSourceLinesC[], 1958 double VInitPreFillY[], 1959 double VInitPreFillC[], 1960 unsigned int MaxNumSwathY[], 1961 unsigned int MaxNumSwathC[], 1962 double meta_row_bw[], 1963 double dpte_row_bw[], 1964 double PixelPTEBytesPerRow[], 1965 double PDEAndMetaPTEBytesFrame[], 1966 double MetaRowByte[], 1967 bool use_one_row_for_frame[], 1968 bool use_one_row_for_frame_flip[], 1969 bool UsesMALLForStaticScreen[], 1970 bool PTE_BUFFER_MODE[], 1971 unsigned int BIGK_FRAGMENT_SIZE[]) 1972 { 1973 unsigned int k; 1974 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1975 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1976 unsigned int PDEAndMetaPTEBytesFrameY; 1977 unsigned int PDEAndMetaPTEBytesFrameC; 1978 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1979 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1980 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1981 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1982 unsigned int 
PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1983 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1984 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1985 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1986 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1987 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1988 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1989 1990 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1991 if (HostVMEnable == true) { 1992 vm_group_bytes[k] = 512; 1993 dpte_group_bytes[k] = 512; 1994 } else if (GPUVMEnable == true) { 1995 vm_group_bytes[k] = 2048; 1996 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1997 dpte_group_bytes[k] = 512; 1998 else 1999 dpte_group_bytes[k] = 2048; 2000 } else { 2001 vm_group_bytes[k] = 0; 2002 dpte_group_bytes[k] = 0; 2003 } 2004 2005 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 2006 myPipe[k].SourcePixelFormat == dm_420_12 || 2007 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 2008 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 2009 !IsVertical(myPipe[k].SourceRotation)) { 2010 PTEBufferSizeInRequestsForLuma[k] = 2011 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 2013 } else { 2014 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 2015 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 2016 } 2017 2018 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 2019 myPipe[k].ViewportStationary, 2020 myPipe[k].DCCEnable, 2021 myPipe[k].DPPPerSurface, 2022 myPipe[k].BlockHeight256BytesC, 2023 myPipe[k].BlockWidth256BytesC, 2024 myPipe[k].SourcePixelFormat, 2025 myPipe[k].SurfaceTiling, 2026 myPipe[k].BytePerPixelC, 2027 
myPipe[k].SourceRotation, 2028 SwathWidthC[k], 2029 myPipe[k].ViewportHeightChroma, 2030 myPipe[k].ViewportXStartC, 2031 myPipe[k].ViewportYStartC, 2032 GPUVMEnable, 2033 HostVMEnable, 2034 HostVMMaxNonCachedPageTableLevels, 2035 GPUVMMaxPageTableLevels, 2036 GPUVMMinPageSizeKBytes[k], 2037 HostVMMinPageSize, 2038 PTEBufferSizeInRequestsForChroma[k], 2039 myPipe[k].PitchC, 2040 myPipe[k].DCCMetaPitchC, 2041 myPipe[k].BlockWidthC, 2042 myPipe[k].BlockHeightC, 2043 2044 /* Output */ 2045 &MetaRowByteC[k], 2046 &PixelPTEBytesPerRowC[k], 2047 &dpte_row_width_chroma_ub[k], 2048 &dpte_row_height_chroma[k], 2049 &dpte_row_height_linear_chroma[k], 2050 &PixelPTEBytesPerRowC_one_row_per_frame[k], 2051 &dpte_row_width_chroma_ub_one_row_per_frame[k], 2052 &dpte_row_height_chroma_one_row_per_frame[k], 2053 &meta_req_width_chroma[k], 2054 &meta_req_height_chroma[k], 2055 &meta_row_width_chroma[k], 2056 &meta_row_height_chroma[k], 2057 &PixelPTEReqWidthC[k], 2058 &PixelPTEReqHeightC[k], 2059 &PTERequestSizeC[k], 2060 &dpde0_bytes_per_frame_ub_c[k], 2061 &meta_pte_bytes_per_frame_ub_c[k]); 2062 2063 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2064 myPipe[k].VRatioChroma, 2065 myPipe[k].VTapsChroma, 2066 myPipe[k].InterlaceEnable, 2067 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2068 myPipe[k].SwathHeightC, 2069 myPipe[k].SourceRotation, 2070 myPipe[k].ViewportStationary, 2071 SwathWidthC[k], 2072 myPipe[k].ViewportHeightChroma, 2073 myPipe[k].ViewportXStartC, 2074 myPipe[k].ViewportYStartC, 2075 2076 /* Output */ 2077 &VInitPreFillC[k], 2078 &MaxNumSwathC[k]); 2079 } else { 2080 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2081 PTEBufferSizeInRequestsForChroma[k] = 0; 2082 PixelPTEBytesPerRowC[k] = 0; 2083 PDEAndMetaPTEBytesFrameC = 0; 2084 MetaRowByteC[k] = 0; 2085 MaxNumSwathC[k] = 0; 2086 PrefetchSourceLinesC[k] = 0; 2087 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2088 
dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2089 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2090 } 2091 2092 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2093 myPipe[k].ViewportStationary, 2094 myPipe[k].DCCEnable, 2095 myPipe[k].DPPPerSurface, 2096 myPipe[k].BlockHeight256BytesY, 2097 myPipe[k].BlockWidth256BytesY, 2098 myPipe[k].SourcePixelFormat, 2099 myPipe[k].SurfaceTiling, 2100 myPipe[k].BytePerPixelY, 2101 myPipe[k].SourceRotation, 2102 SwathWidthY[k], 2103 myPipe[k].ViewportHeight, 2104 myPipe[k].ViewportXStart, 2105 myPipe[k].ViewportYStart, 2106 GPUVMEnable, 2107 HostVMEnable, 2108 HostVMMaxNonCachedPageTableLevels, 2109 GPUVMMaxPageTableLevels, 2110 GPUVMMinPageSizeKBytes[k], 2111 HostVMMinPageSize, 2112 PTEBufferSizeInRequestsForLuma[k], 2113 myPipe[k].PitchY, 2114 myPipe[k].DCCMetaPitchY, 2115 myPipe[k].BlockWidthY, 2116 myPipe[k].BlockHeightY, 2117 2118 /* Output */ 2119 &MetaRowByteY[k], 2120 &PixelPTEBytesPerRowY[k], 2121 &dpte_row_width_luma_ub[k], 2122 &dpte_row_height_luma[k], 2123 &dpte_row_height_linear_luma[k], 2124 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2125 &dpte_row_width_luma_ub_one_row_per_frame[k], 2126 &dpte_row_height_luma_one_row_per_frame[k], 2127 &meta_req_width[k], 2128 &meta_req_height[k], 2129 &meta_row_width[k], 2130 &meta_row_height[k], 2131 &PixelPTEReqWidthY[k], 2132 &PixelPTEReqHeightY[k], 2133 &PTERequestSizeY[k], 2134 &dpde0_bytes_per_frame_ub_l[k], 2135 &meta_pte_bytes_per_frame_ub_l[k]); 2136 2137 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2138 myPipe[k].VRatio, 2139 myPipe[k].VTaps, 2140 myPipe[k].InterlaceEnable, 2141 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2142 myPipe[k].SwathHeightY, 2143 myPipe[k].SourceRotation, 2144 myPipe[k].ViewportStationary, 2145 SwathWidthY[k], 2146 myPipe[k].ViewportHeight, 2147 myPipe[k].ViewportXStart, 2148 myPipe[k].ViewportYStart, 2149 2150 /* Output */ 2151 &VInitPreFillY[k], 2152 &MaxNumSwathY[k]); 2153 2154 PDEAndMetaPTEBytesFrame[k] = 
PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2155 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2156 2157 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2158 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2159 PTEBufferSizeNotExceeded[k] = true; 2160 } else { 2161 PTEBufferSizeNotExceeded[k] = false; 2162 } 2163 2164 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2165 PTEBufferSizeInRequestsForLuma[k] && 2166 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2167 } 2168 2169 dml32_CalculateMALLUseForStaticScreen( 2170 NumberOfActiveSurfaces, 2171 MALLAllocatedForDCN, 2172 UseMALLForStaticScreen, // mode 2173 SurfaceSizeInMALL, 2174 one_row_per_frame_fits_in_buffer, 2175 /* Output */ 2176 UsesMALLForStaticScreen); // boolen 2177 2178 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2179 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2180 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2181 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2182 (GPUVMMinPageSizeKBytes[k] > 64); 2183 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2184 } 2185 2186 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2187 #ifdef __DML_VBA_DEBUG__ 2188 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2189 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2190 #endif 2191 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2192 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2193 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2194 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2195 2196 use_one_row_for_frame_flip[k] = 
use_one_row_for_frame[k] && 2197 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2198 2199 if (use_one_row_for_frame[k]) { 2200 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2201 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2202 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2203 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2204 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2205 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2206 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2207 } 2208 2209 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) 2210 DCCMetaBufferSizeNotExceeded[k] = true; 2211 else 2212 DCCMetaBufferSizeNotExceeded[k] = false; 2213 2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2215 if (use_one_row_for_frame[k]) 2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2217 2218 dml32_CalculateRowBandwidth( 2219 GPUVMEnable, 2220 myPipe[k].SourcePixelFormat, 2221 myPipe[k].VRatio, 2222 myPipe[k].VRatioChroma, 2223 myPipe[k].DCCEnable, 2224 myPipe[k].HTotal / myPipe[k].PixelClock, 2225 MetaRowByteY[k], MetaRowByteC[k], 2226 meta_row_height[k], 2227 meta_row_height_chroma[k], 2228 PixelPTEBytesPerRowY[k], 2229 PixelPTEBytesPerRowC[k], 2230 dpte_row_height_luma[k], 2231 dpte_row_height_chroma[k], 2232 2233 /* Output */ 2234 &meta_row_bw[k], 2235 &dpte_row_bw[k]); 2236 #ifdef __DML_VBA_DEBUG__ 2237 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2238 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2239 __func__, k, use_one_row_for_frame_flip[k]); 2240 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2241 __func__, k, UseMALLForPStateChange[k]); 2242 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2243 
dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2244 __func__, k, dpte_row_width_luma_ub[k]); 2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2246 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2247 __func__, k, dpte_row_height_chroma[k]); 2248 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2249 __func__, k, dpte_row_width_chroma_ub[k]); 2250 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2251 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2252 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2253 __func__, k, PTEBufferSizeNotExceeded[k]); 2254 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2255 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2256 #endif 2257 } 2258 } // CalculateVMRowAndSwath 2259 2260 unsigned int dml32_CalculateVMAndRowBytes( 2261 bool ViewportStationary, 2262 bool DCCEnable, 2263 unsigned int NumberOfDPPs, 2264 unsigned int BlockHeight256Bytes, 2265 unsigned int BlockWidth256Bytes, 2266 enum source_format_class SourcePixelFormat, 2267 unsigned int SurfaceTiling, 2268 unsigned int BytePerPixel, 2269 enum dm_rotation_angle SourceRotation, 2270 double SwathWidth, 2271 unsigned int ViewportHeight, 2272 unsigned int ViewportXStart, 2273 unsigned int ViewportYStart, 2274 bool GPUVMEnable, 2275 bool HostVMEnable, 2276 unsigned int HostVMMaxNonCachedPageTableLevels, 2277 unsigned int GPUVMMaxPageTableLevels, 2278 unsigned int GPUVMMinPageSizeKBytes, 2279 unsigned int HostVMMinPageSize, 2280 unsigned int PTEBufferSizeInRequests, 2281 unsigned int Pitch, 2282 unsigned int DCCMetaPitch, 2283 unsigned int MacroTileWidth, 2284 unsigned int MacroTileHeight, 2285 2286 /* Output */ 2287 unsigned int *MetaRowByte, 2288 unsigned int *PixelPTEBytesPerRow, 2289 unsigned int *dpte_row_width_ub, 
2290 unsigned int *dpte_row_height, 2291 unsigned int *dpte_row_height_linear, 2292 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2293 unsigned int *dpte_row_width_ub_one_row_per_frame, 2294 unsigned int *dpte_row_height_one_row_per_frame, 2295 unsigned int *MetaRequestWidth, 2296 unsigned int *MetaRequestHeight, 2297 unsigned int *meta_row_width, 2298 unsigned int *meta_row_height, 2299 unsigned int *PixelPTEReqWidth, 2300 unsigned int *PixelPTEReqHeight, 2301 unsigned int *PTERequestSize, 2302 unsigned int *DPDE0BytesFrame, 2303 unsigned int *MetaPTEBytesFrame) 2304 { 2305 unsigned int MPDEBytesFrame; 2306 unsigned int DCCMetaSurfaceBytes; 2307 unsigned int ExtraDPDEBytesFrame; 2308 unsigned int PDEAndMetaPTEBytesFrame; 2309 unsigned int HostVMDynamicLevels = 0; 2310 unsigned int MacroTileSizeBytes; 2311 unsigned int vp_height_meta_ub; 2312 unsigned int vp_height_dpte_ub; 2313 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2314 2315 if (GPUVMEnable == true && HostVMEnable == true) { 2316 if (HostVMMinPageSize < 2048) 2317 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2318 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2319 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2320 else 2321 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2322 } 2323 2324 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2325 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2326 if (SurfaceTiling == dm_sw_linear) { 2327 *meta_row_height = 32; 2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2329 - dml_floor(ViewportXStart, *MetaRequestWidth); 2330 } else if (!IsVertical(SourceRotation)) { 2331 *meta_row_height = *MetaRequestHeight; 2332 if (ViewportStationary && NumberOfDPPs == 1) { 2333 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2334 *MetaRequestWidth) - 
dml_floor(ViewportXStart, *MetaRequestWidth); 2335 } else { 2336 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2337 } 2338 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2339 } else { 2340 *meta_row_height = *MetaRequestWidth; 2341 if (ViewportStationary && NumberOfDPPs == 1) { 2342 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2343 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2344 } else { 2345 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2346 } 2347 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2348 } 2349 2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2351 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2352 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2353 } else if (!IsVertical(SourceRotation)) { 2354 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2355 } else { 2356 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2357 } 2358 2359 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2360 2361 if (GPUVMEnable == true) { 2362 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2363 (8 * 4.0 * 1024), 1) + 1) * 64; 2364 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2365 } else { 2366 *MetaPTEBytesFrame = 0; 2367 MPDEBytesFrame = 0; 2368 } 2369 2370 if (DCCEnable != true) { 2371 *MetaPTEBytesFrame = 0; 2372 MPDEBytesFrame = 0; 2373 *MetaRowByte = 0; 2374 } 2375 2376 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2377 2378 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2379 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 
2380 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2381 MacroTileHeight - 1, MacroTileHeight) - 2382 dml_floor(ViewportYStart, MacroTileHeight); 2383 } else if (!IsVertical(SourceRotation)) { 2384 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2385 } else { 2386 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2387 } 2388 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2389 (8 * 2097152), 1) + 1); 2390 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2391 } else { 2392 *DPDE0BytesFrame = 0; 2393 ExtraDPDEBytesFrame = 0; 2394 vp_height_dpte_ub = 0; 2395 } 2396 2397 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2398 2399 #ifdef __DML_VBA_DEBUG__ 2400 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2401 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2402 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2403 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2404 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2405 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2406 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2407 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2408 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2409 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2410 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2411 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2412 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2413 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2414 
dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2415 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2416 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2417 #endif 2418 2419 if (HostVMEnable == true) 2420 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2421 2422 if (SurfaceTiling == dm_sw_linear) { 2423 *PixelPTEReqHeight = 1; 2424 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2425 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2426 *PTERequestSize = 64; 2427 } else if (GPUVMMinPageSizeKBytes == 4) { 2428 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2429 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2430 *PTERequestSize = 128; 2431 } else { 2432 *PixelPTEReqHeight = MacroTileHeight; 2433 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2434 *PTERequestSize = 64; 2435 } 2436 #ifdef __DML_VBA_DEBUG__ 2437 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2438 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2439 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2440 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2441 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2442 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2443 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2444 #endif 2445 2446 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2447 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2448 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2449 (double) *PixelPTEReqWidth; 2450 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / 
*PixelPTEReqWidth * 2451 *PTERequestSize; 2452 2453 if (SurfaceTiling == dm_sw_linear) { 2454 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2455 *PixelPTEReqWidth / Pitch), 1)); 2456 #ifdef __DML_VBA_DEBUG__ 2457 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2458 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2459 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2460 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2461 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2462 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2463 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, 2464 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2465 *PixelPTEReqWidth / Pitch), 1)); 2466 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2467 #endif 2468 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2469 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2470 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2471 2472 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 
2473 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2474 PixelPTEReqWidth_linear / Pitch), 1); 2475 if (*dpte_row_height_linear > 128) 2476 *dpte_row_height_linear = 128; 2477 2478 } else if (!IsVertical(SourceRotation)) { 2479 *dpte_row_height = *PixelPTEReqHeight; 2480 2481 if (GPUVMMinPageSizeKBytes > 64) { 2482 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2483 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2484 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2485 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2486 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2487 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2488 } else { 2489 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2490 *PixelPTEReqWidth; 2491 } 2492 2493 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2494 } else { 2495 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2496 2497 if (ViewportStationary && (NumberOfDPPs == 1)) { 2498 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2499 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2500 } else { 2501 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2502 * *PixelPTEReqHeight; 2503 } 2504 2505 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2506 } 2507 2508 if (GPUVMEnable != true) 2509 *PixelPTEBytesPerRow = 0; 2510 if (HostVMEnable == true) 2511 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2512 2513 #ifdef __DML_VBA_DEBUG__ 2514 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2515 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2516 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2517 dml_print("DML::%s: 
dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2518 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2519 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2520 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2521 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2522 __func__, *dpte_row_width_ub_one_row_per_frame); 2523 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2524 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2525 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2526 *MetaPTEBytesFrame); 2527 #endif 2528 2529 return PDEAndMetaPTEBytesFrame; 2530 } // CalculateVMAndRowBytes 2531 2532 double dml32_CalculatePrefetchSourceLines( 2533 double VRatio, 2534 unsigned int VTaps, 2535 bool Interlace, 2536 bool ProgressiveToInterlaceUnitInOPP, 2537 unsigned int SwathHeight, 2538 enum dm_rotation_angle SourceRotation, 2539 bool ViewportStationary, 2540 double SwathWidth, 2541 unsigned int ViewportHeight, 2542 unsigned int ViewportXStart, 2543 unsigned int ViewportYStart, 2544 2545 /* Output */ 2546 double *VInitPreFill, 2547 unsigned int *MaxNumSwath) 2548 { 2549 2550 unsigned int vp_start_rot; 2551 unsigned int sw0_tmp; 2552 unsigned int MaxPartialSwath; 2553 double numLines; 2554 2555 #ifdef __DML_VBA_DEBUG__ 2556 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2557 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2558 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2559 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2560 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2561 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2562 #endif 2563 if (ProgressiveToInterlaceUnitInOPP) 2564 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 
2.0, 1); 2565 else 2566 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2567 2568 if (ViewportStationary) { 2569 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2570 vp_start_rot = SwathHeight - 2571 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2572 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2573 vp_start_rot = ViewportXStart; 2574 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2575 vp_start_rot = SwathHeight - 2576 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2577 } else { 2578 vp_start_rot = ViewportYStart; 2579 } 2580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2581 if (sw0_tmp < *VInitPreFill) 2582 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2583 else 2584 *MaxNumSwath = 1; 2585 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2586 } else { 2587 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2588 if (*VInitPreFill > 1) 2589 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2590 else 2591 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2592 } 2593 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2594 2595 #ifdef __DML_VBA_DEBUG__ 2596 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2597 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2598 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2599 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2600 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2601 #endif 2602 return numLines; 2603 2604 } // CalculatePrefetchSourceLines 2605 2606 void dml32_CalculateMALLUseForStaticScreen( 2607 unsigned int 
NumberOfActiveSurfaces, 2608 unsigned int MALLAllocatedForDCNFinal, 2609 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2610 unsigned int SurfaceSizeInMALL[], 2611 bool one_row_per_frame_fits_in_buffer[], 2612 2613 /* output */ 2614 bool UsesMALLForStaticScreen[]) 2615 { 2616 unsigned int k; 2617 unsigned int SurfaceToAddToMALL; 2618 bool CanAddAnotherSurfaceToMALL; 2619 unsigned int TotalSurfaceSizeInMALL; 2620 2621 TotalSurfaceSizeInMALL = 0; 2622 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2623 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2624 if (UsesMALLForStaticScreen[k]) 2625 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2626 #ifdef __DML_VBA_DEBUG__ 2627 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2628 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2629 #endif 2630 } 2631 2632 SurfaceToAddToMALL = 0; 2633 CanAddAnotherSurfaceToMALL = true; 2634 while (CanAddAnotherSurfaceToMALL) { 2635 CanAddAnotherSurfaceToMALL = false; 2636 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2637 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2638 !UsesMALLForStaticScreen[k] && 2639 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2640 one_row_per_frame_fits_in_buffer[k] && 2641 (!CanAddAnotherSurfaceToMALL || 2642 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2643 CanAddAnotherSurfaceToMALL = true; 2644 SurfaceToAddToMALL = k; 2645 #ifdef __DML_VBA_DEBUG__ 2646 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2647 __func__, k, UseMALLForStaticScreen[k]); 2648 #endif 2649 } 2650 } 2651 if (CanAddAnotherSurfaceToMALL) { 2652 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2653 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 
2654 2655 #ifdef __DML_VBA_DEBUG__ 2656 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2657 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2658 #endif 2659 2660 } 2661 } 2662 } 2663 2664 void dml32_CalculateRowBandwidth( 2665 bool GPUVMEnable, 2666 enum source_format_class SourcePixelFormat, 2667 double VRatio, 2668 double VRatioChroma, 2669 bool DCCEnable, 2670 double LineTime, 2671 unsigned int MetaRowByteLuma, 2672 unsigned int MetaRowByteChroma, 2673 unsigned int meta_row_height_luma, 2674 unsigned int meta_row_height_chroma, 2675 unsigned int PixelPTEBytesPerRowLuma, 2676 unsigned int PixelPTEBytesPerRowChroma, 2677 unsigned int dpte_row_height_luma, 2678 unsigned int dpte_row_height_chroma, 2679 /* Output */ 2680 double *meta_row_bw, 2681 double *dpte_row_bw) 2682 { 2683 if (DCCEnable != true) { 2684 *meta_row_bw = 0; 2685 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2686 SourcePixelFormat == dm_rgbe_alpha) { 2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2688 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2689 } else { 2690 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2691 } 2692 2693 if (GPUVMEnable != true) { 2694 *dpte_row_bw = 0; 2695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2696 SourcePixelFormat == dm_rgbe_alpha) { 2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2698 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2699 } else { 2700 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2701 } 2702 } 2703 2704 double dml32_CalculateUrgentLatency( 2705 double UrgentLatencyPixelDataOnly, 2706 double UrgentLatencyPixelMixedWithVMData, 2707 double 
UrgentLatencyVMDataOnly, 2708 bool DoUrgentLatencyAdjustment, 2709 double UrgentLatencyAdjustmentFabricClockComponent, 2710 double UrgentLatencyAdjustmentFabricClockReference, 2711 double FabricClock) 2712 { 2713 double ret; 2714 2715 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2716 if (DoUrgentLatencyAdjustment == true) { 2717 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2718 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2719 } 2720 return ret; 2721 } 2722 2723 void dml32_CalculateUrgentBurstFactor( 2724 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2725 unsigned int swath_width_luma_ub, 2726 unsigned int swath_width_chroma_ub, 2727 unsigned int SwathHeightY, 2728 unsigned int SwathHeightC, 2729 double LineTime, 2730 double UrgentLatency, 2731 double CursorBufferSize, 2732 unsigned int CursorWidth, 2733 unsigned int CursorBPP, 2734 double VRatio, 2735 double VRatioC, 2736 double BytePerPixelInDETY, 2737 double BytePerPixelInDETC, 2738 unsigned int DETBufferSizeY, 2739 unsigned int DETBufferSizeC, 2740 /* Output */ 2741 double *UrgentBurstFactorCursor, 2742 double *UrgentBurstFactorLuma, 2743 double *UrgentBurstFactorChroma, 2744 bool *NotEnoughUrgentLatencyHiding) 2745 { 2746 double LinesInDETLuma; 2747 double LinesInDETChroma; 2748 unsigned int LinesInCursorBuffer; 2749 double CursorBufferSizeInTime; 2750 double DETBufferSizeInTimeLuma; 2751 double DETBufferSizeInTimeChroma; 2752 2753 *NotEnoughUrgentLatencyHiding = 0; 2754 2755 if (CursorWidth > 0) { 2756 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2757 (CursorWidth * CursorBPP / 8.0)), 1.0); 2758 if (VRatio > 0) { 2759 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2760 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2761 *NotEnoughUrgentLatencyHiding = 1; 2762 *UrgentBurstFactorCursor = 0; 2763 } else { 2764 *UrgentBurstFactorCursor = 
CursorBufferSizeInTime / 2765 (CursorBufferSizeInTime - UrgentLatency); 2766 } 2767 } else { 2768 *UrgentBurstFactorCursor = 1; 2769 } 2770 } 2771 2772 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2773 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2774 2775 if (VRatio > 0) { 2776 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2777 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2778 *NotEnoughUrgentLatencyHiding = 1; 2779 *UrgentBurstFactorLuma = 0; 2780 } else { 2781 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2782 } 2783 } else { 2784 *UrgentBurstFactorLuma = 1; 2785 } 2786 2787 if (BytePerPixelInDETC > 0) { 2788 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 2789 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2790 / swath_width_chroma_ub; 2791 2792 if (VRatio > 0) { 2793 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2794 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2795 *NotEnoughUrgentLatencyHiding = 1; 2796 *UrgentBurstFactorChroma = 0; 2797 } else { 2798 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2799 / (DETBufferSizeInTimeChroma - UrgentLatency); 2800 } 2801 } else { 2802 *UrgentBurstFactorChroma = 1; 2803 } 2804 } 2805 } // CalculateUrgentBurstFactor 2806 2807 void dml32_CalculateDCFCLKDeepSleep( 2808 unsigned int NumberOfActiveSurfaces, 2809 unsigned int BytePerPixelY[], 2810 unsigned int BytePerPixelC[], 2811 double VRatio[], 2812 double VRatioChroma[], 2813 double SwathWidthY[], 2814 double SwathWidthC[], 2815 unsigned int DPPPerSurface[], 2816 double HRatio[], 2817 double HRatioChroma[], 2818 double PixelClock[], 2819 double PSCL_THROUGHPUT[], 2820 double PSCL_THROUGHPUT_CHROMA[], 2821 double Dppclk[], 2822 double ReadBandwidthLuma[], 2823 double ReadBandwidthChroma[], 2824 unsigned int 
ReturnBusWidth, 2825 2826 /* Output */ 2827 double *DCFClkDeepSleep) 2828 { 2829 unsigned int k; 2830 double DisplayPipeLineDeliveryTimeLuma; 2831 double DisplayPipeLineDeliveryTimeChroma; 2832 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2833 double ReadBandwidth = 0.0; 2834 2835 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2836 2837 if (VRatio[k] <= 1) { 2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2839 / PixelClock[k]; 2840 } else { 2841 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2842 } 2843 if (BytePerPixelC[k] == 0) { 2844 DisplayPipeLineDeliveryTimeChroma = 0; 2845 } else { 2846 if (VRatioChroma[k] <= 1) { 2847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2848 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2849 } else { 2850 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2851 / Dppclk[k]; 2852 } 2853 } 2854 2855 if (BytePerPixelC[k] > 0) { 2856 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2857 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2858 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2859 32.0 / DisplayPipeLineDeliveryTimeChroma); 2860 } else { 2861 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2862 64.0 / DisplayPipeLineDeliveryTimeLuma; 2863 } 2864 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2865 2866 #ifdef __DML_VBA_DEBUG__ 2867 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2868 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2869 #endif 2870 } 2871 2872 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2873 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2874 2875 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / 
(double) ReturnBusWidth); 2876 2877 #ifdef __DML_VBA_DEBUG__ 2878 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2879 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2880 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2881 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2882 #endif 2883 2884 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2885 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2886 #ifdef __DML_VBA_DEBUG__ 2887 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2888 #endif 2889 } // CalculateDCFCLKDeepSleep 2890 2891 double dml32_CalculateWriteBackDelay( 2892 enum source_format_class WritebackPixelFormat, 2893 double WritebackHRatio, 2894 double WritebackVRatio, 2895 unsigned int WritebackVTaps, 2896 unsigned int WritebackDestinationWidth, 2897 unsigned int WritebackDestinationHeight, 2898 unsigned int WritebackSourceHeight, 2899 unsigned int HTotal) 2900 { 2901 double CalculateWriteBackDelay; 2902 double Line_length; 2903 double Output_lines_last_notclamped; 2904 double WritebackVInit; 2905 2906 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2907 Line_length = dml_max((double) WritebackDestinationWidth, 2908 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2909 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2910 dml_ceil(((double)WritebackSourceHeight - 2911 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2912 if (Output_lines_last_notclamped < 0) { 2913 CalculateWriteBackDelay = 0; 2914 } else { 2915 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2916 (HTotal - WritebackDestinationWidth) + 80; 2917 } 2918 return CalculateWriteBackDelay; 2919 } 2920 2921 void dml32_UseMinimumDCFCLK( 2922 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2923 bool DRRDisplay[], 2924 bool 
SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2925 unsigned int MaxInterDCNTileRepeaters, 2926 unsigned int MaxPrefetchMode, 2927 double DRAMClockChangeLatencyFinal, 2928 double FCLKChangeLatency, 2929 double SREnterPlusExitTime, 2930 unsigned int ReturnBusWidth, 2931 unsigned int RoundTripPingLatencyCycles, 2932 unsigned int ReorderingBytes, 2933 unsigned int PixelChunkSizeInKByte, 2934 unsigned int MetaChunkSize, 2935 bool GPUVMEnable, 2936 unsigned int GPUVMMaxPageTableLevels, 2937 bool HostVMEnable, 2938 unsigned int NumberOfActiveSurfaces, 2939 double HostVMMinPageSize, 2940 unsigned int HostVMMaxNonCachedPageTableLevels, 2941 bool DynamicMetadataVMEnabled, 2942 bool ImmediateFlipRequirement, 2943 bool ProgressiveToInterlaceUnitInOPP, 2944 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2945 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2946 unsigned int VTotal[], 2947 unsigned int VActive[], 2948 unsigned int DynamicMetadataTransmittedBytes[], 2949 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2950 bool Interlace[], 2951 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2952 double RequiredDISPCLK[][2], 2953 double UrgLatency[], 2954 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2955 double ProjectedDCFClkDeepSleep[][2], 2956 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2957 unsigned int TotalNumberOfActiveDPP[][2], 2958 unsigned int TotalNumberOfDCCActiveDPP[][2], 2959 unsigned int dpte_group_bytes[], 2960 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2961 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2962 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2963 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2964 unsigned int BytePerPixelY[], 2965 unsigned int BytePerPixelC[], 2966 unsigned int HTotal[], 2967 double PixelClock[], 2968 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2969 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2970 double 
MetaRowBytes[][2][DC__NUM_DPP__MAX], 2971 bool DynamicMetadataEnable[], 2972 double ReadBandwidthLuma[], 2973 double ReadBandwidthChroma[], 2974 double DCFCLKPerState[], 2975 /* Output */ 2976 double DCFCLKState[][2]) 2977 { 2978 unsigned int i, j, k; 2979 unsigned int dummy1; 2980 double dummy2, dummy3; 2981 double NormalEfficiency; 2982 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2983 2984 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2985 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2986 for (j = 0; j <= 1; ++j) { 2987 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2988 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2989 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2990 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2991 double MinimumTWait = 0.0; 2992 double DPTEBandwidth; 2993 double DCFCLKRequiredForAverageBandwidth; 2994 unsigned int ExtraLatencyBytes; 2995 double ExtraLatencyCycles; 2996 double DCFCLKRequiredForPeakBandwidth; 2997 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2998 double MinimumTvmPlus2Tr0; 2999 3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 3001 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 3003 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 3004 / (15.75 * HTotal[k] / PixelClock[k]); 3005 } 3006 3007 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 3008 NoOfDPPState[k] = NoOfDPP[i][j][k]; 3009 3010 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 3011 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 3012 3013 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 3014 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 3015 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 3016 NumberOfActiveSurfaces, NoOfDPPState, 
dpte_group_bytes, 1, HostVMMinPageSize, 3017 HostVMMaxNonCachedPageTableLevels); 3018 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 3019 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 3020 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3021 double DCFCLKCyclesRequiredInPrefetch; 3022 double PrefetchTime; 3023 3024 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 3025 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 3026 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 3027 * BytePerPixelC[k]) / NormalEfficiency 3028 / ReturnBusWidth; 3029 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 3030 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 3031 / NormalEfficiency / ReturnBusWidth 3032 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) 3033 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3034 / ReturnBusWidth 3035 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3036 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3037 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3038 * HTotal[k] / PixelClock[k]; 3039 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3040 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3041 UrgLatency[i] * GPUVMMaxPageTableLevels * 3042 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3043 3044 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3045 UseMALLForPStateChange[k], 3046 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3047 DRRDisplay[k], 3048 DRAMClockChangeLatencyFinal, 3049 FCLKChangeLatency, 3050 UrgLatency[i], 3051 SREnterPlusExitTime); 3052 3053 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3054 MinimumTWait - UrgLatency[i] * 3055 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3056 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 
3057 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3058 DynamicMetadataVMExtraLatency[k]; 3059 3060 if (PrefetchTime > 0) { 3061 double ExpectedVRatioPrefetch; 3062 3063 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3064 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3065 DCFCLKCyclesRequiredInPrefetch); 3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3067 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3068 PrefetchPixelLinesTime[k] * 3069 dml_max(1.0, ExpectedVRatioPrefetch) * 3070 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3071 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3074 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3075 NormalEfficiency / ReturnBusWidth; 3076 } 3077 } else { 3078 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3079 } 3080 if (DynamicMetadataEnable[k] == true) { 3081 double TSetupPipe; 3082 double TdmbfPipe; 3083 double TdmsksPipe; 3084 double TdmecPipe; 3085 double AllowedTimeForUrgentExtraLatency; 3086 3087 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3088 MaxInterDCNTileRepeaters, 3089 RequiredDPPCLKPerSurface[i][j][k], 3090 RequiredDISPCLK[i][j], 3091 ProjectedDCFClkDeepSleep[i][j], 3092 PixelClock[k], 3093 HTotal[k], 3094 VTotal[k] - VActive[k], 3095 DynamicMetadataTransmittedBytes[k], 3096 DynamicMetadataLinesBeforeActiveRequired[k], 3097 Interlace[k], 3098 ProgressiveToInterlaceUnitInOPP, 3099 3100 /* output */ 3101 &TSetupPipe, 3102 &TdmbfPipe, 3103 &TdmecPipe, 3104 &TdmsksPipe, 3105 &dummy1, 3106 &dummy2, 3107 &dummy3); 3108 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3109 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3110 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3111 if (AllowedTimeForUrgentExtraLatency > 0) 3112 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3113 
dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3114 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3115 else 3116 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3117 } 3118 } 3119 DCFCLKRequiredForPeakBandwidth = 0; 3120 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3121 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3122 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3123 } 3124 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3125 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3126 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3127 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3128 double MaximumTvmPlus2Tr0PlusTsw; 3129 3130 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3131 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3132 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3133 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3134 } else { 3135 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3136 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3137 MinimumTvmPlus2Tr0 - 3138 PrefetchPixelLinesTime[k] / 4), 3139 (2 * ExtraLatencyCycles + 3140 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3141 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3142 } 3143 } 3144 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3145 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3146 } 3147 } 3148 } 3149 3150 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, 3151 unsigned int TotalNumberOfActiveDPP, 3152 unsigned int PixelChunkSizeInKByte, 3153 unsigned int TotalNumberOfDCCActiveDPP, 3154 unsigned int MetaChunkSize, 3155 bool GPUVMEnable, 3156 bool HostVMEnable, 3157 unsigned int NumberOfActiveSurfaces, 3158 unsigned int NumberOfDPP[], 3159 unsigned int dpte_group_bytes[], 3160 double 
HostVMInefficiencyFactor, 3161 double HostVMMinPageSize, 3162 unsigned int HostVMMaxNonCachedPageTableLevels) 3163 { 3164 unsigned int k; 3165 double ret; 3166 unsigned int HostVMDynamicLevels; 3167 3168 if (GPUVMEnable == true && HostVMEnable == true) { 3169 if (HostVMMinPageSize < 2048) 3170 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 3171 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 3172 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 3173 else 3174 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 3175 } else { 3176 HostVMDynamicLevels = 0; 3177 } 3178 3179 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + 3180 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 3181 3182 if (GPUVMEnable == true) { 3183 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3184 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * 3185 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 3186 } 3187 } 3188 return ret; 3189 } 3190 3191 void dml32_CalculateVUpdateAndDynamicMetadataParameters( 3192 unsigned int MaxInterDCNTileRepeaters, 3193 double Dppclk, 3194 double Dispclk, 3195 double DCFClkDeepSleep, 3196 double PixelClock, 3197 unsigned int HTotal, 3198 unsigned int VBlank, 3199 unsigned int DynamicMetadataTransmittedBytes, 3200 unsigned int DynamicMetadataLinesBeforeActiveRequired, 3201 unsigned int InterlaceEnable, 3202 bool ProgressiveToInterlaceUnitInOPP, 3203 3204 /* output */ 3205 double *TSetup, 3206 double *Tdmbf, 3207 double *Tdmec, 3208 double *Tdmsks, 3209 unsigned int *VUpdateOffsetPix, 3210 double *VUpdateWidthPix, 3211 double *VReadyOffsetPix) 3212 { 3213 double TotalRepeaterDelayTime; 3214 3215 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); 3216 *VUpdateWidthPix = 3217 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); 3218 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / 
Dppclk, 3219 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); 3220 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); 3221 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3222 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; 3223 *Tdmec = HTotal / PixelClock; 3224 3225 if (DynamicMetadataLinesBeforeActiveRequired == 0) 3226 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3227 else 3228 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3229 3230 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) 3231 *Tdmsks = *Tdmsks / 2; 3232 #ifdef __DML_VBA_DEBUG__ 3233 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3234 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3235 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3236 3237 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", 3238 __func__, DynamicMetadataLinesBeforeActiveRequired); 3239 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); 3240 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); 3241 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); 3242 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); 3243 #endif 3244 } 3245 3246 double dml32_CalculateTWait( 3247 unsigned int PrefetchMode, 3248 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 3249 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3250 bool DRRDisplay, 3251 double DRAMClockChangeLatency, 3252 double FCLKChangeLatency, 3253 double UrgentLatency, 3254 double SREnterPlusExitTime) 3255 { 3256 double TWait = 0.0; 3257 3258 if (PrefetchMode == 0 && 3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && 3260 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && 3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && 3262 
!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { 3263 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3264 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3265 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3266 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3267 TWait = dml_max(SREnterPlusExitTime, UrgentLatency); 3268 } else { 3269 TWait = UrgentLatency; 3270 } 3271 3272 #ifdef __DML_VBA_DEBUG__ 3273 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); 3274 dml_print("DML::%s: TWait = %f\n", __func__, TWait); 3275 #endif 3276 return TWait; 3277 } // CalculateTWait 3278 3279 // Function: get_return_bw_mbps 3280 // Megabyte per second 3281 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, 3282 const int VoltageLevel, 3283 const bool HostVMEnable, 3284 const double DCFCLK, 3285 const double FabricClock, 3286 const double DRAMSpeed) 3287 { 3288 double ReturnBW = 0.; 3289 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; 3290 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; 3291 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; 3292 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3293 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3294 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3295 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3296 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3297 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3298 IdealDRAMBandwidth * (VoltageLevel < 2 ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : 3299 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3300 3301 if (HostVMEnable != true) 3302 ReturnBW = PixelDataOnlyReturnBW; 3303 else 3304 ReturnBW = PixelMixedWithVMDataReturnBW; 3305 3306 #ifdef __DML_VBA_DEBUG__ 3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3308 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); 3309 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3310 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3311 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3312 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); 3313 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); 3314 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); 3315 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); 3316 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); 3317 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); 3318 #endif 3319 return ReturnBW; 3320 } 3321 3322 // Function: get_return_bw_mbps_vm_only 3323 // Megabyte per second 3324 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, 3325 const int VoltageLevel, 3326 const double DCFCLK, 3327 const double FabricClock, 3328 const double DRAMSpeed) 3329 { 3330 double VMDataOnlyReturnBW = dml_min3( 3331 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3332 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes 3333 * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3334 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes 3335 * (VoltageLevel < 2 ? 
3336 soc->pct_ideal_dram_bw_after_urgent_strobe : 3337 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); 3338 #ifdef __DML_VBA_DEBUG__ 3339 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3340 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3341 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3342 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3343 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 3344 #endif 3345 return VMDataOnlyReturnBW; 3346 } 3347 3348 double dml32_CalculateExtraLatency( 3349 unsigned int RoundTripPingLatencyCycles, 3350 unsigned int ReorderingBytes, 3351 double DCFCLK, 3352 unsigned int TotalNumberOfActiveDPP, 3353 unsigned int PixelChunkSizeInKByte, 3354 unsigned int TotalNumberOfDCCActiveDPP, 3355 unsigned int MetaChunkSize, 3356 double ReturnBW, 3357 bool GPUVMEnable, 3358 bool HostVMEnable, 3359 unsigned int NumberOfActiveSurfaces, 3360 unsigned int NumberOfDPP[], 3361 unsigned int dpte_group_bytes[], 3362 double HostVMInefficiencyFactor, 3363 double HostVMMinPageSize, 3364 unsigned int HostVMMaxNonCachedPageTableLevels) 3365 { 3366 double ExtraLatencyBytes; 3367 double ExtraLatency; 3368 3369 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( 3370 ReorderingBytes, 3371 TotalNumberOfActiveDPP, 3372 PixelChunkSizeInKByte, 3373 TotalNumberOfDCCActiveDPP, 3374 MetaChunkSize, 3375 GPUVMEnable, 3376 HostVMEnable, 3377 NumberOfActiveSurfaces, 3378 NumberOfDPP, 3379 dpte_group_bytes, 3380 HostVMInefficiencyFactor, 3381 HostVMMinPageSize, 3382 HostVMMaxNonCachedPageTableLevels); 3383 3384 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 3385 3386 #ifdef __DML_VBA_DEBUG__ 3387 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 3388 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 3389 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, 
ExtraLatencyBytes); 3390 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 3391 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 3392 #endif 3393 3394 return ExtraLatency; 3395 } // CalculateExtraLatency 3396 3397 bool dml32_CalculatePrefetchSchedule( 3398 struct vba_vars_st *v, 3399 unsigned int k, 3400 double HostVMInefficiencyFactor, 3401 DmlPipe *myPipe, 3402 unsigned int DSCDelay, 3403 unsigned int DPP_RECOUT_WIDTH, 3404 unsigned int VStartup, 3405 unsigned int MaxVStartup, 3406 double UrgentLatency, 3407 double UrgentExtraLatency, 3408 double TCalc, 3409 unsigned int PDEAndMetaPTEBytesFrame, 3410 unsigned int MetaRowByte, 3411 unsigned int PixelPTEBytesPerRow, 3412 double PrefetchSourceLinesY, 3413 unsigned int SwathWidthY, 3414 unsigned int VInitPreFillY, 3415 unsigned int MaxNumSwathY, 3416 double PrefetchSourceLinesC, 3417 unsigned int SwathWidthC, 3418 unsigned int VInitPreFillC, 3419 unsigned int MaxNumSwathC, 3420 unsigned int swath_width_luma_ub, 3421 unsigned int swath_width_chroma_ub, 3422 unsigned int SwathHeightY, 3423 unsigned int SwathHeightC, 3424 double TWait, 3425 double TPreReq, 3426 /* Output */ 3427 double *DSTXAfterScaler, 3428 double *DSTYAfterScaler, 3429 double *DestinationLinesForPrefetch, 3430 double *PrefetchBandwidth, 3431 double *DestinationLinesToRequestVMInVBlank, 3432 double *DestinationLinesToRequestRowInVBlank, 3433 double *VRatioPrefetchY, 3434 double *VRatioPrefetchC, 3435 double *RequiredPrefetchPixDataBWLuma, 3436 double *RequiredPrefetchPixDataBWChroma, 3437 bool *NotEnoughTimeForDynamicMetadata, 3438 double *Tno_bw, 3439 double *prefetch_vmrow_bw, 3440 double *Tdmdl_vm, 3441 double *Tdmdl, 3442 double *TSetup, 3443 unsigned int *VUpdateOffsetPix, 3444 double *VUpdateWidthPix, 3445 double *VReadyOffsetPix) 3446 { 3447 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 3448 bool MyError = false; 3449 unsigned int DPPCycles, DISPCLKCycles; 3450 double 
DSTTotalPixelsAfterScaler; 3451 double LineTime; 3452 double dst_y_prefetch_equ; 3453 double prefetch_bw_oto; 3454 double Tvm_oto; 3455 double Tr0_oto; 3456 double Tvm_oto_lines; 3457 double Tr0_oto_lines; 3458 double dst_y_prefetch_oto; 3459 double TimeForFetchingMetaPTE = 0; 3460 double TimeForFetchingRowInVBlank = 0; 3461 double LinesToRequestPrefetchPixelData = 0; 3462 double LinesForPrefetchBandwidth = 0; 3463 unsigned int HostVMDynamicLevelsTrips; 3464 double trip_to_mem; 3465 double Tvm_trips; 3466 double Tr0_trips; 3467 double Tvm_trips_rounded; 3468 double Tr0_trips_rounded; 3469 double Lsw_oto; 3470 double Tpre_rounded; 3471 double prefetch_bw_equ; 3472 double Tvm_equ; 3473 double Tr0_equ; 3474 double Tdmbf; 3475 double Tdmec; 3476 double Tdmsks; 3477 double prefetch_sw_bytes; 3478 double bytes_pp; 3479 double dep_bytes; 3480 unsigned int max_vratio_pre = v->MaxVRatioPre; 3481 double min_Lsw; 3482 double Tsw_est1 = 0; 3483 double Tsw_est3 = 0; 3484 3485 if (v->GPUVMEnable == true && v->HostVMEnable == true) 3486 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3487 else 3488 HostVMDynamicLevelsTrips = 0; 3489 #ifdef __DML_VBA_DEBUG__ 3490 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); 3491 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); 3492 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3493 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3494 __func__, v->HostVMEnable, HostVMInefficiencyFactor); 3495 #endif 3496 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3497 v->MaxInterDCNTileRepeaters, 3498 myPipe->Dppclk, 3499 myPipe->Dispclk, 3500 myPipe->DCFClkDeepSleep, 3501 myPipe->PixelClock, 3502 myPipe->HTotal, 3503 myPipe->VBlank, 3504 v->DynamicMetadataTransmittedBytes[k], 3505 v->DynamicMetadataLinesBeforeActiveRequired[k], 3506 myPipe->InterlaceEnable, 3507 myPipe->ProgressiveToInterlaceUnitInOPP, 3508 TSetup, 3509 
3510 /* output */ 3511 &Tdmbf, 3512 &Tdmec, 3513 &Tdmsks, 3514 VUpdateOffsetPix, 3515 VUpdateWidthPix, 3516 VReadyOffsetPix); 3517 3518 LineTime = myPipe->HTotal / myPipe->PixelClock; 3519 trip_to_mem = UrgentLatency; 3520 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3521 3522 if (v->DynamicMetadataVMEnabled == true) 3523 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3524 else 3525 *Tdmdl = TWait + UrgentExtraLatency; 3526 3527 #ifdef __DML_VBA_ALLOW_DELTA__ 3528 if (v->DynamicMetadataEnable[k] == false) 3529 *Tdmdl = 0.0; 3530 #endif 3531 3532 if (v->DynamicMetadataEnable[k] == true) { 3533 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3534 *NotEnoughTimeForDynamicMetadata = true; 3535 #ifdef __DML_VBA_DEBUG__ 3536 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3537 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3538 __func__, Tdmbf); 3539 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3540 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3541 __func__, Tdmsks); 3542 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3543 __func__, *Tdmdl); 3544 #endif 3545 } else { 3546 *NotEnoughTimeForDynamicMetadata = false; 3547 } 3548 } else { 3549 *NotEnoughTimeForDynamicMetadata = false; 3550 } 3551 3552 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && 3553 v->GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 3554 3555 if (myPipe->ScalerEnabled) 3556 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 3557 else 3558 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 3559 3560 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 3561 3562 DISPCLKCycles = v->DISPCLKDelaySubtotal; 3563 3564 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3565 return true; 3566 3567 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3568 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3569 3570 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3571 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3572 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3573 myPipe->HActive / 2 : 0) 3574 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3575 3576 #ifdef __DML_VBA_DEBUG__ 3577 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3578 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3579 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3580 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3581 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3582 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3583 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3584 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3585 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3586 #endif 3587 3588 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3589 *DSTYAfterScaler = 1; 3590 else 3591 *DSTYAfterScaler = 0; 3592 3593 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3594 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 
1); 3595 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3596 #ifdef __DML_VBA_DEBUG__ 3597 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3598 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3599 #endif 3600 3601 MyError = false; 3602 3603 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3604 3605 if (v->GPUVMEnable == true) { 3606 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3607 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3608 if (v->GPUVMMaxPageTableLevels >= 3) { 3609 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3610 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3611 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { 3612 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3613 4.0 * LineTime; // VBA_ERROR 3614 *Tno_bw = UrgentExtraLatency; 3615 } else { 3616 *Tno_bw = 0; 3617 } 3618 } else if (myPipe->DCCEnable == true) { 3619 Tvm_trips_rounded = LineTime / 4.0; 3620 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3621 *Tno_bw = 0; 3622 } else { 3623 Tvm_trips_rounded = LineTime / 4.0; 3624 Tr0_trips_rounded = LineTime / 2.0; 3625 *Tno_bw = 0; 3626 } 3627 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3628 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3629 3630 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3631 || myPipe->SourcePixelFormat == dm_420_12) { 3632 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3633 } else { 3634 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3635 } 3636 3637 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3638 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3639 
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3640 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3641 3642 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3643 min_Lsw = dml_max(min_Lsw, 1.0); 3644 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3645 3646 if (v->GPUVMEnable == true) { 3647 Tvm_oto = dml_max3( 3648 Tvm_trips, 3649 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3650 LineTime / 4.0); 3651 } else 3652 Tvm_oto = LineTime / 4.0; 3653 3654 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3655 Tr0_oto = dml_max4( 3656 Tr0_trips, 3657 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3658 (LineTime - Tvm_oto)/2.0, 3659 LineTime / 4.0); 3660 #ifdef __DML_VBA_DEBUG__ 3661 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3662 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3663 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3664 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3665 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3666 #endif 3667 } else 3668 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3669 3670 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3671 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3672 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3673 3674 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3675 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3676 3677 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); 3678 #ifdef __DML_VBA_DEBUG__ 3679 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3680 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3681 
dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3682 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3683 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3684 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3685 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3686 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3687 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3688 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3689 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3690 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3691 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3692 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3693 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3694 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3695 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3696 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3697 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3698 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3699 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3700 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3701 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3702 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3703 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3704 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3705 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3706 #endif 3707 3708 dst_y_prefetch_equ = dml_floor(4.0 * 
(dst_y_prefetch_equ + 0.125), 1) / 4.0; 3709 Tpre_rounded = dst_y_prefetch_equ * LineTime; 3710 #ifdef __DML_VBA_DEBUG__ 3711 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3712 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3713 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3714 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3715 __func__, VStartup * LineTime); 3716 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3717 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3718 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3719 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3720 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3721 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3722 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3723 __func__, *DSTYAfterScaler); 3724 #endif 3725 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3726 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3727 3728 if (prefetch_sw_bytes < dep_bytes) 3729 prefetch_sw_bytes = 2 * dep_bytes; 3730 3731 *PrefetchBandwidth = 0; 3732 *DestinationLinesToRequestVMInVBlank = 0; 3733 *DestinationLinesToRequestRowInVBlank = 0; 3734 *VRatioPrefetchY = 0; 3735 *VRatioPrefetchC = 0; 3736 *RequiredPrefetchPixDataBWLuma = 0; 3737 if (dst_y_prefetch_equ > 1 && 3738 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { 3739 double PrefetchBandwidth1; 3740 double PrefetchBandwidth2; 3741 double PrefetchBandwidth3; 3742 double PrefetchBandwidth4; 3743 3744 if (Tpre_rounded - *Tno_bw > 0) { 
3745 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3746 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3747 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3748 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3749 } else 3750 PrefetchBandwidth1 = 0; 3751 3752 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3753 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3754 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3755 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3756 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3757 } 3758 3759 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3760 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3761 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3762 else 3763 PrefetchBandwidth2 = 0; 3764 3765 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3766 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3767 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3768 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3769 } else 3770 PrefetchBandwidth3 = 0; 3771 3772 3773 if (VStartup == MaxVStartup && 3774 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3775 LineTime - Tvm_trips_rounded > 0) { 3776 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3777 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3778 } 3779 3780 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3781 PrefetchBandwidth4 = prefetch_sw_bytes / 3782 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3783 } else { 3784 PrefetchBandwidth4 = 0; 3785 } 3786 3787 #ifdef __DML_VBA_DEBUG__ 3788 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3789 dml_print("DML::%s: Tno_bw: 
%f\n", __func__, *Tno_bw); 3790 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3791 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3792 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3793 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3794 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3795 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3796 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3797 #endif 3798 { 3799 bool Case1OK; 3800 bool Case2OK; 3801 bool Case3OK; 3802 3803 if (PrefetchBandwidth1 > 0) { 3804 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3805 >= Tvm_trips_rounded 3806 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3807 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3808 Case1OK = true; 3809 } else { 3810 Case1OK = false; 3811 } 3812 } else { 3813 Case1OK = false; 3814 } 3815 3816 if (PrefetchBandwidth2 > 0) { 3817 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3818 >= Tvm_trips_rounded 3819 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3820 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3821 Case2OK = true; 3822 } else { 3823 Case2OK = false; 3824 } 3825 } else { 3826 Case2OK = false; 3827 } 3828 3829 if (PrefetchBandwidth3 > 0) { 3830 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3831 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3832 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3833 Tr0_trips_rounded) { 3834 Case3OK = true; 3835 } else { 3836 Case3OK = false; 3837 } 3838 } else { 3839 Case3OK = false; 3840 } 3841 3842 if (Case1OK) 3843 prefetch_bw_equ = PrefetchBandwidth1; 3844 else if (Case2OK) 3845 prefetch_bw_equ = PrefetchBandwidth2; 3846 else if (Case3OK) 3847 prefetch_bw_equ = PrefetchBandwidth3; 3848 
else 3849 prefetch_bw_equ = PrefetchBandwidth4; 3850 3851 #ifdef __DML_VBA_DEBUG__ 3852 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3853 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3854 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3855 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3856 #endif 3857 3858 if (prefetch_bw_equ > 0) { 3859 if (v->GPUVMEnable == true) { 3860 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3861 HostVMInefficiencyFactor / prefetch_bw_equ, 3862 Tvm_trips, LineTime / 4); 3863 } else { 3864 Tvm_equ = LineTime / 4; 3865 } 3866 3867 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3868 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3869 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3870 (LineTime - Tvm_equ) / 2, LineTime / 4); 3871 } else { 3872 Tr0_equ = (LineTime - Tvm_equ) / 2; 3873 } 3874 } else { 3875 Tvm_equ = 0; 3876 Tr0_equ = 0; 3877 #ifdef __DML_VBA_DEBUG__ 3878 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 3879 #endif 3880 } 3881 } 3882 3883 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3884 if (dst_y_prefetch_oto * LineTime < TPreReq) { 3885 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3886 } else { 3887 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3888 } 3889 TimeForFetchingMetaPTE = Tvm_oto; 3890 TimeForFetchingRowInVBlank = Tr0_oto; 3891 *PrefetchBandwidth = prefetch_bw_oto; 3892 /* Clamp to oto for bandwidth calculation */ 3893 LinesForPrefetchBandwidth = dst_y_prefetch_oto; 3894 } else { 3895 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3896 TimeForFetchingMetaPTE = Tvm_equ; 3897 TimeForFetchingRowInVBlank = Tr0_equ; 3898 *PrefetchBandwidth = prefetch_bw_equ; 3899 /* Clamp to equ for bandwidth calculation */ 3900 LinesForPrefetchBandwidth = dst_y_prefetch_equ; 3901 } 3902 3903 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3904 3905 *DestinationLinesToRequestRowInVBlank = 3906 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3907 3908 LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth - 3909 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3910 3911 #ifdef __DML_VBA_DEBUG__ 3912 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3913 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3914 __func__, *DestinationLinesToRequestVMInVBlank); 3915 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3916 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3917 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3918 __func__, *DestinationLinesToRequestRowInVBlank); 3919 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3920 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3921 #endif 3922 3923 if 
(LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3924 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3925 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3926 #ifdef __DML_VBA_DEBUG__ 3927 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3928 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3929 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3930 #endif 3931 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3932 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3933 *VRatioPrefetchY = 3934 dml_max((double) PrefetchSourceLinesY / 3935 LinesToRequestPrefetchPixelData, 3936 (double) MaxNumSwathY * SwathHeightY / 3937 (LinesToRequestPrefetchPixelData - 3938 (VInitPreFillY - 3.0) / 2.0)); 3939 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3940 } else { 3941 MyError = true; 3942 *VRatioPrefetchY = 0; 3943 } 3944 #ifdef __DML_VBA_DEBUG__ 3945 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3946 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3947 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3948 #endif 3949 } 3950 3951 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3952 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3953 3954 #ifdef __DML_VBA_DEBUG__ 3955 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3956 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3957 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3958 #endif 3959 if ((SwathHeightC > 4)) { 3960 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3961 *VRatioPrefetchC = 3962 dml_max(*VRatioPrefetchC, 3963 (double) MaxNumSwathC * SwathHeightC / 3964 (LinesToRequestPrefetchPixelData - 3965 (VInitPreFillC - 3.0) / 2.0)); 3966 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 
1.0); 3967 } else { 3968 MyError = true; 3969 *VRatioPrefetchC = 0; 3970 } 3971 #ifdef __DML_VBA_DEBUG__ 3972 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3973 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3974 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3975 #endif 3976 } 3977 3978 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3979 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3980 / LineTime; 3981 3982 #ifdef __DML_VBA_DEBUG__ 3983 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3984 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3985 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3986 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3987 __func__, *RequiredPrefetchPixDataBWLuma); 3988 #endif 3989 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3990 LinesToRequestPrefetchPixelData 3991 * myPipe->BytePerPixelC 3992 * swath_width_chroma_ub / LineTime; 3993 } else { 3994 MyError = true; 3995 #ifdef __DML_VBA_DEBUG__ 3996 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3997 __func__, LinesToRequestPrefetchPixelData); 3998 #endif 3999 *VRatioPrefetchY = 0; 4000 *VRatioPrefetchC = 0; 4001 *RequiredPrefetchPixDataBWLuma = 0; 4002 *RequiredPrefetchPixDataBWChroma = 0; 4003 } 4004 #ifdef __DML_VBA_DEBUG__ 4005 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 4006 (double)LinesToRequestPrefetchPixelData * LineTime + 4007 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 4008 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 4009 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 4010 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 4011 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 4012 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 4013 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 4014 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 4015 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 4016 PixelPTEBytesPerRow); 4017 #endif 4018 } else { 4019 MyError = true; 4020 #ifdef __DML_VBA_DEBUG__ 4021 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 4022 __func__, dst_y_prefetch_equ); 4023 #endif 4024 } 4025 4026 { 4027 double prefetch_vm_bw; 4028 double prefetch_row_bw; 4029 4030 if (PDEAndMetaPTEBytesFrame == 0) { 4031 prefetch_vm_bw = 0; 4032 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4033 #ifdef __DML_VBA_DEBUG__ 4034 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4035 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4036 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4037 __func__, 
*DestinationLinesToRequestVMInVBlank); 4038 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4039 #endif 4040 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4041 (*DestinationLinesToRequestVMInVBlank * LineTime); 4042 #ifdef __DML_VBA_DEBUG__ 4043 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4044 #endif 4045 } else { 4046 prefetch_vm_bw = 0; 4047 MyError = true; 4048 #ifdef __DML_VBA_DEBUG__ 4049 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4050 __func__, *DestinationLinesToRequestVMInVBlank); 4051 #endif 4052 } 4053 4054 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4055 prefetch_row_bw = 0; 4056 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4057 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4058 (*DestinationLinesToRequestRowInVBlank * LineTime); 4059 4060 #ifdef __DML_VBA_DEBUG__ 4061 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4062 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4063 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4064 __func__, *DestinationLinesToRequestRowInVBlank); 4065 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4066 #endif 4067 } else { 4068 prefetch_row_bw = 0; 4069 MyError = true; 4070 #ifdef __DML_VBA_DEBUG__ 4071 dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4072 __func__, *DestinationLinesToRequestRowInVBlank); 4073 #endif 4074 } 4075 4076 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4077 } 4078 4079 if (MyError) { 4080 *PrefetchBandwidth = 0; 4081 TimeForFetchingMetaPTE = 0; 4082 TimeForFetchingRowInVBlank = 0; 4083 *DestinationLinesToRequestVMInVBlank = 0; 4084 *DestinationLinesToRequestRowInVBlank = 0; 4085 *DestinationLinesForPrefetch = 0; 4086 LinesToRequestPrefetchPixelData = 0; 4087 *VRatioPrefetchY = 0; 4088 *VRatioPrefetchC = 0; 4089 *RequiredPrefetchPixDataBWLuma = 0; 4090 *RequiredPrefetchPixDataBWChroma = 0; 4091 } 4092 4093 return MyError; 4094 } // CalculatePrefetchSchedule 4095 4096 void dml32_CalculateFlipSchedule( 4097 double HostVMInefficiencyFactor, 4098 double UrgentExtraLatency, 4099 double UrgentLatency, 4100 unsigned int GPUVMMaxPageTableLevels, 4101 bool HostVMEnable, 4102 unsigned int HostVMMaxNonCachedPageTableLevels, 4103 bool GPUVMEnable, 4104 double HostVMMinPageSize, 4105 double PDEAndMetaPTEBytesPerFrame, 4106 double MetaRowBytes, 4107 double DPTEBytesPerRow, 4108 double BandwidthAvailableForImmediateFlip, 4109 unsigned int TotImmediateFlipBytes, 4110 enum source_format_class SourcePixelFormat, 4111 double LineTime, 4112 double VRatio, 4113 double VRatioChroma, 4114 double Tno_bw, 4115 bool DCCEnable, 4116 unsigned int dpte_row_height, 4117 unsigned int meta_row_height, 4118 unsigned int dpte_row_height_chroma, 4119 unsigned int meta_row_height_chroma, 4120 bool use_one_row_for_frame_flip, 4121 4122 /* Output */ 4123 double *DestinationLinesToRequestVMInImmediateFlip, 4124 double *DestinationLinesToRequestRowInImmediateFlip, 4125 double *final_flip_bw, 4126 bool *ImmediateFlipSupportedForPipe) 4127 { 4128 double min_row_time = 0.0; 4129 unsigned int HostVMDynamicLevelsTrips; 4130 double TimeForFetchingMetaPTEImmediateFlip; 4131 double TimeForFetchingRowInVBlankImmediateFlip; 4132 double ImmediateFlipBW = 1.0; 
4133 4134 if (GPUVMEnable == true && HostVMEnable == true) 4135 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4136 else 4137 HostVMDynamicLevelsTrips = 0; 4138 4139 #ifdef __DML_VBA_DEBUG__ 4140 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4141 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4142 #endif 4143 4144 if (TotImmediateFlipBytes > 0) { 4145 if (use_one_row_for_frame_flip) { 4146 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4147 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4148 } else { 4149 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4150 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4151 } 4152 if (GPUVMEnable == true) { 4153 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4154 HostVMInefficiencyFactor / ImmediateFlipBW, 4155 UrgentExtraLatency + UrgentLatency * 4156 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4157 LineTime / 4.0); 4158 } else { 4159 TimeForFetchingMetaPTEImmediateFlip = 0; 4160 } 4161 if ((GPUVMEnable == true || DCCEnable == true)) { 4162 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4163 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4164 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4165 } else { 4166 TimeForFetchingRowInVBlankImmediateFlip = 0; 4167 } 4168 4169 *DestinationLinesToRequestVMInImmediateFlip = 4170 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4171 *DestinationLinesToRequestRowInImmediateFlip = 4172 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4173 4174 if (GPUVMEnable == true) { 4175 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4176 (*DestinationLinesToRequestVMInImmediateFlip * 
LineTime), 4177 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4178 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4179 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4180 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4181 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4182 } else { 4183 *final_flip_bw = 0; 4184 } 4185 } else { 4186 TimeForFetchingMetaPTEImmediateFlip = 0; 4187 TimeForFetchingRowInVBlankImmediateFlip = 0; 4188 *DestinationLinesToRequestVMInImmediateFlip = 0; 4189 *DestinationLinesToRequestRowInImmediateFlip = 0; 4190 *final_flip_bw = 0; 4191 } 4192 4193 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4194 if (GPUVMEnable == true && DCCEnable != true) { 4195 min_row_time = dml_min(dpte_row_height * 4196 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4197 } else if (GPUVMEnable != true && DCCEnable == true) { 4198 min_row_time = dml_min(meta_row_height * 4199 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4200 } else { 4201 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4202 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4203 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4204 } 4205 } else { 4206 if (GPUVMEnable == true && DCCEnable != true) { 4207 min_row_time = dpte_row_height * LineTime / VRatio; 4208 } else if (GPUVMEnable != true && DCCEnable == true) { 4209 min_row_time = meta_row_height * LineTime / VRatio; 4210 } else { 4211 min_row_time = 4212 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4213 } 4214 } 4215 4216 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4217 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4218 > min_row_time) { 4219 
*ImmediateFlipSupportedForPipe = false; 4220 } else { 4221 *ImmediateFlipSupportedForPipe = true; 4222 } 4223 4224 #ifdef __DML_VBA_DEBUG__ 4225 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4226 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4227 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4228 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4229 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4230 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4231 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4232 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4233 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4234 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4235 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4236 #endif 4237 } // CalculateFlipSchedule 4238 4239 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4240 struct vba_vars_st *v, 4241 unsigned int PrefetchMode, 4242 double DCFCLK, 4243 double ReturnBW, 4244 SOCParametersList mmSOCParameters, 4245 double SOCCLK, 4246 double DCFClkDeepSleep, 4247 unsigned int DETBufferSizeY[], 4248 unsigned int DETBufferSizeC[], 4249 unsigned int SwathHeightY[], 4250 unsigned int SwathHeightC[], 4251 double SwathWidthY[], 4252 double SwathWidthC[], 4253 unsigned int DPPPerSurface[], 4254 double BytePerPixelDETY[], 4255 double BytePerPixelDETC[], 4256 double DSTXAfterScaler[], 4257 double DSTYAfterScaler[], 4258 bool UnboundedRequestEnabled, 4259 unsigned int CompressedBufferSizeInkByte, 4260 4261 /* Output */ 4262 enum clock_change_support *DRAMClockChangeSupport, 4263 double MaxActiveDRAMClockChangeLatencySupported[], 4264 unsigned int SubViewportLinesNeededInMALL[], 4265 enum dm_fclock_change_support *FCLKChangeSupport, 4266 double 
*MinActiveFCLKChangeLatencySupported, 4267 bool *USRRetrainingSupport, 4268 double ActiveDRAMClockChangeLatencyMargin[]) 4269 { 4270 unsigned int i, j, k; 4271 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4272 unsigned int DRAMClockChangeSupportNumber = 0; 4273 unsigned int LastSurfaceWithoutMargin; 4274 unsigned int DRAMClockChangeMethod = 0; 4275 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4276 double MinActiveFCLKChangeMargin = 0.; 4277 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4278 double ActiveClockChangeLatencyHidingY; 4279 double ActiveClockChangeLatencyHidingC; 4280 double ActiveClockChangeLatencyHiding; 4281 double EffectiveDETBufferSizeY; 4282 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4283 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4284 double TotalPixelBW = 0.0; 4285 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4286 double EffectiveLBLatencyHidingY; 4287 double EffectiveLBLatencyHidingC; 4288 double LinesInDETY[DC__NUM_DPP__MAX]; 4289 double LinesInDETC[DC__NUM_DPP__MAX]; 4290 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4291 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4292 double FullDETBufferingTimeY; 4293 double FullDETBufferingTimeC; 4294 double WritebackDRAMClockChangeLatencyMargin; 4295 double WritebackFCLKChangeLatencyMargin; 4296 double WritebackLatencyHiding; 4297 bool SameTimingForFCLKChange; 4298 4299 unsigned int TotalActiveWriteback = 0; 4300 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4301 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4302 4303 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4304 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4305 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4306 v->Watermark.DRAMClockChangeWatermark = 
mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; 4307 v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; 4308 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4309 + 10 / DCFClkDeepSleep; 4310 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4311 + 10 / DCFClkDeepSleep; 4312 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4313 + 10 / DCFClkDeepSleep; 4314 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4315 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4316 4317 #ifdef __DML_VBA_DEBUG__ 4318 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4319 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4320 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4321 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); 4322 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); 4323 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); 4324 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); 4325 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); 4326 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); 4327 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); 4328 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4329 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); 4330 #endif 4331 4332 4333 TotalActiveWriteback = 0; 4334 for (k = 0; k < 
v->NumberOfActiveSurfaces; ++k) { 4335 if (v->WritebackEnable[k] == true) 4336 TotalActiveWriteback = TotalActiveWriteback + 1; 4337 } 4338 4339 if (TotalActiveWriteback <= 1) { 4340 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4341 } else { 4342 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4343 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4344 } 4345 if (v->USRRetrainingRequiredFinal) 4346 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4347 + mmSOCParameters.USRRetrainingLatency; 4348 4349 if (TotalActiveWriteback <= 1) { 4350 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4351 + mmSOCParameters.WritebackLatency; 4352 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4353 + mmSOCParameters.WritebackLatency; 4354 } else { 4355 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4356 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4357 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4358 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; 4359 } 4360 4361 if (v->USRRetrainingRequiredFinal) 4362 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4363 + mmSOCParameters.USRRetrainingLatency; 4364 4365 if (v->USRRetrainingRequiredFinal) 4366 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark 4367 + mmSOCParameters.USRRetrainingLatency; 4368 4369 #ifdef __DML_VBA_DEBUG__ 4370 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4371 __func__, v->Watermark.WritebackDRAMClockChangeWatermark); 4372 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); 4373 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, 
v->Watermark.WritebackUrgentWatermark); 4374 dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); 4375 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4376 #endif 4377 4378 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4379 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + 4380 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); 4381 } 4382 4383 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4384 4385 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 4386 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 4387 4388 4389 #ifdef __DML_VBA_DEBUG__ 4390 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); 4391 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); 4392 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); 4393 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); 4394 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); 4395 #endif 4396 4397 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 4398 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 4399 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4400 4401 if (UnboundedRequestEnabled) { 4402 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4403 + CompressedBufferSizeInkByte * 1024 4404 * (SwathWidthY[k] * 
BytePerPixelDETY[k] * v->VRatio[k]) 4405 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 4406 } 4407 4408 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4409 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4410 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 4411 4412 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4413 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; 4414 4415 if (v->NumberOfActiveSurfaces > 1) { 4416 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4417 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] 4418 / v->PixelClock[k] / v->VRatio[k]; 4419 } 4420 4421 if (BytePerPixelDETC[k] > 0) { 4422 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4423 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4424 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) 4425 / v->VRatioChroma[k]; 4426 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4427 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] 4428 / v->PixelClock[k]; 4429 if (v->NumberOfActiveSurfaces > 1) { 4430 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4431 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] 4432 / v->PixelClock[k] / v->VRatioChroma[k]; 4433 } 4434 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4435 ActiveClockChangeLatencyHidingC); 4436 } else { 4437 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4438 } 4439 4440 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4441 - v->Watermark.DRAMClockChangeWatermark; 4442 
ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4443 - v->Watermark.FCLKChangeWatermark; 4444 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; 4445 4446 if (v->WritebackEnable[k]) { 4447 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 4448 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4449 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 4450 if (v->WritebackPixelFormat[k] == dm_444_64) 4451 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4452 4453 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4454 - v->Watermark.WritebackDRAMClockChangeWatermark; 4455 4456 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4457 - v->Watermark.WritebackFCLKChangeWatermark; 4458 4459 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4460 WritebackFCLKChangeLatencyMargin); 4461 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4462 WritebackDRAMClockChangeLatencyMargin); 4463 } 4464 MaxActiveDRAMClockChangeLatencySupported[k] = 4465 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
4466 0 : 4467 (ActiveDRAMClockChangeLatencyMargin[k] 4468 + mmSOCParameters.DRAMClockChangeLatency); 4469 } 4470 4471 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { 4472 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { 4473 if (i == j || 4474 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || 4475 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || 4476 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || 4477 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && 4478 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && 4479 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4480 (v->DRRDisplay[i] || v->DRRDisplay[j]))) { 4481 SynchronizedSurfaces[i][j] = true; 4482 } else { 4483 SynchronizedSurfaces[i][j] = false; 4484 } 4485 } 4486 } 4487 4488 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4489 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4490 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4491 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4492 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4493 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4494 SurfaceWithMinActiveFCLKChangeMargin = k; 4495 } 4496 } 4497 4498 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4499 4500 SameTimingForFCLKChange = true; 4501 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4502 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4503 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4504 (SameTimingForFCLKChange || 4505 ActiveFCLKChangeLatencyMargin[k] < 4506 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4507 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4508 } 4509 SameTimingForFCLKChange = false; 4510 } 4511 } 4512 
4513 if (MinActiveFCLKChangeMargin > 0) { 4514 *FCLKChangeSupport = dm_fclock_change_vactive; 4515 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4516 (PrefetchMode <= 1)) { 4517 *FCLKChangeSupport = dm_fclock_change_vblank; 4518 } else { 4519 *FCLKChangeSupport = dm_fclock_change_unsupported; 4520 } 4521 4522 *USRRetrainingSupport = true; 4523 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4524 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4525 (USRRetrainingLatencyMargin[k] < 0)) { 4526 *USRRetrainingSupport = false; 4527 } 4528 } 4529 4530 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4531 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4532 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4533 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4534 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4535 if (PrefetchMode > 0) { 4536 DRAMClockChangeSupportNumber = 2; 4537 } else if (DRAMClockChangeSupportNumber == 0) { 4538 DRAMClockChangeSupportNumber = 1; 4539 LastSurfaceWithoutMargin = k; 4540 } else if (DRAMClockChangeSupportNumber == 1 && 4541 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4542 DRAMClockChangeSupportNumber = 2; 4543 } 4544 } 4545 } 4546 4547 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4548 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4549 DRAMClockChangeMethod = 1; 4550 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4551 DRAMClockChangeMethod = 2; 4552 } 4553 4554 if (DRAMClockChangeMethod == 0) { 4555 if (DRAMClockChangeSupportNumber == 0) 4556 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4557 else if (DRAMClockChangeSupportNumber == 1) 4558 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4559 else 4560 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4561 } else if 
(DRAMClockChangeMethod == 1) { 4562 if (DRAMClockChangeSupportNumber == 0) 4563 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4564 else if (DRAMClockChangeSupportNumber == 1) 4565 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4566 else 4567 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4568 } else { 4569 if (DRAMClockChangeSupportNumber == 0) 4570 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4571 else if (DRAMClockChangeSupportNumber == 1) 4572 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4573 else 4574 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4575 } 4576 4577 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4578 unsigned int dst_y_pstate; 4579 unsigned int src_y_pstate_l; 4580 unsigned int src_y_pstate_c; 4581 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4582 4583 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); 4584 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); 4585 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4586 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; 4587 4588 #ifdef __DML_VBA_DEBUG__ 4589 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4590 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4591 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4592 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4593 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4594 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4595 dml_print("DML::%s: k=%d, src_y_pstate_l 
= %d\n", __func__, k, src_y_pstate_l); 4596 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4597 dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); 4598 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4599 #endif 4600 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4601 4602 if (BytePerPixelDETC[k] > 0) { 4603 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); 4604 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4605 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; 4606 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4607 4608 #ifdef __DML_VBA_DEBUG__ 4609 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4610 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4611 dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); 4612 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4613 #endif 4614 } 4615 } 4616 #ifdef __DML_VBA_DEBUG__ 4617 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4618 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4619 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4620 __func__, *MinActiveFCLKChangeLatencySupported); 4621 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4622 #endif 4623 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 4624 4625 double dml32_CalculateWriteBackDISPCLK( 4626 enum source_format_class WritebackPixelFormat, 4627 double PixelClock, 4628 double WritebackHRatio, 4629 double WritebackVRatio, 4630 unsigned int WritebackHTaps, 4631 unsigned int WritebackVTaps, 4632 unsigned int 
WritebackSourceWidth, 4633 unsigned int WritebackDestinationWidth, 4634 unsigned int HTotal, 4635 unsigned int WritebackLineBufferSize, 4636 double DISPCLKDPPCLKVCOSpeed) 4637 { 4638 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4639 4640 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4641 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4642 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4643 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4644 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4645 } 4646 4647 void dml32_CalculateMinAndMaxPrefetchMode( 4648 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4649 unsigned int *MinPrefetchMode, 4650 unsigned int *MaxPrefetchMode) 4651 { 4652 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4653 *MinPrefetchMode = 3; 4654 *MaxPrefetchMode = 3; 4655 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4656 *MinPrefetchMode = 2; 4657 *MaxPrefetchMode = 2; 4658 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4659 *MinPrefetchMode = 1; 4660 *MaxPrefetchMode = 1; 4661 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4662 *MinPrefetchMode = 0; 4663 *MaxPrefetchMode = 0; 4664 } else { 4665 *MinPrefetchMode = 0; 4666 *MaxPrefetchMode = 3; 4667 } 4668 } // CalculateMinAndMaxPrefetchMode 4669 4670 void dml32_CalculatePixelDeliveryTimes( 4671 unsigned int NumberOfActiveSurfaces, 4672 double VRatio[], 4673 double VRatioChroma[], 4674 double VRatioPrefetchY[], 4675 double VRatioPrefetchC[], 4676 unsigned int swath_width_luma_ub[], 4677 unsigned int swath_width_chroma_ub[], 4678 unsigned int DPPPerSurface[], 4679 double HRatio[], 4680 double HRatioChroma[], 4681 
double PixelClock[], 4682 double PSCL_THROUGHPUT[], 4683 double PSCL_THROUGHPUT_CHROMA[], 4684 double Dppclk[], 4685 unsigned int BytePerPixelC[], 4686 enum dm_rotation_angle SourceRotation[], 4687 unsigned int NumberOfCursors[], 4688 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4689 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4690 unsigned int BlockWidth256BytesY[], 4691 unsigned int BlockHeight256BytesY[], 4692 unsigned int BlockWidth256BytesC[], 4693 unsigned int BlockHeight256BytesC[], 4694 4695 /* Output */ 4696 double DisplayPipeLineDeliveryTimeLuma[], 4697 double DisplayPipeLineDeliveryTimeChroma[], 4698 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4699 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4700 double DisplayPipeRequestDeliveryTimeLuma[], 4701 double DisplayPipeRequestDeliveryTimeChroma[], 4702 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4703 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4704 double CursorRequestDeliveryTime[], 4705 double CursorRequestDeliveryTimePrefetch[]) 4706 { 4707 double req_per_swath_ub; 4708 unsigned int k; 4709 4710 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4711 4712 #ifdef __DML_VBA_DEBUG__ 4713 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4714 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4715 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4716 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4717 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4718 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4719 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4720 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4721 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 4722 
dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4723 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4724 #endif 4725 4726 if (VRatio[k] <= 1) { 4727 DisplayPipeLineDeliveryTimeLuma[k] = 4728 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4729 } else { 4730 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4731 } 4732 4733 if (BytePerPixelC[k] == 0) { 4734 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4735 } else { 4736 if (VRatioChroma[k] <= 1) { 4737 DisplayPipeLineDeliveryTimeChroma[k] = 4738 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4739 } else { 4740 DisplayPipeLineDeliveryTimeChroma[k] = 4741 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4742 } 4743 } 4744 4745 if (VRatioPrefetchY[k] <= 1) { 4746 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4747 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4748 } else { 4749 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4750 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4751 } 4752 4753 if (BytePerPixelC[k] == 0) { 4754 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4755 } else { 4756 if (VRatioPrefetchC[k] <= 1) { 4757 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4758 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4759 } else { 4760 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4761 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4762 } 4763 } 4764 #ifdef __DML_VBA_DEBUG__ 4765 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4766 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4767 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4768 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4769 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4770 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 
4771 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4772 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4773 #endif 4774 } 4775 4776 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4777 if (!IsVertical(SourceRotation[k])) 4778 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4779 else 4780 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4781 #ifdef __DML_VBA_DEBUG__ 4782 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4783 #endif 4784 4785 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4786 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4787 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4788 if (BytePerPixelC[k] == 0) { 4789 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4790 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4791 } else { 4792 if (!IsVertical(SourceRotation[k])) 4793 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4794 else 4795 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4796 #ifdef __DML_VBA_DEBUG__ 4797 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4798 #endif 4799 DisplayPipeRequestDeliveryTimeChroma[k] = 4800 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4801 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4802 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4803 } 4804 #ifdef __DML_VBA_DEBUG__ 4805 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4806 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4807 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4808 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4809 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4810 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 4811 
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4812 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4813 #endif 4814 } 4815 4816 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4817 unsigned int cursor_req_per_width; 4818 4819 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4820 256.0 / 8.0, 1.0); 4821 if (NumberOfCursors[k] > 0) { 4822 if (VRatio[k] <= 1) { 4823 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4824 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4825 } else { 4826 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4827 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4828 } 4829 if (VRatioPrefetchY[k] <= 1) { 4830 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4831 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4832 } else { 4833 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4834 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4835 } 4836 } else { 4837 CursorRequestDeliveryTime[k] = 0; 4838 CursorRequestDeliveryTimePrefetch[k] = 0; 4839 } 4840 #ifdef __DML_VBA_DEBUG__ 4841 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4842 __func__, k, NumberOfCursors[k]); 4843 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4844 __func__, k, CursorRequestDeliveryTime[k]); 4845 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4846 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4847 #endif 4848 } 4849 } // CalculatePixelDeliveryTimes 4850 4851 void dml32_CalculateMetaAndPTETimes( 4852 bool use_one_row_for_frame[], 4853 unsigned int NumberOfActiveSurfaces, 4854 bool GPUVMEnable, 4855 unsigned int MetaChunkSize, 4856 unsigned int MinMetaChunkSizeBytes, 4857 unsigned int HTotal[], 4858 double VRatio[], 4859 double VRatioChroma[], 4860 double DestinationLinesToRequestRowInVBlank[], 4861 double DestinationLinesToRequestRowInImmediateFlip[], 4862 bool 
DCCEnable[], 4863 double PixelClock[], 4864 unsigned int BytePerPixelY[], 4865 unsigned int BytePerPixelC[], 4866 enum dm_rotation_angle SourceRotation[], 4867 unsigned int dpte_row_height[], 4868 unsigned int dpte_row_height_chroma[], 4869 unsigned int meta_row_width[], 4870 unsigned int meta_row_width_chroma[], 4871 unsigned int meta_row_height[], 4872 unsigned int meta_row_height_chroma[], 4873 unsigned int meta_req_width[], 4874 unsigned int meta_req_width_chroma[], 4875 unsigned int meta_req_height[], 4876 unsigned int meta_req_height_chroma[], 4877 unsigned int dpte_group_bytes[], 4878 unsigned int PTERequestSizeY[], 4879 unsigned int PTERequestSizeC[], 4880 unsigned int PixelPTEReqWidthY[], 4881 unsigned int PixelPTEReqHeightY[], 4882 unsigned int PixelPTEReqWidthC[], 4883 unsigned int PixelPTEReqHeightC[], 4884 unsigned int dpte_row_width_luma_ub[], 4885 unsigned int dpte_row_width_chroma_ub[], 4886 4887 /* Output */ 4888 double DST_Y_PER_PTE_ROW_NOM_L[], 4889 double DST_Y_PER_PTE_ROW_NOM_C[], 4890 double DST_Y_PER_META_ROW_NOM_L[], 4891 double DST_Y_PER_META_ROW_NOM_C[], 4892 double TimePerMetaChunkNominal[], 4893 double TimePerChromaMetaChunkNominal[], 4894 double TimePerMetaChunkVBlank[], 4895 double TimePerChromaMetaChunkVBlank[], 4896 double TimePerMetaChunkFlip[], 4897 double TimePerChromaMetaChunkFlip[], 4898 double time_per_pte_group_nom_luma[], 4899 double time_per_pte_group_vblank_luma[], 4900 double time_per_pte_group_flip_luma[], 4901 double time_per_pte_group_nom_chroma[], 4902 double time_per_pte_group_vblank_chroma[], 4903 double time_per_pte_group_flip_chroma[]) 4904 { 4905 unsigned int meta_chunk_width; 4906 unsigned int min_meta_chunk_width; 4907 unsigned int meta_chunk_per_row_int; 4908 unsigned int meta_row_remainder; 4909 unsigned int meta_chunk_threshold; 4910 unsigned int meta_chunks_per_row_ub; 4911 unsigned int meta_chunk_width_chroma; 4912 unsigned int min_meta_chunk_width_chroma; 4913 unsigned int meta_chunk_per_row_int_chroma; 
4914 unsigned int meta_row_remainder_chroma; 4915 unsigned int meta_chunk_threshold_chroma; 4916 unsigned int meta_chunks_per_row_ub_chroma; 4917 unsigned int dpte_group_width_luma; 4918 unsigned int dpte_groups_per_row_luma_ub; 4919 unsigned int dpte_group_width_chroma; 4920 unsigned int dpte_groups_per_row_chroma_ub; 4921 unsigned int k; 4922 4923 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4924 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 4925 if (BytePerPixelC[k] == 0) 4926 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 4927 else 4928 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 4929 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 4930 if (BytePerPixelC[k] == 0) 4931 DST_Y_PER_META_ROW_NOM_C[k] = 0; 4932 else 4933 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 4934 } 4935 4936 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4937 if (DCCEnable[k] == true) { 4938 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 4939 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 4940 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 4941 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 4942 if (!IsVertical(SourceRotation[k])) 4943 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 4944 else 4945 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 4946 4947 if (meta_row_remainder <= meta_chunk_threshold) 4948 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 4949 else 4950 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 4951 4952 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * 4953 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4954 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4955 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4956 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4957 
HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4958 if (BytePerPixelC[k] == 0) { 4959 TimePerChromaMetaChunkNominal[k] = 0; 4960 TimePerChromaMetaChunkVBlank[k] = 0; 4961 TimePerChromaMetaChunkFlip[k] = 0; 4962 } else { 4963 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / 4964 meta_row_height_chroma[k]; 4965 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / 4966 meta_row_height_chroma[k]; 4967 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / 4968 meta_chunk_width_chroma; 4969 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 4970 if (!IsVertical(SourceRotation[k])) { 4971 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4972 meta_req_width_chroma[k]; 4973 } else { 4974 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4975 meta_req_height_chroma[k]; 4976 } 4977 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) 4978 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 4979 else 4980 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 4981 4982 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * 4983 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4984 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4985 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4986 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4987 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4988 } 4989 } else { 4990 TimePerMetaChunkNominal[k] = 0; 4991 TimePerMetaChunkVBlank[k] = 0; 4992 TimePerMetaChunkFlip[k] = 0; 4993 TimePerChromaMetaChunkNominal[k] = 0; 4994 TimePerChromaMetaChunkVBlank[k] = 0; 4995 TimePerChromaMetaChunkFlip[k] = 0; 4996 } 4997 } 4998 4999 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5000 if (GPUVMEnable == true) { 5001 if (!IsVertical(SourceRotation[k])) { 5002 
dpte_group_width_luma = (double) dpte_group_bytes[k] / 5003 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 5004 } else { 5005 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5006 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 5007 } 5008 5009 if (use_one_row_for_frame[k]) { 5010 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5011 (double) dpte_group_width_luma / 2.0, 1.0); 5012 } else { 5013 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5014 (double) dpte_group_width_luma, 1.0); 5015 } 5016 #ifdef __DML_VBA_DEBUG__ 5017 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", 5018 __func__, k, use_one_row_for_frame[k]); 5019 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", 5020 __func__, k, dpte_group_bytes[k]); 5021 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", 5022 __func__, k, PTERequestSizeY[k]); 5023 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", 5024 __func__, k, PixelPTEReqWidthY[k]); 5025 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", 5026 __func__, k, PixelPTEReqHeightY[k]); 5027 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", 5028 __func__, k, dpte_row_width_luma_ub[k]); 5029 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", 5030 __func__, k, dpte_group_width_luma); 5031 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", 5032 __func__, k, dpte_groups_per_row_luma_ub); 5033 #endif 5034 5035 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * 5036 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5037 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * 5038 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5039 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5040 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5041 if (BytePerPixelC[k] == 0) { 5042 time_per_pte_group_nom_chroma[k] = 0; 5043 time_per_pte_group_vblank_chroma[k] = 0; 
5044 time_per_pte_group_flip_chroma[k] = 0; 5045 } else { 5046 if (!IsVertical(SourceRotation[k])) { 5047 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5048 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5049 } else { 5050 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5051 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 5052 } 5053 5054 if (use_one_row_for_frame[k]) { 5055 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5056 (double) dpte_group_width_chroma / 2.0, 1.0); 5057 } else { 5058 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5059 (double) dpte_group_width_chroma, 1.0); 5060 } 5061 #ifdef __DML_VBA_DEBUG__ 5062 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", 5063 __func__, k, dpte_row_width_chroma_ub[k]); 5064 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", 5065 __func__, k, dpte_group_width_chroma); 5066 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", 5067 __func__, k, dpte_groups_per_row_chroma_ub); 5068 #endif 5069 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * 5070 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5071 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * 5072 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5073 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5074 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5075 } 5076 } else { 5077 time_per_pte_group_nom_luma[k] = 0; 5078 time_per_pte_group_vblank_luma[k] = 0; 5079 time_per_pte_group_flip_luma[k] = 0; 5080 time_per_pte_group_nom_chroma[k] = 0; 5081 time_per_pte_group_vblank_chroma[k] = 0; 5082 time_per_pte_group_flip_chroma[k] = 0; 5083 } 5084 #ifdef __DML_VBA_DEBUG__ 5085 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", 5086 __func__, k, DestinationLinesToRequestRowInVBlank[k]); 5087 dml_print("DML::%s: k=%0d, 
DestinationLinesToRequestRowInImmediateFlip = %f\n", 5088 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); 5089 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", 5090 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); 5091 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", 5092 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); 5093 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", 5094 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); 5095 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", 5096 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); 5097 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", 5098 __func__, k, TimePerMetaChunkNominal[k]); 5099 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", 5100 __func__, k, TimePerMetaChunkVBlank[k]); 5101 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", 5102 __func__, k, TimePerMetaChunkFlip[k]); 5103 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", 5104 __func__, k, TimePerChromaMetaChunkNominal[k]); 5105 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", 5106 __func__, k, TimePerChromaMetaChunkVBlank[k]); 5107 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", 5108 __func__, k, TimePerChromaMetaChunkFlip[k]); 5109 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", 5110 __func__, k, time_per_pte_group_nom_luma[k]); 5111 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", 5112 __func__, k, time_per_pte_group_vblank_luma[k]); 5113 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", 5114 __func__, k, time_per_pte_group_flip_luma[k]); 5115 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", 5116 __func__, k, time_per_pte_group_nom_chroma[k]); 5117 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", 5118 __func__, k, time_per_pte_group_vblank_chroma[k]); 5119 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", 5120 __func__, k, 
time_per_pte_group_flip_chroma[k]); 5121 #endif 5122 } 5123 } // CalculateMetaAndPTETimes 5124 5125 void dml32_CalculateVMGroupAndRequestTimes( 5126 unsigned int NumberOfActiveSurfaces, 5127 bool GPUVMEnable, 5128 unsigned int GPUVMMaxPageTableLevels, 5129 unsigned int HTotal[], 5130 unsigned int BytePerPixelC[], 5131 double DestinationLinesToRequestVMInVBlank[], 5132 double DestinationLinesToRequestVMInImmediateFlip[], 5133 bool DCCEnable[], 5134 double PixelClock[], 5135 unsigned int dpte_row_width_luma_ub[], 5136 unsigned int dpte_row_width_chroma_ub[], 5137 unsigned int vm_group_bytes[], 5138 unsigned int dpde0_bytes_per_frame_ub_l[], 5139 unsigned int dpde0_bytes_per_frame_ub_c[], 5140 unsigned int meta_pte_bytes_per_frame_ub_l[], 5141 unsigned int meta_pte_bytes_per_frame_ub_c[], 5142 5143 /* Output */ 5144 double TimePerVMGroupVBlank[], 5145 double TimePerVMGroupFlip[], 5146 double TimePerVMRequestVBlank[], 5147 double TimePerVMRequestFlip[]) 5148 { 5149 unsigned int k; 5150 unsigned int num_group_per_lower_vm_stage; 5151 unsigned int num_req_per_lower_vm_stage; 5152 5153 #ifdef __DML_VBA_DEBUG__ 5154 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 5155 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 5156 #endif 5157 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5158 5159 #ifdef __DML_VBA_DEBUG__ 5160 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); 5161 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); 5162 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", 5163 __func__, k, dpde0_bytes_per_frame_ub_l[k]); 5164 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", 5165 __func__, k, dpde0_bytes_per_frame_ub_c[k]); 5166 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", 5167 __func__, k, meta_pte_bytes_per_frame_ub_l[k]); 5168 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", 5169 __func__, k, 
meta_pte_bytes_per_frame_ub_c[k]); 5170 #endif 5171 5172 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 5173 if (DCCEnable[k] == false) { 5174 if (BytePerPixelC[k] > 0) { 5175 num_group_per_lower_vm_stage = dml_ceil( 5176 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5177 (double) (vm_group_bytes[k]), 1.0) + 5178 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5179 (double) (vm_group_bytes[k]), 1.0); 5180 } else { 5181 num_group_per_lower_vm_stage = dml_ceil( 5182 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5183 (double) (vm_group_bytes[k]), 1.0); 5184 } 5185 } else { 5186 if (GPUVMMaxPageTableLevels == 1) { 5187 if (BytePerPixelC[k] > 0) { 5188 num_group_per_lower_vm_stage = dml_ceil( 5189 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5190 (double) (vm_group_bytes[k]), 1.0) + 5191 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5192 (double) (vm_group_bytes[k]), 1.0); 5193 } else { 5194 num_group_per_lower_vm_stage = dml_ceil( 5195 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5196 (double) (vm_group_bytes[k]), 1.0); 5197 } 5198 } else { 5199 if (BytePerPixelC[k] > 0) { 5200 num_group_per_lower_vm_stage = 2 + dml_ceil( 5201 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5202 (double) (vm_group_bytes[k]), 1) + 5203 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5204 (double) (vm_group_bytes[k]), 1) + 5205 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / 5206 (double) (vm_group_bytes[k]), 1) + 5207 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5208 (double) (vm_group_bytes[k]), 1); 5209 } else { 5210 num_group_per_lower_vm_stage = 1 + dml_ceil( 5211 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5212 (double) (vm_group_bytes[k]), 1) + dml_ceil( 5213 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5214 (double) (vm_group_bytes[k]), 1); 5215 } 5216 } 5217 } 5218 5219 if (DCCEnable[k] == false) { 5220 if (BytePerPixelC[k] > 0) { 5221 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + 5222 
dpde0_bytes_per_frame_ub_c[k] / 64; 5223 } else { 5224 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 5225 } 5226 } else { 5227 if (GPUVMMaxPageTableLevels == 1) { 5228 if (BytePerPixelC[k] > 0) { 5229 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + 5230 meta_pte_bytes_per_frame_ub_c[k] / 64; 5231 } else { 5232 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 5233 } 5234 } else { 5235 if (BytePerPixelC[k] > 0) { 5236 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5237 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + 5238 meta_pte_bytes_per_frame_ub_l[k] / 64 + 5239 meta_pte_bytes_per_frame_ub_c[k] / 64; 5240 } else { 5241 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5242 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 5243 } 5244 } 5245 } 5246 5247 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5248 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5249 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5250 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5251 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5252 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5253 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5254 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5255 5256 if (GPUVMMaxPageTableLevels > 2) { 5257 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 5258 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 5259 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 5260 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 5261 } 5262 5263 } else { 5264 TimePerVMGroupVBlank[k] = 0; 5265 TimePerVMGroupFlip[k] = 0; 5266 TimePerVMRequestVBlank[k] = 0; 5267 TimePerVMRequestFlip[k] = 0; 5268 } 5269 5270 #ifdef __DML_VBA_DEBUG__ 5271 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); 5272 dml_print("DML::%s: 
k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); 5273 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); 5274 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); 5275 #endif 5276 } 5277 } // CalculateVMGroupAndRequestTimes 5278 5279 void dml32_CalculateDCCConfiguration( 5280 bool DCCEnabled, 5281 bool DCCProgrammingAssumesScanDirectionUnknown, 5282 enum source_format_class SourcePixelFormat, 5283 unsigned int SurfaceWidthLuma, 5284 unsigned int SurfaceWidthChroma, 5285 unsigned int SurfaceHeightLuma, 5286 unsigned int SurfaceHeightChroma, 5287 unsigned int nomDETInKByte, 5288 unsigned int RequestHeight256ByteLuma, 5289 unsigned int RequestHeight256ByteChroma, 5290 enum dm_swizzle_mode TilingFormat, 5291 unsigned int BytePerPixelY, 5292 unsigned int BytePerPixelC, 5293 double BytePerPixelDETY, 5294 double BytePerPixelDETC, 5295 enum dm_rotation_angle SourceRotation, 5296 /* Output */ 5297 unsigned int *MaxUncompressedBlockLuma, 5298 unsigned int *MaxUncompressedBlockChroma, 5299 unsigned int *MaxCompressedBlockLuma, 5300 unsigned int *MaxCompressedBlockChroma, 5301 unsigned int *IndependentBlockLuma, 5302 unsigned int *IndependentBlockChroma) 5303 { 5304 typedef enum { 5305 REQ_256Bytes, 5306 REQ_128BytesNonContiguous, 5307 REQ_128BytesContiguous, 5308 REQ_NA 5309 } RequestType; 5310 5311 RequestType RequestLuma; 5312 RequestType RequestChroma; 5313 5314 unsigned int segment_order_horz_contiguous_luma; 5315 unsigned int segment_order_horz_contiguous_chroma; 5316 unsigned int segment_order_vert_contiguous_luma; 5317 unsigned int segment_order_vert_contiguous_chroma; 5318 unsigned int req128_horz_wc_l; 5319 unsigned int req128_horz_wc_c; 5320 unsigned int req128_vert_wc_l; 5321 unsigned int req128_vert_wc_c; 5322 unsigned int MAS_vp_horz_limit; 5323 unsigned int MAS_vp_vert_limit; 5324 unsigned int max_vp_horz_width; 5325 unsigned int max_vp_vert_height; 
5326 unsigned int eff_surf_width_l; 5327 unsigned int eff_surf_width_c; 5328 unsigned int eff_surf_height_l; 5329 unsigned int eff_surf_height_c; 5330 unsigned int full_swath_bytes_horz_wc_l; 5331 unsigned int full_swath_bytes_horz_wc_c; 5332 unsigned int full_swath_bytes_vert_wc_l; 5333 unsigned int full_swath_bytes_vert_wc_c; 5334 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5335 5336 unsigned int yuv420; 5337 unsigned int horz_div_l; 5338 unsigned int horz_div_c; 5339 unsigned int vert_div_l; 5340 unsigned int vert_div_c; 5341 5342 unsigned int swath_buf_size; 5343 double detile_buf_vp_horz_limit; 5344 double detile_buf_vp_vert_limit; 5345 5346 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5347 SourcePixelFormat == dm_420_12) ? 1 : 0); 5348 horz_div_l = 1; 5349 horz_div_c = 1; 5350 vert_div_l = 1; 5351 vert_div_c = 1; 5352 5353 if (BytePerPixelY == 1) 5354 vert_div_l = 0; 5355 if (BytePerPixelC == 1) 5356 vert_div_c = 0; 5357 5358 if (BytePerPixelC == 0) { 5359 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5360 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5361 BytePerPixelY / (1 + horz_div_l)); 5362 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5363 (1 + vert_div_l)); 5364 } else { 5365 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5366 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5367 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5368 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5369 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5370 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5371 (1 + vert_div_c) / (1 + yuv420)); 5372 } 5373 5374 if (SourcePixelFormat == dm_420_10) { 5375 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5376 detile_buf_vp_vert_limit = 1.5 * 
detile_buf_vp_vert_limit; 5377 } 5378 5379 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5380 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5381 5382 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5383 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5384 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5385 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5386 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5387 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5388 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5389 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5390 5391 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5392 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5393 if (BytePerPixelC > 0) { 5394 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5395 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5396 } else { 5397 full_swath_bytes_horz_wc_c = 0; 5398 full_swath_bytes_vert_wc_c = 0; 5399 } 5400 5401 if (SourcePixelFormat == dm_420_10) { 5402 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5403 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5404 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5405 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5406 } 5407 5408 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5409 req128_horz_wc_l = 0; 5410 req128_horz_wc_c = 0; 5411 } 
else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5412 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5413 req128_horz_wc_l = 0; 5414 req128_horz_wc_c = 1; 5415 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5416 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5417 req128_horz_wc_l = 1; 5418 req128_horz_wc_c = 0; 5419 } else { 5420 req128_horz_wc_l = 1; 5421 req128_horz_wc_c = 1; 5422 } 5423 5424 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5425 req128_vert_wc_l = 0; 5426 req128_vert_wc_c = 0; 5427 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5428 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5429 req128_vert_wc_l = 0; 5430 req128_vert_wc_c = 1; 5431 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5432 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5433 req128_vert_wc_l = 1; 5434 req128_vert_wc_c = 0; 5435 } else { 5436 req128_vert_wc_l = 1; 5437 req128_vert_wc_c = 1; 5438 } 5439 5440 if (BytePerPixelY == 2) { 5441 segment_order_horz_contiguous_luma = 0; 5442 segment_order_vert_contiguous_luma = 1; 5443 } else { 5444 segment_order_horz_contiguous_luma = 1; 5445 segment_order_vert_contiguous_luma = 0; 5446 } 5447 5448 if (BytePerPixelC == 2) { 5449 segment_order_horz_contiguous_chroma = 0; 5450 segment_order_vert_contiguous_chroma = 1; 5451 } else { 5452 segment_order_horz_contiguous_chroma = 1; 5453 segment_order_vert_contiguous_chroma = 0; 5454 } 5455 #ifdef __DML_VBA_DEBUG__ 5456 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5457 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5458 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5459 dml_print("DML::%s: req128_horz_wc_l = %d\n", 
__func__, req128_horz_wc_l); 5460 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5461 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5462 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5463 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5464 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5465 __func__, segment_order_horz_contiguous_chroma); 5466 #endif 5467 5468 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5469 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5470 RequestLuma = REQ_256Bytes; 5471 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5472 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5473 RequestLuma = REQ_128BytesNonContiguous; 5474 else 5475 RequestLuma = REQ_128BytesContiguous; 5476 5477 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5478 RequestChroma = REQ_256Bytes; 5479 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5480 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5481 RequestChroma = REQ_128BytesNonContiguous; 5482 else 5483 RequestChroma = REQ_128BytesContiguous; 5484 5485 } else if (!IsVertical(SourceRotation)) { 5486 if (req128_horz_wc_l == 0) 5487 RequestLuma = REQ_256Bytes; 5488 else if (segment_order_horz_contiguous_luma == 0) 5489 RequestLuma = REQ_128BytesNonContiguous; 5490 else 5491 RequestLuma = REQ_128BytesContiguous; 5492 5493 if (req128_horz_wc_c == 0) 5494 RequestChroma = REQ_256Bytes; 5495 else if (segment_order_horz_contiguous_chroma == 0) 5496 RequestChroma = REQ_128BytesNonContiguous; 5497 else 5498 RequestChroma = REQ_128BytesContiguous; 5499 5500 } else { 5501 if (req128_vert_wc_l == 0) 5502 RequestLuma = REQ_256Bytes; 5503 else if (segment_order_vert_contiguous_luma == 0) 5504 RequestLuma = 
REQ_128BytesNonContiguous; 5505 else 5506 RequestLuma = REQ_128BytesContiguous; 5507 5508 if (req128_vert_wc_c == 0) 5509 RequestChroma = REQ_256Bytes; 5510 else if (segment_order_vert_contiguous_chroma == 0) 5511 RequestChroma = REQ_128BytesNonContiguous; 5512 else 5513 RequestChroma = REQ_128BytesContiguous; 5514 } 5515 5516 if (RequestLuma == REQ_256Bytes) { 5517 *MaxUncompressedBlockLuma = 256; 5518 *MaxCompressedBlockLuma = 256; 5519 *IndependentBlockLuma = 0; 5520 } else if (RequestLuma == REQ_128BytesContiguous) { 5521 *MaxUncompressedBlockLuma = 256; 5522 *MaxCompressedBlockLuma = 128; 5523 *IndependentBlockLuma = 128; 5524 } else { 5525 *MaxUncompressedBlockLuma = 256; 5526 *MaxCompressedBlockLuma = 64; 5527 *IndependentBlockLuma = 64; 5528 } 5529 5530 if (RequestChroma == REQ_256Bytes) { 5531 *MaxUncompressedBlockChroma = 256; 5532 *MaxCompressedBlockChroma = 256; 5533 *IndependentBlockChroma = 0; 5534 } else if (RequestChroma == REQ_128BytesContiguous) { 5535 *MaxUncompressedBlockChroma = 256; 5536 *MaxCompressedBlockChroma = 128; 5537 *IndependentBlockChroma = 128; 5538 } else { 5539 *MaxUncompressedBlockChroma = 256; 5540 *MaxCompressedBlockChroma = 64; 5541 *IndependentBlockChroma = 64; 5542 } 5543 5544 if (DCCEnabled != true || BytePerPixelC == 0) { 5545 *MaxUncompressedBlockChroma = 0; 5546 *MaxCompressedBlockChroma = 0; 5547 *IndependentBlockChroma = 0; 5548 } 5549 5550 if (DCCEnabled != true) { 5551 *MaxUncompressedBlockLuma = 0; 5552 *MaxCompressedBlockLuma = 0; 5553 *IndependentBlockLuma = 0; 5554 } 5555 5556 #ifdef __DML_VBA_DEBUG__ 5557 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5558 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5559 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5560 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5561 dml_print("DML::%s: 
MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5562 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5563 #endif 5564 5565 } // CalculateDCCConfiguration 5566 5567 void dml32_CalculateStutterEfficiency( 5568 unsigned int CompressedBufferSizeInkByte, 5569 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5570 bool UnboundedRequestEnabled, 5571 unsigned int MetaFIFOSizeInKEntries, 5572 unsigned int ZeroSizeBufferEntries, 5573 unsigned int PixelChunkSizeInKByte, 5574 unsigned int NumberOfActiveSurfaces, 5575 unsigned int ROBBufferSizeInKByte, 5576 double TotalDataReadBandwidth, 5577 double DCFCLK, 5578 double ReturnBW, 5579 unsigned int CompbufReservedSpace64B, 5580 unsigned int CompbufReservedSpaceZs, 5581 double SRExitTime, 5582 double SRExitZ8Time, 5583 bool SynchronizeTimingsFinal, 5584 unsigned int BlendingAndTiming[], 5585 double StutterEnterPlusExitWatermark, 5586 double Z8StutterEnterPlusExitWatermark, 5587 bool ProgressiveToInterlaceUnitInOPP, 5588 bool Interlace[], 5589 double MinTTUVBlank[], 5590 unsigned int DPPPerSurface[], 5591 unsigned int DETBufferSizeY[], 5592 unsigned int BytePerPixelY[], 5593 double BytePerPixelDETY[], 5594 double SwathWidthY[], 5595 unsigned int SwathHeightY[], 5596 unsigned int SwathHeightC[], 5597 double NetDCCRateLuma[], 5598 double NetDCCRateChroma[], 5599 double DCCFractionOfZeroSizeRequestsLuma[], 5600 double DCCFractionOfZeroSizeRequestsChroma[], 5601 unsigned int HTotal[], 5602 unsigned int VTotal[], 5603 double PixelClock[], 5604 double VRatio[], 5605 enum dm_rotation_angle SourceRotation[], 5606 unsigned int BlockHeight256BytesY[], 5607 unsigned int BlockWidth256BytesY[], 5608 unsigned int BlockHeight256BytesC[], 5609 unsigned int BlockWidth256BytesC[], 5610 unsigned int DCCYMaxUncompressedBlock[], 5611 unsigned int DCCCMaxUncompressedBlock[], 5612 unsigned int VActive[], 5613 bool DCCEnable[], 5614 bool WritebackEnable[], 5615 double 
ReadBandwidthSurfaceLuma[], 5616 double ReadBandwidthSurfaceChroma[], 5617 double meta_row_bw[], 5618 double dpte_row_bw[], 5619 5620 /* Output */ 5621 double *StutterEfficiencyNotIncludingVBlank, 5622 double *StutterEfficiency, 5623 unsigned int *NumberOfStutterBurstsPerFrame, 5624 double *Z8StutterEfficiencyNotIncludingVBlank, 5625 double *Z8StutterEfficiency, 5626 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5627 double *StutterPeriod, 5628 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5629 { 5630 5631 bool FoundCriticalSurface = false; 5632 unsigned int SwathSizeCriticalSurface = 0; 5633 unsigned int LastChunkOfSwathSize; 5634 unsigned int MissingPartOfLastSwathOfDETSize; 5635 double LastZ8StutterPeriod = 0.0; 5636 double LastStutterPeriod = 0.0; 5637 unsigned int TotalNumberOfActiveOTG = 0; 5638 double doublePixelClock; 5639 unsigned int doubleHTotal; 5640 unsigned int doubleVTotal; 5641 bool SameTiming = true; 5642 double DETBufferingTimeY; 5643 double SwathWidthYCriticalSurface = 0.0; 5644 double SwathHeightYCriticalSurface = 0.0; 5645 double VActiveTimeCriticalSurface = 0.0; 5646 double FrameTimeCriticalSurface = 0.0; 5647 unsigned int BytePerPixelYCriticalSurface = 0; 5648 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5649 unsigned int DETBufferSizeYCriticalSurface = 0; 5650 double MinTTUVBlankCriticalSurface = 0.0; 5651 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5652 bool doublePlaneCriticalSurface = 0; 5653 bool doublePipeCriticalSurface = 0; 5654 double TotalCompressedReadBandwidth; 5655 double TotalRowReadBandwidth; 5656 double AverageDCCCompressionRate; 5657 double EffectiveCompressedBufferSize; 5658 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5659 double StutterBurstTime; 5660 unsigned int TotalActiveWriteback; 5661 double LinesInDETY; 5662 double LinesInDETYRoundedDownToSwath; 5663 double MaximumEffectiveCompressionLuma; 5664 double MaximumEffectiveCompressionChroma; 5665 double 
TotalZeroSizeRequestReadBandwidth; 5666 double TotalZeroSizeCompressedReadBandwidth; 5667 double AverageDCCZeroSizeFraction; 5668 double AverageZeroSizeCompressionRate; 5669 unsigned int k; 5670 5671 TotalZeroSizeRequestReadBandwidth = 0; 5672 TotalZeroSizeCompressedReadBandwidth = 0; 5673 TotalRowReadBandwidth = 0; 5674 TotalCompressedReadBandwidth = 0; 5675 5676 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5677 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5678 if (DCCEnable[k] == true) { 5679 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5680 || (!IsVertical(SourceRotation[k]) 5681 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5682 || DCCYMaxUncompressedBlock[k] < 256) { 5683 MaximumEffectiveCompressionLuma = 2; 5684 } else { 5685 MaximumEffectiveCompressionLuma = 4; 5686 } 5687 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5688 + ReadBandwidthSurfaceLuma[k] 5689 / dml_min(NetDCCRateLuma[k], 5690 MaximumEffectiveCompressionLuma); 5691 #ifdef __DML_VBA_DEBUG__ 5692 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5693 __func__, k, ReadBandwidthSurfaceLuma[k]); 5694 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5695 __func__, k, NetDCCRateLuma[k]); 5696 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5697 __func__, k, MaximumEffectiveCompressionLuma); 5698 #endif 5699 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5700 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5701 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5702 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5703 / MaximumEffectiveCompressionLuma; 5704 5705 if (ReadBandwidthSurfaceChroma[k] > 0) { 5706 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5707 || (!IsVertical(SourceRotation[k]) 5708 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5709 || 
DCCCMaxUncompressedBlock[k] < 256) { 5710 MaximumEffectiveCompressionChroma = 2; 5711 } else { 5712 MaximumEffectiveCompressionChroma = 4; 5713 } 5714 TotalCompressedReadBandwidth = 5715 TotalCompressedReadBandwidth 5716 + ReadBandwidthSurfaceChroma[k] 5717 / dml_min(NetDCCRateChroma[k], 5718 MaximumEffectiveCompressionChroma); 5719 #ifdef __DML_VBA_DEBUG__ 5720 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5721 __func__, k, ReadBandwidthSurfaceChroma[k]); 5722 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5723 __func__, k, NetDCCRateChroma[k]); 5724 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5725 __func__, k, MaximumEffectiveCompressionChroma); 5726 #endif 5727 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5728 + ReadBandwidthSurfaceChroma[k] 5729 * DCCFractionOfZeroSizeRequestsChroma[k]; 5730 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5731 + ReadBandwidthSurfaceChroma[k] 5732 * DCCFractionOfZeroSizeRequestsChroma[k] 5733 / MaximumEffectiveCompressionChroma; 5734 } 5735 } else { 5736 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5737 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5738 } 5739 TotalRowReadBandwidth = TotalRowReadBandwidth 5740 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5741 } 5742 } 5743 5744 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5745 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5746 5747 #ifdef __DML_VBA_DEBUG__ 5748 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5749 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5750 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5751 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5752 __func__, 
TotalZeroSizeCompressedReadBandwidth); 5753 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5754 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5755 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5756 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5757 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5758 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5759 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5760 #endif 5761 if (AverageDCCZeroSizeFraction == 1) { 5762 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5763 / TotalZeroSizeCompressedReadBandwidth; 5764 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5765 * AverageZeroSizeCompressionRate 5766 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5767 * AverageZeroSizeCompressionRate; 5768 } else if (AverageDCCZeroSizeFraction > 0) { 5769 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5770 / TotalZeroSizeCompressedReadBandwidth; 5771 EffectiveCompressedBufferSize = dml_min( 5772 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5773 (double) MetaFIFOSizeInKEntries * 1024 * 64 5774 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5775 + 1 / AverageDCCCompressionRate)) 5776 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5777 * AverageDCCCompressionRate, 5778 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5779 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5780 5781 #ifdef __DML_VBA_DEBUG__ 5782 dml_print("DML::%s: min 1 = %f\n", __func__, 5783 CompressedBufferSizeInkByte * 1024 * 
AverageDCCCompressionRate); 5784 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5785 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5786 AverageDCCCompressionRate)); 5787 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5788 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5789 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5790 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5791 #endif 5792 } else { 5793 EffectiveCompressedBufferSize = dml_min( 5794 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5795 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5796 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5797 * AverageDCCCompressionRate; 5798 5799 #ifdef __DML_VBA_DEBUG__ 5800 dml_print("DML::%s: min 1 = %f\n", __func__, 5801 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5802 dml_print("DML::%s: min 2 = %f\n", __func__, 5803 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5804 #endif 5805 } 5806 5807 #ifdef __DML_VBA_DEBUG__ 5808 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5809 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5810 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5811 #endif 5812 5813 *StutterPeriod = 0; 5814 5815 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5816 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5817 LinesInDETY = ((double) DETBufferSizeY[k] 5818 + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) 5819 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5820 / BytePerPixelDETY[k] / SwathWidthY[k]; 5821 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5822 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5823 / VRatio[k]; 5824 #ifdef __DML_VBA_DEBUG__ 5825 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5826 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5827 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5828 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5829 __func__, k, ReadBandwidthSurfaceLuma[k]); 5830 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5831 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5832 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5833 __func__, k, LinesInDETYRoundedDownToSwath); 5834 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5835 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5836 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5837 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5838 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5839 #endif 5840 5841 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5842 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5843 5844 FoundCriticalSurface = true; 5845 *StutterPeriod = DETBufferingTimeY; 5846 FrameTimeCriticalSurface = ( 5847 isInterlaceTiming ? 5848 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5849 * (double) HTotal[k] / PixelClock[k]; 5850 VActiveTimeCriticalSurface = ( 5851 isInterlaceTiming ? 
5852 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5853 * (double) HTotal[k] / PixelClock[k]; 5854 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5855 SwathWidthYCriticalSurface = SwathWidthY[k]; 5856 SwathHeightYCriticalSurface = SwathHeightY[k]; 5857 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5858 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5859 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5860 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5861 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5862 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5863 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5864 5865 #ifdef __DML_VBA_DEBUG__ 5866 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5867 __func__, k, FoundCriticalSurface); 5868 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5869 __func__, k, *StutterPeriod); 5870 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5871 __func__, k, MinTTUVBlankCriticalSurface); 5872 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5873 __func__, k, FrameTimeCriticalSurface); 5874 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5875 __func__, k, VActiveTimeCriticalSurface); 5876 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5877 __func__, k, BytePerPixelYCriticalSurface); 5878 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5879 __func__, k, SwathWidthYCriticalSurface); 5880 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5881 __func__, k, SwathHeightYCriticalSurface); 5882 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5883 __func__, k, BlockWidth256BytesYCriticalSurface); 5884 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5885 __func__, k, doublePlaneCriticalSurface); 5886 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5887 __func__, k, doublePipeCriticalSurface); 5888 dml_print("DML::%s: 
k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5889 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5890 #endif 5891 } 5892 } 5893 } 5894 5895 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5896 EffectiveCompressedBufferSize); 5897 #ifdef __DML_VBA_DEBUG__ 5898 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5899 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5900 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5901 __func__, *StutterPeriod * TotalDataReadBandwidth); 5902 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5903 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5904 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5905 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5906 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5907 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5908 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5909 #endif 5910 5911 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5912 / ReturnBW 5913 + (*StutterPeriod * TotalDataReadBandwidth 5914 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5915 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5916 #ifdef __DML_VBA_DEBUG__ 5917 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5918 AverageDCCCompressionRate / ReturnBW); 5919 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5920 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5921 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * 
TotalDataReadBandwidth - 5922 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5923 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5924 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5925 #endif 5926 StutterBurstTime = dml_max(StutterBurstTime, 5927 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5928 * SwathWidthYCriticalSurface / ReturnBW); 5929 5930 #ifdef __DML_VBA_DEBUG__ 5931 dml_print("DML::%s: Time to finish residue swath=%f\n", 5932 __func__, 5933 LinesToFinishSwathTransferStutterCriticalSurface * 5934 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5935 #endif 5936 5937 TotalActiveWriteback = 0; 5938 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5939 if (WritebackEnable[k]) 5940 TotalActiveWriteback = TotalActiveWriteback + 1; 5941 } 5942 5943 if (TotalActiveWriteback == 0) { 5944 #ifdef __DML_VBA_DEBUG__ 5945 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5946 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5947 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5948 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5949 #endif 5950 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5951 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5952 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5953 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5954 *NumberOfStutterBurstsPerFrame = ( 5955 *StutterEfficiencyNotIncludingVBlank > 0 ? 5956 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5957 *Z8NumberOfStutterBurstsPerFrame = ( 5958 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
5959 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5960 } else { 5961 *StutterEfficiencyNotIncludingVBlank = 0.; 5962 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5963 *NumberOfStutterBurstsPerFrame = 0; 5964 *Z8NumberOfStutterBurstsPerFrame = 0; 5965 } 5966 #ifdef __DML_VBA_DEBUG__ 5967 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5968 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5969 __func__, *StutterEfficiencyNotIncludingVBlank); 5970 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5971 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5972 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5973 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5974 #endif 5975 5976 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5977 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5978 if (BlendingAndTiming[k] == k) { 5979 if (TotalNumberOfActiveOTG == 0) { 5980 doublePixelClock = PixelClock[k]; 5981 doubleHTotal = HTotal[k]; 5982 doubleVTotal = VTotal[k]; 5983 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5984 || doubleVTotal != VTotal[k]) { 5985 SameTiming = false; 5986 } 5987 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5988 } 5989 } 5990 } 5991 5992 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5993 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5994 5995 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5996 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 5997 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 5998 + StutterBurstTime * VActiveTimeCriticalSurface 5999 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6000 } else { 6001 *StutterEfficiency = 
*StutterEfficiencyNotIncludingVBlank; 6002 } 6003 } else { 6004 *StutterEfficiency = 0; 6005 } 6006 6007 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6008 LastZ8StutterPeriod = VActiveTimeCriticalSurface 6009 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6010 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 6011 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 6012 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 6013 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6014 } else { 6015 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6016 } 6017 } else { 6018 *Z8StutterEfficiency = 0.; 6019 } 6020 6021 #ifdef __DML_VBA_DEBUG__ 6022 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6023 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6024 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6025 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6026 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6027 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6028 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6029 __func__, *StutterEfficiencyNotIncludingVBlank); 6030 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6031 #endif 6032 6033 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6034 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6035 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6036 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6037 - DETBufferSizeYCriticalSurface; 6038 6039 
*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6040 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6041 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6042 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6043 6044 #ifdef __DML_VBA_DEBUG__ 6045 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6046 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6047 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6048 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6049 #endif 6050 } // CalculateStutterEfficiency 6051 6052 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6053 unsigned int ConfigReturnBufferSizeInKByte, 6054 unsigned int ROBBufferSizeInKByte, 6055 unsigned int MaxNumDPP, 6056 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6057 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6058 6059 /* Output */ 6060 unsigned int *MaxTotalDETInKByte, 6061 unsigned int *nomDETInKByte, 6062 unsigned int *MinCompressedBufferSizeInKByte) 6063 { 6064 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6065 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6066 6067 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6068 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6069 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6070 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6071 6072 #ifdef __DML_VBA_DEBUG__ 6073 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6074 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, 
ROBBufferSizeInKByte); 6075 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6076 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6077 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6078 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6079 #endif 6080 6081 if (det_buff_size_override_en) { 6082 *nomDETInKByte = det_buff_size_override_val; 6083 #ifdef __DML_VBA_DEBUG__ 6084 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6085 #endif 6086 } 6087 } // CalculateMaxDETAndMinCompressedBufferSize 6088 6089 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6090 double ReturnBW, 6091 bool NotUrgentLatencyHiding[], 6092 double ReadBandwidthLuma[], 6093 double ReadBandwidthChroma[], 6094 double cursor_bw[], 6095 double meta_row_bandwidth[], 6096 double dpte_row_bandwidth[], 6097 unsigned int NumberOfDPP[], 6098 double UrgentBurstFactorLuma[], 6099 double UrgentBurstFactorChroma[], 6100 double UrgentBurstFactorCursor[]) 6101 { 6102 unsigned int k; 6103 bool NotEnoughUrgentLatencyHiding = false; 6104 bool CalculateVActiveBandwithSupport_val = false; 6105 double VActiveBandwith = 0; 6106 6107 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6108 if (NotUrgentLatencyHiding[k]) { 6109 NotEnoughUrgentLatencyHiding = true; 6110 } 6111 } 6112 6113 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6114 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6115 } 6116 6117 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6118 6119 #ifdef __DML_VBA_DEBUG__ 6120 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, 
NotEnoughUrgentLatencyHiding); 6121 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6122 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6123 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6124 #endif 6125 return CalculateVActiveBandwithSupport_val; 6126 } 6127 6128 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6129 double ReturnBW, 6130 bool NotUrgentLatencyHiding[], 6131 double ReadBandwidthLuma[], 6132 double ReadBandwidthChroma[], 6133 double PrefetchBandwidthLuma[], 6134 double PrefetchBandwidthChroma[], 6135 double cursor_bw[], 6136 double meta_row_bandwidth[], 6137 double dpte_row_bandwidth[], 6138 double cursor_bw_pre[], 6139 double prefetch_vmrow_bw[], 6140 unsigned int NumberOfDPP[], 6141 double UrgentBurstFactorLuma[], 6142 double UrgentBurstFactorChroma[], 6143 double UrgentBurstFactorCursor[], 6144 double UrgentBurstFactorLumaPre[], 6145 double UrgentBurstFactorChromaPre[], 6146 double UrgentBurstFactorCursorPre[], 6147 double PrefetchBW[], 6148 double VRatio[], 6149 double MaxVRatioPre, 6150 6151 /* output */ 6152 double *MaxPrefetchBandwidth, 6153 double *FractionOfUrgentBandwidth, 6154 bool *PrefetchBandwidthSupport) 6155 { 6156 unsigned int k; 6157 double ActiveBandwidthPerSurface; 6158 bool NotEnoughUrgentLatencyHiding = false; 6159 double TotalActiveBandwidth = 0; 6160 double TotalPrefetchBandwidth = 0; 6161 6162 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6163 if (NotUrgentLatencyHiding[k]) { 6164 NotEnoughUrgentLatencyHiding = true; 6165 } 6166 } 6167 6168 *MaxPrefetchBandwidth = 0; 6169 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6170 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]); 6171 6172 TotalActiveBandwidth 
+= ActiveBandwidthPerSurface; 6173 6174 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k]; 6175 6176 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6177 ActiveBandwidthPerSurface, 6178 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6179 } 6180 6181 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__) 6182 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding; 6183 else 6184 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6185 6186 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW; 6187 } 6188 6189 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6190 double ReturnBW, 6191 double ReadBandwidthLuma[], 6192 double ReadBandwidthChroma[], 6193 double PrefetchBandwidthLuma[], 6194 double PrefetchBandwidthChroma[], 6195 double cursor_bw[], 6196 double cursor_bw_pre[], 6197 unsigned int NumberOfDPP[], 6198 double UrgentBurstFactorLuma[], 6199 double UrgentBurstFactorChroma[], 6200 double UrgentBurstFactorCursor[], 6201 double UrgentBurstFactorLumaPre[], 6202 double UrgentBurstFactorChromaPre[], 6203 double UrgentBurstFactorCursorPre[]) 6204 { 6205 unsigned int k; 6206 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6207 6208 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6209 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6210 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + 
PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6211 } 6212 6213 return CalculateBandwidthAvailableForImmediateFlip_val; 6214 } 6215 6216 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6217 double ReturnBW, 6218 enum immediate_flip_requirement ImmediateFlipRequirement[], 6219 double final_flip_bw[], 6220 double ReadBandwidthLuma[], 6221 double ReadBandwidthChroma[], 6222 double PrefetchBandwidthLuma[], 6223 double PrefetchBandwidthChroma[], 6224 double cursor_bw[], 6225 double meta_row_bandwidth[], 6226 double dpte_row_bandwidth[], 6227 double cursor_bw_pre[], 6228 double prefetch_vmrow_bw[], 6229 unsigned int NumberOfDPP[], 6230 double UrgentBurstFactorLuma[], 6231 double UrgentBurstFactorChroma[], 6232 double UrgentBurstFactorCursor[], 6233 double UrgentBurstFactorLumaPre[], 6234 double UrgentBurstFactorChromaPre[], 6235 double UrgentBurstFactorCursorPre[], 6236 6237 /* output */ 6238 double *TotalBandwidth, 6239 double *FractionOfUrgentBandwidth, 6240 bool *ImmediateFlipBandwidthSupport) 6241 { 6242 unsigned int k; 6243 *TotalBandwidth = 0; 6244 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6245 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6246 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6247 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6248 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6249 } else { 6250 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6251 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * 
UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6252 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6253 } 6254 } 6255 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6256 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6257 } 6258 6259 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, 6260 double ReturnBW, 6261 double UrgentLatency, 6262 unsigned int SwathHeightY[], 6263 unsigned int SwathHeightC[], 6264 unsigned int SwathWidthY[], 6265 unsigned int SwathWidthC[], 6266 double BytePerPixelInDETY[], 6267 double BytePerPixelInDETC[], 6268 unsigned int DETBufferSizeY[], 6269 unsigned int DETBufferSizeC[], 6270 unsigned int NumOfDPP[], 6271 unsigned int HTotal[], 6272 double PixelClock[], 6273 double VRatioY[], 6274 double VRatioC[], 6275 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], 6276 enum unbounded_requesting_policy UseUnboundedRequesting) 6277 { 6278 int k; 6279 double SwathSizeAllSurfaces = 0; 6280 double SwathSizeAllSurfacesInFetchTimeUs; 6281 double DETSwathLatencyHidingUs; 6282 double DETSwathLatencyHidingYUs; 6283 double DETSwathLatencyHidingCUs; 6284 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; 6285 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; 6286 bool NotEnoughDETSwathFillLatencyHiding = false; 6287 6288 if (UseUnboundedRequesting == dm_unbounded_requesting) 6289 return false; 6290 6291 /* calculate sum of single swath size for all pipes in bytes */ 6292 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6293 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; 6294 6295 if (SwathHeightC[k] != 0) 6296 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; 6297 else 6298 
SwathSizePerSurfaceC[k] = 0; 6299 6300 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; 6301 } 6302 6303 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; 6304 6305 /* ensure all DET - 1 swath can hide a fetch for all surfaces */ 6306 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6307 double LineTime = HTotal[k] / PixelClock[k]; 6308 6309 /* only care if surface is not phantom */ 6310 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 6311 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; 6312 6313 if (SwathHeightC[k] != 0) { 6314 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; 6315 6316 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); 6317 } else { 6318 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; 6319 } 6320 6321 /* DET must be able to hide time to fetch 1 swath for each surface */ 6322 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { 6323 NotEnoughDETSwathFillLatencyHiding = true; 6324 break; 6325 } 6326 } 6327 } 6328 6329 return NotEnoughDETSwathFillLatencyHiding; 6330 } 6331