1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 31 32 unsigned int dml32_dscceComputeDelay( 33 unsigned int bpc, 34 double BPP, 35 unsigned int sliceWidth, 36 unsigned int numSlices, 37 enum output_format_class pixelFormat, 38 enum output_encoder_class Output) 39 { 40 // valid bpc = source bits per component in the set of {8, 10, 12} 41 // valid bpp = increments of 1/16 of a bit 42 // min = 6/7/8 in N420/N422/444, respectively 43 // max = such that compression is 1:1 44 //valid sliceWidth = number of pixels per slice line, 45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 48 49 // fixed value 50 unsigned int rcModelSize = 8192; 51 52 // N422/N420 operate at 2 pixels per clock 53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 54 Delay, pixels; 55 56 if (pixelFormat == dm_420) 57 pixelsPerClock = 2; 58 else if (pixelFormat == dm_n422) 59 pixelsPerClock = 2; 60 // #all other modes operate at 1 pixel per clock 61 else 62 pixelsPerClock = 1; 63 64 //initial transmit delay as per PPS 65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 66 67 //compute ssm delay 68 if (bpc == 8) 69 D = 81; 70 else if (bpc == 10) 71 D = 89; 72 else 73 D = 113; 74 75 //divide by pixel per cycle to compute slice width as seen by DSC 76 w = sliceWidth / pixelsPerClock; 77 78 //422 mode has an additional cycle of delay 79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 80 s = 0; 81 else 82 s = 1; 83 84 //main calculation for the dscce 85 ix = initalXmitDelay + 45; 86 wx = (w + 2) / 3; 87 p = 
3 * wx - w; 88 l0 = ix / w; 89 a = ix + p * l0; 90 ax = (a + 2) / 3 + D + 6 + 1; 91 L = (ax + wx - 1) / wx; 92 if ((ix % w) == 0 && p != 0) 93 lstall = 1; 94 else 95 lstall = 0; 96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 97 98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 99 pixels = Delay * 3 * pixelsPerClock; 100 101 #ifdef __DML_VBA_DEBUG__ 102 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 103 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 107 dml_print("DML::%s: Output: %d\n", __func__, Output); 108 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 109 #endif 110 111 return pixels; 112 } 113 114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 115 { 116 unsigned int Delay = 0; 117 118 if (pixelFormat == dm_420) { 119 // sfr 120 Delay = Delay + 2; 121 // dsccif 122 Delay = Delay + 0; 123 // dscc - input deserializer 124 Delay = Delay + 3; 125 // dscc gets pixels every other cycle 126 Delay = Delay + 2; 127 // dscc - input cdc fifo 128 Delay = Delay + 12; 129 // dscc gets pixels every other cycle 130 Delay = Delay + 13; 131 // dscc - cdc uncertainty 132 Delay = Delay + 2; 133 // dscc - output cdc fifo 134 Delay = Delay + 7; 135 // dscc gets pixels every other cycle 136 Delay = Delay + 3; 137 // dscc - cdc uncertainty 138 Delay = Delay + 2; 139 // dscc - output serializer 140 Delay = Delay + 1; 141 // sft 142 Delay = Delay + 1; 143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 144 // sfr 145 Delay = Delay + 2; 146 // dsccif 147 Delay = Delay + 1; 148 // dscc - input deserializer 149 Delay = Delay + 5; 150 // dscc - input cdc fifo 151 Delay = Delay + 25; 152 // dscc - cdc uncertainty 153 Delay = Delay + 2; 154 // dscc - output cdc 
fifo 155 Delay = Delay + 10; 156 // dscc - cdc uncertainty 157 Delay = Delay + 2; 158 // dscc - output serializer 159 Delay = Delay + 1; 160 // sft 161 Delay = Delay + 1; 162 } else { 163 // sfr 164 Delay = Delay + 2; 165 // dsccif 166 Delay = Delay + 0; 167 // dscc - input deserializer 168 Delay = Delay + 3; 169 // dscc - input cdc fifo 170 Delay = Delay + 12; 171 // dscc - cdc uncertainty 172 Delay = Delay + 2; 173 // dscc - output cdc fifo 174 Delay = Delay + 7; 175 // dscc - output serializer 176 Delay = Delay + 1; 177 // dscc - cdc uncertainty 178 Delay = Delay + 2; 179 // sft 180 Delay = Delay + 1; 181 } 182 183 return Delay; 184 } 185 186 187 bool IsVertical(enum dm_rotation_angle Scan) 188 { 189 bool is_vert = false; 190 191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 192 is_vert = true; 193 else 194 is_vert = false; 195 return is_vert; 196 } 197 198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 199 double HRatio, 200 double HRatioChroma, 201 double VRatio, 202 double VRatioChroma, 203 double MaxDCHUBToPSCLThroughput, 204 double MaxPSCLToLBThroughput, 205 double PixelClock, 206 enum source_format_class SourcePixelFormat, 207 unsigned int HTaps, 208 unsigned int HTapsChroma, 209 unsigned int VTaps, 210 unsigned int VTapsChroma, 211 212 /* output */ 213 double *PSCL_THROUGHPUT, 214 double *PSCL_THROUGHPUT_CHROMA, 215 double *DPPCLKUsingSingleDPP) 216 { 217 double DPPCLKUsingSingleDPPLuma; 218 double DPPCLKUsingSingleDPPChroma; 219 220 if (HRatio > 1) { 221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 222 dml_ceil((double) HTaps / 6.0, 1.0)); 223 } else { 224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 225 } 226 227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 228 *PSCL_THROUGHPUT, 1); 229 230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * 
PixelClock) 231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 232 233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 234 SourcePixelFormat != dm_rgbe_alpha)) { 235 *PSCL_THROUGHPUT_CHROMA = 0; 236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 237 } else { 238 if (HRatioChroma > 1) { 239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 241 } else { 242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 243 } 244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 249 } 250 } 251 252 void dml32_CalculateBytePerPixelAndBlockSizes( 253 enum source_format_class SourcePixelFormat, 254 enum dm_swizzle_mode SurfaceTiling, 255 256 /* Output */ 257 unsigned int *BytePerPixelY, 258 unsigned int *BytePerPixelC, 259 double *BytePerPixelDETY, 260 double *BytePerPixelDETC, 261 unsigned int *BlockHeight256BytesY, 262 unsigned int *BlockHeight256BytesC, 263 unsigned int *BlockWidth256BytesY, 264 unsigned int *BlockWidth256BytesC, 265 unsigned int *MacroTileHeightY, 266 unsigned int *MacroTileHeightC, 267 unsigned int *MacroTileWidthY, 268 unsigned int *MacroTileWidthC) 269 { 270 if (SourcePixelFormat == dm_444_64) { 271 *BytePerPixelDETY = 8; 272 *BytePerPixelDETC = 0; 273 *BytePerPixelY = 8; 274 *BytePerPixelC = 0; 275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 276 *BytePerPixelDETY = 4; 277 *BytePerPixelDETC = 0; 278 *BytePerPixelY = 4; 279 *BytePerPixelC = 0; 280 } else if (SourcePixelFormat == dm_444_16) { 281 
*BytePerPixelDETY = 2; 282 *BytePerPixelDETC = 0; 283 *BytePerPixelY = 2; 284 *BytePerPixelC = 0; 285 } else if (SourcePixelFormat == dm_444_8) { 286 *BytePerPixelDETY = 1; 287 *BytePerPixelDETC = 0; 288 *BytePerPixelY = 1; 289 *BytePerPixelC = 0; 290 } else if (SourcePixelFormat == dm_rgbe_alpha) { 291 *BytePerPixelDETY = 4; 292 *BytePerPixelDETC = 1; 293 *BytePerPixelY = 4; 294 *BytePerPixelC = 1; 295 } else if (SourcePixelFormat == dm_420_8) { 296 *BytePerPixelDETY = 1; 297 *BytePerPixelDETC = 2; 298 *BytePerPixelY = 1; 299 *BytePerPixelC = 2; 300 } else if (SourcePixelFormat == dm_420_12) { 301 *BytePerPixelDETY = 2; 302 *BytePerPixelDETC = 4; 303 *BytePerPixelY = 2; 304 *BytePerPixelC = 4; 305 } else { 306 *BytePerPixelDETY = 4.0 / 3; 307 *BytePerPixelDETC = 8.0 / 3; 308 *BytePerPixelY = 2; 309 *BytePerPixelC = 4; 310 } 311 #ifdef __DML_VBA_DEBUG__ 312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 317 #endif 318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 319 || SourcePixelFormat == dm_444_16 320 || SourcePixelFormat == dm_444_8 321 || SourcePixelFormat == dm_mono_16 322 || SourcePixelFormat == dm_mono_8 323 || SourcePixelFormat == dm_rgbe)) { 324 if (SurfaceTiling == dm_sw_linear) 325 *BlockHeight256BytesY = 1; 326 else if (SourcePixelFormat == dm_444_64) 327 *BlockHeight256BytesY = 4; 328 else if (SourcePixelFormat == dm_444_8) 329 *BlockHeight256BytesY = 16; 330 else 331 *BlockHeight256BytesY = 8; 332 333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 334 *BlockHeight256BytesC = 0; 335 *BlockWidth256BytesC = 0; 336 } else { 337 if (SurfaceTiling == dm_sw_linear) { 338 
*BlockHeight256BytesY = 1; 339 *BlockHeight256BytesC = 1; 340 } else if (SourcePixelFormat == dm_rgbe_alpha) { 341 *BlockHeight256BytesY = 8; 342 *BlockHeight256BytesC = 16; 343 } else if (SourcePixelFormat == dm_420_8) { 344 *BlockHeight256BytesY = 16; 345 *BlockHeight256BytesC = 8; 346 } else { 347 *BlockHeight256BytesY = 8; 348 *BlockHeight256BytesC = 8; 349 } 350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 352 } 353 #ifdef __DML_VBA_DEBUG__ 354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 358 #endif 359 360 if (SurfaceTiling == dm_sw_linear) { 361 *MacroTileHeightY = *BlockHeight256BytesY; 362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 363 *MacroTileHeightC = *BlockHeight256BytesC; 364 if (*MacroTileHeightC == 0) 365 *MacroTileWidthC = 0; 366 else 367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 370 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 372 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 373 if (*MacroTileHeightC == 0) 374 *MacroTileWidthC = 0; 375 else 376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 377 } else { 378 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 380 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 381 if (*MacroTileHeightC == 0) 382 *MacroTileWidthC = 0; 383 else 384 *MacroTileWidthC = 65536 * 4 / 
*BytePerPixelC / *MacroTileHeightC; 385 } 386 387 #ifdef __DML_VBA_DEBUG__ 388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 392 #endif 393 } // CalculateBytePerPixelAndBlockSizes 394 395 void dml32_CalculateSwathAndDETConfiguration( 396 unsigned int DETSizeOverride[], 397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 398 unsigned int ConfigReturnBufferSizeInKByte, 399 unsigned int MaxTotalDETInKByte, 400 unsigned int MinCompressedBufferSizeInKByte, 401 double ForceSingleDPP, 402 unsigned int NumberOfActiveSurfaces, 403 unsigned int nomDETInKByte, 404 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 406 unsigned int PixelChunkSizeKBytes, 407 unsigned int ROBSizeKBytes, 408 unsigned int CompressedBufferSegmentSizeInkByteFinal, 409 enum output_encoder_class Output[], 410 double ReadBandwidthLuma[], 411 double ReadBandwidthChroma[], 412 double MaximumSwathWidthLuma[], 413 double MaximumSwathWidthChroma[], 414 enum dm_rotation_angle SourceRotation[], 415 bool ViewportStationary[], 416 enum source_format_class SourcePixelFormat[], 417 enum dm_swizzle_mode SurfaceTiling[], 418 unsigned int ViewportWidth[], 419 unsigned int ViewportHeight[], 420 unsigned int ViewportXStart[], 421 unsigned int ViewportYStart[], 422 unsigned int ViewportXStartC[], 423 unsigned int ViewportYStartC[], 424 unsigned int SurfaceWidthY[], 425 unsigned int SurfaceWidthC[], 426 unsigned int SurfaceHeightY[], 427 unsigned int SurfaceHeightC[], 428 unsigned int Read256BytesBlockHeightY[], 429 unsigned int Read256BytesBlockHeightC[], 430 unsigned int Read256BytesBlockWidthY[], 431 unsigned int Read256BytesBlockWidthC[], 432 enum 
odm_combine_mode ODMMode[], 433 unsigned int BlendingAndTiming[], 434 unsigned int BytePerPixY[], 435 unsigned int BytePerPixC[], 436 double BytePerPixDETY[], 437 double BytePerPixDETC[], 438 unsigned int HActive[], 439 double HRatio[], 440 double HRatioChroma[], 441 unsigned int DPPPerSurface[], 442 443 /* Output */ 444 unsigned int swath_width_luma_ub[], 445 unsigned int swath_width_chroma_ub[], 446 double SwathWidth[], 447 double SwathWidthChroma[], 448 unsigned int SwathHeightY[], 449 unsigned int SwathHeightC[], 450 unsigned int DETBufferSizeInKByte[], 451 unsigned int DETBufferSizeY[], 452 unsigned int DETBufferSizeC[], 453 bool *UnboundedRequestEnabled, 454 unsigned int *CompressedBufferSizeInkByte, 455 unsigned int *CompBufReservedSpaceKBytes, 456 bool *CompBufReservedSpaceNeedAdjustment, 457 bool ViewportSizeSupportPerSurface[], 458 bool *ViewportSizeSupport) 459 { 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 464 unsigned int RoundedUpSwathSizeBytesY; 465 unsigned int RoundedUpSwathSizeBytesC; 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 468 unsigned int k; 469 unsigned int TotalActiveDPP = 0; 470 bool NoChromaSurfaces = true; 471 unsigned int DETBufferSizeInKByteForSwathCalculation; 472 473 #ifdef __DML_VBA_DEBUG__ 474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 477 #endif 478 dml32_CalculateSwathWidth(ForceSingleDPP, 479 NumberOfActiveSurfaces, 480 SourcePixelFormat, 481 SourceRotation, 482 ViewportStationary, 483 ViewportWidth, 484 ViewportHeight, 485 ViewportXStart, 486 ViewportYStart, 487 ViewportXStartC, 488 
ViewportYStartC, 489 SurfaceWidthY, 490 SurfaceWidthC, 491 SurfaceHeightY, 492 SurfaceHeightC, 493 ODMMode, 494 BytePerPixY, 495 BytePerPixC, 496 Read256BytesBlockHeightY, 497 Read256BytesBlockHeightC, 498 Read256BytesBlockWidthY, 499 Read256BytesBlockWidthC, 500 BlendingAndTiming, 501 HActive, 502 HRatio, 503 DPPPerSurface, 504 505 /* Output */ 506 SwathWidthdoubleDPP, 507 SwathWidthdoubleDPPChroma, 508 SwathWidth, 509 SwathWidthChroma, 510 MaximumSwathHeightY, 511 MaximumSwathHeightC, 512 swath_width_luma_ub, 513 swath_width_chroma_ub); 514 515 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 518 #ifdef __DML_VBA_DEBUG__ 519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 524 RoundedUpMaxSwathSizeBytesY[k]); 525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 529 RoundedUpMaxSwathSizeBytesC[k]); 530 #endif 531 532 if (SourcePixelFormat[k] == dm_420_10) { 533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 535 } 536 } 
537 538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 542 NoChromaSurfaces = false; 543 } 544 } 545 546 // By default, just set the reserved space to 2 pixel chunks size 547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 548 549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 553 554 if (*CompBufReservedSpaceNeedAdjustment == 1) { 555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 556 } 557 558 #ifdef __DML_VBA_DEBUG__ 559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 561 #endif 562 563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 564 565 dml32_CalculateDETBufferSize(DETSizeOverride, 566 UseMALLForPStateChange, 567 ForceSingleDPP, 568 NumberOfActiveSurfaces, 569 *UnboundedRequestEnabled, 570 nomDETInKByte, 571 MaxTotalDETInKByte, 572 ConfigReturnBufferSizeInKByte, 573 MinCompressedBufferSizeInKByte, 574 CompressedBufferSegmentSizeInkByteFinal, 575 SourcePixelFormat, 576 ReadBandwidthLuma, 577 
ReadBandwidthChroma, 578 RoundedUpMaxSwathSizeBytesY, 579 RoundedUpMaxSwathSizeBytesC, 580 DPPPerSurface, 581 582 /* Output */ 583 DETBufferSizeInKByte, // per hubp pipe 584 CompressedBufferSizeInkByte); 585 586 #ifdef __DML_VBA_DEBUG__ 587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 593 #endif 594 595 *ViewportSizeSupport = true; 596 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 597 598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 599 dm_use_mall_pstate_change_phantom_pipe ? 
1024 : DETBufferSizeInKByte[k]); 600 #ifdef __DML_VBA_DEBUG__ 601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 602 DETBufferSizeInKByteForSwathCalculation); 603 #endif 604 605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 607 SwathHeightY[k] = MaximumSwathHeightY[k]; 608 SwathHeightC[k] = MaximumSwathHeightC[k]; 609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 615 SwathHeightC[k] = MaximumSwathHeightC[k]; 616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 621 SwathHeightY[k] = MaximumSwathHeightY[k]; 622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 625 } else { 626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 630 } 631 632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 636 
*ViewportSizeSupport = false; 637 ViewportSizeSupportPerSurface[k] = false; 638 } else { 639 ViewportSizeSupportPerSurface[k] = true; 640 } 641 642 if (SwathHeightC[k] == 0) { 643 #ifdef __DML_VBA_DEBUG__ 644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 645 #endif 646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 647 DETBufferSizeC[k] = 0; 648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 649 #ifdef __DML_VBA_DEBUG__ 650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 651 #endif 652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 654 } else { 655 #ifdef __DML_VBA_DEBUG__ 656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 657 #endif 658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 660 } 661 662 #ifdef __DML_VBA_DEBUG__ 663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesY[k]); 667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 668 k, RoundedUpMaxSwathSizeBytesC[k]); 669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 675 
ViewportSizeSupportPerSurface[k]); 676 #endif 677 678 } 679 } // CalculateSwathAndDETConfiguration 680 681 void dml32_CalculateSwathWidth( 682 bool ForceSingleDPP, 683 unsigned int NumberOfActiveSurfaces, 684 enum source_format_class SourcePixelFormat[], 685 enum dm_rotation_angle SourceRotation[], 686 bool ViewportStationary[], 687 unsigned int ViewportWidth[], 688 unsigned int ViewportHeight[], 689 unsigned int ViewportXStart[], 690 unsigned int ViewportYStart[], 691 unsigned int ViewportXStartC[], 692 unsigned int ViewportYStartC[], 693 unsigned int SurfaceWidthY[], 694 unsigned int SurfaceWidthC[], 695 unsigned int SurfaceHeightY[], 696 unsigned int SurfaceHeightC[], 697 enum odm_combine_mode ODMMode[], 698 unsigned int BytePerPixY[], 699 unsigned int BytePerPixC[], 700 unsigned int Read256BytesBlockHeightY[], 701 unsigned int Read256BytesBlockHeightC[], 702 unsigned int Read256BytesBlockWidthY[], 703 unsigned int Read256BytesBlockWidthC[], 704 unsigned int BlendingAndTiming[], 705 unsigned int HActive[], 706 double HRatio[], 707 unsigned int DPPPerSurface[], 708 709 /* Output */ 710 double SwathWidthdoubleDPPY[], 711 double SwathWidthdoubleDPPC[], 712 double SwathWidthY[], // per-pipe 713 double SwathWidthC[], // per-pipe 714 unsigned int MaximumSwathHeightY[], 715 unsigned int MaximumSwathHeightC[], 716 unsigned int swath_width_luma_ub[], // per-pipe 717 unsigned int swath_width_chroma_ub[]) // per-pipe 718 { 719 unsigned int k, j; 720 enum odm_combine_mode MainSurfaceODMMode; 721 722 unsigned int surface_width_ub_l; 723 unsigned int surface_height_ub_l; 724 unsigned int surface_width_ub_c = 0; 725 unsigned int surface_height_ub_c = 0; 726 727 #ifdef __DML_VBA_DEBUG__ 728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 730 #endif 731 732 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 733 if (!IsVertical(SourceRotation[k])) 734 
SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 735 else 736 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 737 738 #ifdef __DML_VBA_DEBUG__ 739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 741 #endif 742 743 MainSurfaceODMMode = ODMMode[k]; 744 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 745 if (BlendingAndTiming[k] == j) 746 MainSurfaceODMMode = ODMMode[j]; 747 } 748 749 if (ForceSingleDPP) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 751 } else { 752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 754 dml_round(HActive[k] / 4.0 * HRatio[k])); 755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 757 dml_round(HActive[k] / 2.0 * HRatio[k])); 758 } else if (DPPPerSurface[k] == 2) { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 760 } else { 761 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 762 } 763 } 764 765 #ifdef __DML_VBA_DEBUG__ 766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 771 #endif 772 773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 774 SourcePixelFormat[k] == dm_420_12) { 775 SwathWidthC[k] = SwathWidthY[k] / 2; 776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 777 } else { 778 SwathWidthC[k] = SwathWidthY[k]; 779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 780 } 781 782 if (ForceSingleDPP == true) { 783 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 784 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 785 } 786 787 surface_width_ub_l 
= dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 789 790 if (!IsVertical(SourceRotation[k])) { 791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 795 dml_floor(ViewportXStart[k] + 796 SwathWidthY[k] + 797 Read256BytesBlockWidthY[k] - 1, 798 Read256BytesBlockWidthY[k]) - 799 dml_floor(ViewportXStart[k], 800 Read256BytesBlockWidthY[k])); 801 } else { 802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 803 dml_ceil(SwathWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) + 805 Read256BytesBlockWidthY[k]); 806 } 807 if (BytePerPixC[k] > 0) { 808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 812 Read256BytesBlockWidthC[k] - 1, 813 Read256BytesBlockWidthC[k]) - 814 dml_floor(ViewportXStartC[k], 815 Read256BytesBlockWidthC[k])); 816 } else { 817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 818 dml_ceil(SwathWidthC[k] - 1, 819 Read256BytesBlockWidthC[k]) + 820 Read256BytesBlockWidthC[k]); 821 } 822 } else { 823 swath_width_chroma_ub[k] = 0; 824 } 825 } else { 826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 828 829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 832 Read256BytesBlockHeightY[k]) - 833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 834 } else { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 836 Read256BytesBlockHeightY[k]) 
+ Read256BytesBlockHeightY[k]); 837 } 838 if (BytePerPixC[k] > 0) { 839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 843 Read256BytesBlockHeightC[k] - 1, 844 Read256BytesBlockHeightC[k]) - 845 dml_floor(ViewportYStartC[k], 846 Read256BytesBlockHeightC[k])); 847 } else { 848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 850 Read256BytesBlockHeightC[k]); 851 } 852 } else { 853 swath_width_chroma_ub[k] = 0; 854 } 855 } 856 857 #ifdef __DML_VBA_DEBUG__ 858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 871 
dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 872 #endif 873 874 } 875 } // CalculateSwathWidth 876 877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 878 unsigned int TotalNumberOfActiveDPP, 879 bool NoChroma, 880 enum output_encoder_class Output, 881 enum dm_swizzle_mode SurfaceTiling, 882 bool CompBufReservedSpaceNeedAdjustment, 883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 884 { 885 bool ret_val = false; 886 887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 888 TotalNumberOfActiveDPP == 1 && NoChroma); 889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 890 ret_val = false; 891 892 if (SurfaceTiling == dm_sw_linear) 893 ret_val = false; 894 895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 896 ret_val = false; 897 898 #ifdef __DML_VBA_DEBUG__ 899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 902 #endif 903 904 return (ret_val); 905 } 906 907 void dml32_CalculateDETBufferSize( 908 unsigned int DETSizeOverride[], 909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 910 bool ForceSingleDPP, 911 unsigned int NumberOfActiveSurfaces, 912 bool UnboundedRequestEnabled, 913 unsigned int nomDETInKByte, 914 unsigned int MaxTotalDETInKByte, 915 unsigned int ConfigReturnBufferSizeInKByte, 916 unsigned int MinCompressedBufferSizeInKByte, 917 unsigned int CompressedBufferSegmentSizeInkByteFinal, 918 enum source_format_class SourcePixelFormat[], 919 double ReadBandwidthLuma[], 920 double ReadBandwidthChroma[], 921 unsigned int 
RoundedUpMaxSwathSizeBytesY[], 922 unsigned int RoundedUpMaxSwathSizeBytesC[], 923 unsigned int DPPPerSurface[], 924 /* Output */ 925 unsigned int DETBufferSizeInKByte[], 926 unsigned int *CompressedBufferSizeInkByte) 927 { 928 unsigned int DETBufferSizePoolInKByte; 929 unsigned int NextDETBufferPieceInKByte; 930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 931 bool NextPotentialSurfaceToAssignDETPieceFound; 932 unsigned int NextSurfaceToAssignDETPiece; 933 double TotalBandwidth; 934 double BandwidthOfSurfacesNotAssignedDETPiece; 935 unsigned int max_minDET; 936 unsigned int minDET; 937 unsigned int minDET_pipe; 938 unsigned int j, k; 939 940 #ifdef __DML_VBA_DEBUG__ 941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 949 CompressedBufferSegmentSizeInkByteFinal); 950 #endif 951 952 // Note: Will use default det size if that fits 2 swaths 953 if (UnboundedRequestEnabled) { 954 if (DETSizeOverride[0] > 0) { 955 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 956 } else { 957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 958 ((double) RoundedUpMaxSwathSizeBytesY[0] + 959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 960 } 961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 962 } else { 963 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 964 
for (k = 0; k < NumberOfActiveSurfaces; ++k) { 965 DETBufferSizeInKByte[k] = nomDETInKByte; 966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 967 SourcePixelFormat[k] == dm_420_12) { 968 max_minDET = nomDETInKByte - 64; 969 } else { 970 max_minDET = nomDETInKByte; 971 } 972 minDET = 128; 973 minDET_pipe = 0; 974 975 // add DET resource until can hold 2 full swaths 976 while (minDET <= max_minDET && minDET_pipe == 0) { 977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 979 minDET_pipe = minDET; 980 minDET = minDET + 64; 981 } 982 983 #ifdef __DML_VBA_DEBUG__ 984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 988 RoundedUpMaxSwathSizeBytesY[k]); 989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 990 RoundedUpMaxSwathSizeBytesC[k]); 991 #endif 992 993 if (minDET_pipe == 0) { 994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 998 __func__, k, minDET_pipe); 999 #endif 1000 } 1001 1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1003 DETBufferSizeInKByte[k] = 0; 1004 } else if (DETSizeOverride[k] > 0) { 1005 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1008 } else if ((ForceSingleDPP ? 
1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1009 DETBufferSizeInKByte[k] = minDET_pipe; 1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; 1012 } 1013 1014 #ifdef __DML_VBA_DEBUG__ 1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1019 #endif 1020 } 1021 1022 TotalBandwidth = 0; 1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1026 } 1027 #ifdef __DML_VBA_DEBUG__ 1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1033 #endif 1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1036 1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1038 DETPieceAssignedToThisSurfaceAlready[k] = true; 1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * 1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1042 DETPieceAssignedToThisSurfaceAlready[k] = true; 1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1045 } else { 1046 DETPieceAssignedToThisSurfaceAlready[k] = false; 1047 } 1048 #ifdef __DML_VBA_DEBUG__ 1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1050 DETPieceAssignedToThisSurfaceAlready[k]); 1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1052 BandwidthOfSurfacesNotAssignedDETPiece); 1053 #endif 1054 } 1055 1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1057 NextPotentialSurfaceToAssignDETPieceFound = false; 1058 NextSurfaceToAssignDETPiece = 0; 1059 1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1061 #ifdef __DML_VBA_DEBUG__ 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1063 ReadBandwidthLuma[k]); 1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1065 ReadBandwidthChroma[k]); 1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1071 NextSurfaceToAssignDETPiece); 1072 #endif 1073 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1074 (!NextPotentialSurfaceToAssignDETPieceFound || 1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1078 NextSurfaceToAssignDETPiece = k; 1079 NextPotentialSurfaceToAssignDETPieceFound = 
true; 1080 } 1081 #ifdef __DML_VBA_DEBUG__ 1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1086 #endif 1087 } 1088 1089 if (NextPotentialSurfaceToAssignDETPieceFound) { 1090 // Note: To show the banker's rounding behavior in VBA and also the fact 1091 // that the DET buffer size varies due to precision issue 1092 // 1093 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1096 // BandwidthOfSurfacesNotAssignedDETPiece / 1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1101 //BandwidthOfSurfacesNotAssignedDETPiece / 1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1103 // 1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1106 1107 NextDETBufferPieceInKByte = dml_min( 1108 dml_round((double) DETBufferSizePoolInKByte * 1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1111 BandwidthOfSurfacesNotAssignedDETPiece / 1112 ((ForceSingleDPP ? 1 : 1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1114 (ForceSingleDPP ? 1 : 1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1116 dml_floor((double) DETBufferSizePoolInKByte, 1117 (ForceSingleDPP ? 1 : 1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1119 1120 // Above calculation can assign the entire DET buffer allocation to a single pipe. 
1121 // We should limit the per-pipe DET size to the nominal / max per pipe. 1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1127 } else { 1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1129 // already has the max per-pipe value 1130 NextDETBufferPieceInKByte = 0; 1131 } 1132 } 1133 1134 #ifdef __DML_VBA_DEBUG__ 1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1136 DETBufferSizePoolInKByte); 1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1138 NextSurfaceToAssignDETPiece); 1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1146 NextDETBufferPieceInKByte); 1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1148 __func__, j, NextSurfaceToAssignDETPiece, 1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1150 #endif 1151 1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1154 + NextDETBufferPieceInKByte 1155 / (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1156 #ifdef __DML_VBA_DEBUG__ 1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1158 #endif 1159 1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1165 } 1166 } 1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1168 } 1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1170 1171 #ifdef __DML_VBA_DEBUG__ 1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1177 } 1178 #endif 1179 } // CalculateDETBufferSize 1180 1181 void dml32_CalculateODMMode( 1182 unsigned int MaximumPixelsPerLinePerDSCUnit, 1183 unsigned int HActive, 1184 enum output_format_class OutFormat, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 unsigned int NumberOfDSCSlices, 1197 1198 /* Output */ 1199 bool *TotalAvailablePipesSupport, 1200 unsigned int *NumberOfDPP, 1201 enum odm_combine_mode *ODMMode, 1202 double *RequiredDISPCLKPerSurface) 1203 { 1204 1205 double 
SurfaceRequiredDISPCLKWithoutODMCombine; 1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1208 1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1211 MaxDispclk); 1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1214 MaxDispclk); 1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1217 MaxDispclk); 1218 *TotalAvailablePipesSupport = true; 1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1220 1221 if (ODMUse == dm_odm_combine_policy_none) 1222 *ODMMode = dm_odm_combine_mode_disabled; 1223 1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1225 *NumberOfDPP = 0; 1226 1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1229 1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) 1233 || NumberOfDSCSlices > 8)))) { 1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1235 *ODMMode = dm_odm_combine_mode_4to1; 1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1237 *NumberOfDPP = 4; 1238 } else { 1239 *TotalAvailablePipesSupport = false; 1240 } 1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) 1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { 1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1247 *ODMMode = dm_odm_combine_mode_2to1; 1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1249 *NumberOfDPP = 2; 1250 } else { 1251 *TotalAvailablePipesSupport = false; 1252 } 1253 } else { 1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1255 *NumberOfDPP = 1; 1256 else 1257 *TotalAvailablePipesSupport = false; 1258 } 1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && 1260 ODMUse != dm_odm_combine_policy_4to1) { 1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { 1262 *ODMMode = dm_odm_combine_mode_disabled; 1263 *NumberOfDPP = 0; 1264 *TotalAvailablePipesSupport = false; 1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || 1266 *ODMMode == dm_odm_combine_mode_4to1) { 1267 *ODMMode = dm_odm_combine_mode_4to1; 1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1269 *NumberOfDPP = 4; 1270 } else { 1271 *ODMMode = dm_odm_combine_mode_2to1; 1272 *RequiredDISPCLKPerSurface = 
SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1273 *NumberOfDPP = 2; 1274 } 1275 } 1276 if (Output == dm_hdmi && OutFormat == dm_420 && 1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { 1278 *ODMMode = dm_odm_combine_mode_disabled; 1279 *NumberOfDPP = 0; 1280 *TotalAvailablePipesSupport = false; 1281 } 1282 } 1283 1284 double dml32_CalculateRequiredDispclk( 1285 enum odm_combine_mode ODMMode, 1286 double PixelClock, 1287 double DISPCLKDPPCLKDSCCLKDownSpreading, 1288 double DISPCLKRampingMargin, 1289 double DISPCLKDPPCLKVCOSpeed, 1290 double MaxDispclk) 1291 { 1292 double RequiredDispclk = 0.; 1293 double PixelClockAfterODM; 1294 double DISPCLKWithRampingRoundedToDFSGranularity; 1295 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1296 double MaxDispclkRoundedDownToDFSGranularity; 1297 1298 if (ODMMode == dm_odm_combine_mode_4to1) 1299 PixelClockAfterODM = PixelClock / 4; 1300 else if (ODMMode == dm_odm_combine_mode_2to1) 1301 PixelClockAfterODM = PixelClock / 2; 1302 else 1303 PixelClockAfterODM = PixelClock; 1304 1305 1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1309 1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1312 1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1314 1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1319 else 1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1321 1322 return RequiredDispclk; 
1323 } 1324 1325 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) 1326 { 1327 if (Clock <= 0.0) 1328 return 0.0; 1329 1330 if (round_up) 1331 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); 1332 else 1333 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); 1334 } 1335 1336 void dml32_CalculateOutputLink( 1337 double PHYCLKPerState, 1338 double PHYCLKD18PerState, 1339 double PHYCLKD32PerState, 1340 double Downspreading, 1341 bool IsMainSurfaceUsingTheIndicatedTiming, 1342 enum output_encoder_class Output, 1343 enum output_format_class OutputFormat, 1344 unsigned int HTotal, 1345 unsigned int HActive, 1346 double PixelClockBackEnd, 1347 double ForcedOutputLinkBPP, 1348 unsigned int DSCInputBitPerComponent, 1349 unsigned int NumberOfDSCSlices, 1350 double AudioSampleRate, 1351 unsigned int AudioSampleLayout, 1352 enum odm_combine_mode ODMModeNoDSC, 1353 enum odm_combine_mode ODMModeDSC, 1354 bool DSCEnable, 1355 unsigned int OutputLinkDPLanes, 1356 enum dm_output_link_dp_rate OutputLinkDPRate, 1357 1358 /* Output */ 1359 bool *RequiresDSC, 1360 double *RequiresFEC, 1361 double *OutBpp, 1362 enum dm_output_type *OutputType, 1363 enum dm_output_rate *OutputRate, 1364 unsigned int *RequiredSlots) 1365 { 1366 bool LinkDSCEnable; 1367 unsigned int dummy; 1368 *RequiresDSC = false; 1369 *RequiresFEC = false; 1370 *OutBpp = 0; 1371 *OutputType = dm_output_type_unknown; 1372 *OutputRate = dm_output_rate_unknown; 1373 1374 if (IsMainSurfaceUsingTheIndicatedTiming) { 1375 if (Output == dm_hdmi) { 1376 *RequiresDSC = false; 1377 *RequiresFEC = false; 1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, 1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, 1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1381 ODMModeNoDSC, ODMModeDSC, &dummy); 1382 //OutputTypeAndRate = "HDMI"; 1383 *OutputType = dm_output_type_hdmi; 1384 1385 } 
else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { 1386 if (DSCEnable == true) { 1387 *RequiresDSC = true; 1388 LinkDSCEnable = true; 1389 if (Output == dm_dp || Output == dm_dp2p0) 1390 *RequiresFEC = true; 1391 else 1392 *RequiresFEC = false; 1393 } else { 1394 *RequiresDSC = false; 1395 LinkDSCEnable = false; 1396 if (Output == dm_dp2p0) 1397 *RequiresFEC = true; 1398 else 1399 *RequiresFEC = false; 1400 } 1401 if (Output == dm_dp2p0) { 1402 *OutBpp = 0; 1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && 1404 PHYCLKD32PerState >= 10000 / 32) { 1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && 1411 ForcedOutputLinkBPP == 0) { 1412 *RequiresDSC = true; 1413 LinkDSCEnable = true; 1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1416 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1417 OutputFormat, DSCInputBitPerComponent, 1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1420 } 1421 //OutputTypeAndRate = Output & " UHBR10"; 1422 *OutputType = dm_output_type_dp2p0; 1423 *OutputRate = dm_output_rate_dp_rate_uhbr10; 1424 } 1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && 1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { 1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1431 
AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1432 1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && 1434 ForcedOutputLinkBPP == 0) { 1435 *RequiresDSC = true; 1436 LinkDSCEnable = true; 1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1439 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1440 OutputFormat, DSCInputBitPerComponent, 1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1443 } 1444 //OutputTypeAndRate = Output & " UHBR13p5"; 1445 *OutputType = dm_output_type_dp2p0; 1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5; 1447 } 1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && 1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { 1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1456 *RequiresDSC = true; 1457 LinkDSCEnable = true; 1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1460 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1461 OutputFormat, DSCInputBitPerComponent, 1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1464 } 1465 //OutputTypeAndRate = Output & " UHBR20"; 1466 *OutputType = dm_output_type_dp2p0; 1467 *OutputRate = dm_output_rate_dp_rate_uhbr20; 1468 } 1469 } else { 1470 *OutBpp = 0; 1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && 1472 PHYCLKPerState >= 270) { 1473 *OutBpp = dml32_TruncToValidBPP((1 - 
Downspreading / 100) * 2700, 1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && 1479 ForcedOutputLinkBPP == 0) { 1480 *RequiresDSC = true; 1481 LinkDSCEnable = true; 1482 if (Output == dm_dp) 1483 *RequiresFEC = true; 1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1486 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1487 OutputFormat, DSCInputBitPerComponent, 1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1490 } 1491 //OutputTypeAndRate = Output & " HBR"; 1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1493 *OutputRate = dm_output_rate_dp_rate_hbr; 1494 } 1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && 1496 *OutBpp == 0 && PHYCLKPerState >= 540) { 1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1502 1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && 1504 ForcedOutputLinkBPP == 0) { 1505 *RequiresDSC = true; 1506 LinkDSCEnable = true; 1507 if (Output == dm_dp) 1508 *RequiresFEC = true; 1509 1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1512 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1513 OutputFormat, DSCInputBitPerComponent, 1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1515 ODMModeNoDSC, ODMModeDSC, 
RequiredSlots); 1516 } 1517 //OutputTypeAndRate = Output & " HBR2"; 1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1519 *OutputRate = dm_output_rate_dp_rate_hbr2; 1520 } 1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { 1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1524 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, 1527 RequiredSlots); 1528 1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1530 *RequiresDSC = true; 1531 LinkDSCEnable = true; 1532 if (Output == dm_dp) 1533 *RequiresFEC = true; 1534 1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1537 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1538 OutputFormat, DSCInputBitPerComponent, 1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1541 } 1542 //OutputTypeAndRate = Output & " HBR3"; 1543 *OutputType = (Output == dm_dp) ? 
dm_output_type_dp : dm_output_type_edp; 1544 *OutputRate = dm_output_rate_dp_rate_hbr3; 1545 } 1546 } 1547 } 1548 } 1549 } 1550 1551 void dml32_CalculateDPPCLK( 1552 unsigned int NumberOfActiveSurfaces, 1553 double DISPCLKDPPCLKDSCCLKDownSpreading, 1554 double DISPCLKDPPCLKVCOSpeed, 1555 double DPPCLKUsingSingleDPP[], 1556 unsigned int DPPPerSurface[], 1557 1558 /* output */ 1559 double *GlobalDPPCLK, 1560 double Dppclk[]) 1561 { 1562 unsigned int k; 1563 *GlobalDPPCLK = 0; 1564 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1565 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1566 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); 1567 } 1568 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); 1569 for (k = 0; k < NumberOfActiveSurfaces; ++k) 1570 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); 1571 } 1572 1573 double dml32_TruncToValidBPP( 1574 double LinkBitRate, 1575 unsigned int Lanes, 1576 unsigned int HTotal, 1577 unsigned int HActive, 1578 double PixelClock, 1579 double DesiredBPP, 1580 bool DSCEnable, 1581 enum output_encoder_class Output, 1582 enum output_format_class Format, 1583 unsigned int DSCInputBitPerComponent, 1584 unsigned int DSCSlices, 1585 unsigned int AudioRate, 1586 unsigned int AudioLayout, 1587 enum odm_combine_mode ODMModeNoDSC, 1588 enum odm_combine_mode ODMModeDSC, 1589 /* Output */ 1590 unsigned int *RequiredSlots) 1591 { 1592 double MaxLinkBPP; 1593 unsigned int MinDSCBPP; 1594 double MaxDSCBPP; 1595 unsigned int NonDSCBPP0; 1596 unsigned int NonDSCBPP1; 1597 unsigned int NonDSCBPP2; 1598 unsigned int NonDSCBPP3; 1599 1600 if (Format == dm_420) { 1601 NonDSCBPP0 = 12; 1602 NonDSCBPP1 = 15; 1603 NonDSCBPP2 = 18; 1604 MinDSCBPP = 6; 1605 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 1606 } else if (Format == dm_444) { 1607 NonDSCBPP0 = 18; 1608 NonDSCBPP1 = 24; 1609 NonDSCBPP2 = 30; 1610 NonDSCBPP3 = 36; 
1611 MinDSCBPP = 8; 1612 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1613 } else { 1614 if (Output == dm_hdmi) { 1615 NonDSCBPP0 = 24; 1616 NonDSCBPP1 = 24; 1617 NonDSCBPP2 = 24; 1618 } else { 1619 NonDSCBPP0 = 16; 1620 NonDSCBPP1 = 20; 1621 NonDSCBPP2 = 24; 1622 } 1623 if (Format == dm_n422) { 1624 MinDSCBPP = 7; 1625 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1626 } else { 1627 MinDSCBPP = 8; 1628 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1629 } 1630 } 1631 if (Output == dm_dp2p0) { 1632 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1633 } else if (DSCEnable && Output == dm_dp) { 1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1635 } else { 1636 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1637 } 1638 1639 if (DSCEnable) { 1640 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1641 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1642 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1643 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1644 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1645 MaxLinkBPP = 2 * MaxLinkBPP; 1646 } else { 1647 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1648 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1649 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1650 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1651 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1652 MaxLinkBPP = 2 * MaxLinkBPP; 1653 } 1654 1655 if (DesiredBPP == 0) { 1656 if (DSCEnable) { 1657 if (MaxLinkBPP < MinDSCBPP) 1658 return BPP_INVALID; 1659 else if (MaxLinkBPP >= MaxDSCBPP) 1660 return MaxDSCBPP; 1661 else 1662 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1663 } else { 1664 if (MaxLinkBPP >= NonDSCBPP3) 1665 return NonDSCBPP3; 1666 else if (MaxLinkBPP >= NonDSCBPP2) 1667 return NonDSCBPP2; 1668 else if (MaxLinkBPP >= NonDSCBPP1) 1669 return NonDSCBPP1; 1670 else if (MaxLinkBPP >= NonDSCBPP0) 1671 return 16.0; 1672 else 1673 return BPP_INVALID; 1674 } 1675 } else { 1676 if 
(!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1677 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) || 1678 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1679 return BPP_INVALID; 1680 else 1681 return DesiredBPP; 1682 } 1683 1684 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1685 1686 return BPP_INVALID; 1687 } // TruncToValidBPP 1688 1689 double dml32_RequiredDTBCLK( 1690 bool DSCEnable, 1691 double PixelClock, 1692 enum output_format_class OutputFormat, 1693 double OutputBpp, 1694 unsigned int DSCSlices, 1695 unsigned int HTotal, 1696 unsigned int HActive, 1697 unsigned int AudioRate, 1698 unsigned int AudioLayout) 1699 { 1700 double PixelWordRate; 1701 double HCActive; 1702 double HCBlank; 1703 double AverageTribyteRate; 1704 double HActiveTribyteRate; 1705 1706 if (DSCEnable != true) 1707 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1708 1709 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1710 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1711 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1712 HCBlank = 64 + 32 * 1713 dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1714 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1715 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1716 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1717 } 1718 1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1720 enum odm_combine_mode ODMMode, 1721 unsigned int DSCInputBitPerComponent, 1722 double OutputBpp, 1723 unsigned int HActive, 1724 unsigned int HTotal, 1725 unsigned int NumberOfDSCSlices, 1726 enum output_format_class OutputFormat, 1727 enum output_encoder_class Output, 1728 double PixelClock, 1729 double PixelClockBackEnd) 1730 { 1731 unsigned int DSCDelayRequirement_val; 1732 1733 if (DSCEnabled == true && OutputBpp != 0) { 1734 if (ODMMode == dm_odm_combine_mode_4to1) { 1735 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1736 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1737 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1738 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1739 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1740 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1741 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1742 } else { 1743 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1744 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1745 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1746 } 1747 1748 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1749 dml_ceil(DSCDelayRequirement_val / HActive, 1); 1750 1751 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1752 1753 } else { 1754 DSCDelayRequirement_val = 0; 1755 } 1756 1757 #ifdef __DML_VBA_DEBUG__ 1758 dml_print("DML::%s: DSCEnabled = 
%d\n", __func__, DSCEnabled); 1759 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1760 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1761 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1762 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1763 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1764 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1765 #endif 1766 1767 return DSCDelayRequirement_val; 1768 } 1769 1770 void dml32_CalculateSurfaceSizeInMall( 1771 unsigned int NumberOfActiveSurfaces, 1772 unsigned int MALLAllocatedForDCN, 1773 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1774 bool DCCEnable[], 1775 bool ViewportStationary[], 1776 unsigned int ViewportXStartY[], 1777 unsigned int ViewportYStartY[], 1778 unsigned int ViewportXStartC[], 1779 unsigned int ViewportYStartC[], 1780 unsigned int ViewportWidthY[], 1781 unsigned int ViewportHeightY[], 1782 unsigned int BytesPerPixelY[], 1783 unsigned int ViewportWidthC[], 1784 unsigned int ViewportHeightC[], 1785 unsigned int BytesPerPixelC[], 1786 unsigned int SurfaceWidthY[], 1787 unsigned int SurfaceWidthC[], 1788 unsigned int SurfaceHeightY[], 1789 unsigned int SurfaceHeightC[], 1790 unsigned int Read256BytesBlockWidthY[], 1791 unsigned int Read256BytesBlockWidthC[], 1792 unsigned int Read256BytesBlockHeightY[], 1793 unsigned int Read256BytesBlockHeightC[], 1794 unsigned int ReadBlockWidthY[], 1795 unsigned int ReadBlockWidthC[], 1796 unsigned int ReadBlockHeightY[], 1797 unsigned int ReadBlockHeightC[], 1798 1799 /* Output */ 1800 unsigned int SurfaceSizeInMALL[], 1801 bool *ExceededMALLSize) 1802 { 1803 unsigned int TotalSurfaceSizeInMALL = 0; 1804 unsigned int k; 1805 1806 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1807 if (ViewportStationary[k]) { 1808 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 
1809 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1810 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1811 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1812 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1813 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1814 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1815 1816 if (ReadBlockWidthC[k] > 0) { 1817 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1818 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1819 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1820 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1821 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1822 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1823 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1824 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1825 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1826 BytesPerPixelC[k]; 1827 } 1828 if (DCCEnable[k] == true) { 1829 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1830 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), 1831 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1832 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1833 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1834 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1835 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1836 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1837 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 1838 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256; 1839 if (Read256BytesBlockWidthC[k] > 0) { 1840 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1841 dml_min(dml_ceil(SurfaceWidthC[k], 8 * 1842 Read256BytesBlockWidthC[k]), 1843 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1844 * Read256BytesBlockWidthC[k] - 1, 8 * 1845 Read256BytesBlockWidthC[k]) - 1846 
dml_floor(ViewportXStartC[k], 8 * 1847 Read256BytesBlockWidthC[k])) * 1848 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1849 Read256BytesBlockHeightC[k]), 1850 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1851 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1852 Read256BytesBlockHeightC[k]) - 1853 dml_floor(ViewportYStartC[k], 8 * 1854 Read256BytesBlockHeightC[k])) * 1855 BytesPerPixelC[k] / 256; 1856 } 1857 } 1858 } else { 1859 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1860 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1861 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1862 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1863 BytesPerPixelY[k]; 1864 if (ReadBlockWidthC[k] > 0) { 1865 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1866 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1867 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1868 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1869 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1870 BytesPerPixelC[k]; 1871 } 1872 if (DCCEnable[k] == true) { 1873 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1874 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * 1875 Read256BytesBlockWidthY[k] - 1), 8 * 1876 Read256BytesBlockWidthY[k]) * 1877 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1878 Read256BytesBlockHeightY[k] - 1), 8 * 1879 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256; 1880 1881 if (Read256BytesBlockWidthC[k] > 0) { 1882 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1883 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * 1884 Read256BytesBlockWidthC[k] - 1), 8 * 1885 Read256BytesBlockWidthC[k]) * 1886 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1887 Read256BytesBlockHeightC[k] - 1), 8 * 1888 Read256BytesBlockHeightC[k]) * 1889 BytesPerPixelC[k] / 256; 1890 } 1891 } 1892 } 1893 } 1894 1895 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1896 if (UseMALLForStaticScreen[k] == 
dm_use_mall_static_screen_enable) 1897 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 1898 } 1899 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); 1900 } // CalculateSurfaceSizeInMall 1901 1902 void dml32_CalculateVMRowAndSwath( 1903 unsigned int NumberOfActiveSurfaces, 1904 DmlPipe myPipe[], 1905 unsigned int SurfaceSizeInMALL[], 1906 unsigned int PTEBufferSizeInRequestsLuma, 1907 unsigned int PTEBufferSizeInRequestsChroma, 1908 unsigned int DCCMetaBufferSizeBytes, 1909 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1910 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1911 unsigned int MALLAllocatedForDCN, 1912 double SwathWidthY[], 1913 double SwathWidthC[], 1914 bool GPUVMEnable, 1915 bool HostVMEnable, 1916 unsigned int HostVMMaxNonCachedPageTableLevels, 1917 unsigned int GPUVMMaxPageTableLevels, 1918 unsigned int GPUVMMinPageSizeKBytes[], 1919 unsigned int HostVMMinPageSize, 1920 1921 /* Output */ 1922 bool PTEBufferSizeNotExceeded[], 1923 bool DCCMetaBufferSizeNotExceeded[], 1924 unsigned int dpte_row_width_luma_ub[], 1925 unsigned int dpte_row_width_chroma_ub[], 1926 unsigned int dpte_row_height_luma[], 1927 unsigned int dpte_row_height_chroma[], 1928 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1929 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1930 unsigned int meta_req_width[], 1931 unsigned int meta_req_width_chroma[], 1932 unsigned int meta_req_height[], 1933 unsigned int meta_req_height_chroma[], 1934 unsigned int meta_row_width[], 1935 unsigned int meta_row_width_chroma[], 1936 unsigned int meta_row_height[], 1937 unsigned int meta_row_height_chroma[], 1938 unsigned int vm_group_bytes[], 1939 unsigned int dpte_group_bytes[], 1940 unsigned int PixelPTEReqWidthY[], 1941 unsigned int PixelPTEReqHeightY[], 1942 unsigned int PTERequestSizeY[], 1943 unsigned int PixelPTEReqWidthC[], 1944 unsigned int PixelPTEReqHeightC[], 1945 unsigned int 
PTERequestSizeC[], 1946 unsigned int dpde0_bytes_per_frame_ub_l[], 1947 unsigned int meta_pte_bytes_per_frame_ub_l[], 1948 unsigned int dpde0_bytes_per_frame_ub_c[], 1949 unsigned int meta_pte_bytes_per_frame_ub_c[], 1950 double PrefetchSourceLinesY[], 1951 double PrefetchSourceLinesC[], 1952 double VInitPreFillY[], 1953 double VInitPreFillC[], 1954 unsigned int MaxNumSwathY[], 1955 unsigned int MaxNumSwathC[], 1956 double meta_row_bw[], 1957 double dpte_row_bw[], 1958 double PixelPTEBytesPerRow[], 1959 double PDEAndMetaPTEBytesFrame[], 1960 double MetaRowByte[], 1961 bool use_one_row_for_frame[], 1962 bool use_one_row_for_frame_flip[], 1963 bool UsesMALLForStaticScreen[], 1964 bool PTE_BUFFER_MODE[], 1965 unsigned int BIGK_FRAGMENT_SIZE[]) 1966 { 1967 unsigned int k; 1968 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1969 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1970 unsigned int PDEAndMetaPTEBytesFrameY; 1971 unsigned int PDEAndMetaPTEBytesFrameC; 1972 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1973 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1974 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1975 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1976 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1977 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1978 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1979 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1980 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1981 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1982 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1983 1984 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1985 if (HostVMEnable == true) { 1986 vm_group_bytes[k] = 512; 1987 dpte_group_bytes[k] = 512; 1988 } else if (GPUVMEnable == true) { 1989 vm_group_bytes[k] = 2048; 1990 if 
(GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1991 dpte_group_bytes[k] = 512; 1992 else 1993 dpte_group_bytes[k] = 2048; 1994 } else { 1995 vm_group_bytes[k] = 0; 1996 dpte_group_bytes[k] = 0; 1997 } 1998 1999 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 2000 myPipe[k].SourcePixelFormat == dm_420_12 || 2001 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 2002 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 2003 !IsVertical(myPipe[k].SourceRotation)) { 2004 PTEBufferSizeInRequestsForLuma[k] = 2005 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 2006 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 2007 } else { 2008 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 2009 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 2010 } 2011 2012 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 2013 myPipe[k].ViewportStationary, 2014 myPipe[k].DCCEnable, 2015 myPipe[k].DPPPerSurface, 2016 myPipe[k].BlockHeight256BytesC, 2017 myPipe[k].BlockWidth256BytesC, 2018 myPipe[k].SourcePixelFormat, 2019 myPipe[k].SurfaceTiling, 2020 myPipe[k].BytePerPixelC, 2021 myPipe[k].SourceRotation, 2022 SwathWidthC[k], 2023 myPipe[k].ViewportHeightChroma, 2024 myPipe[k].ViewportXStartC, 2025 myPipe[k].ViewportYStartC, 2026 GPUVMEnable, 2027 HostVMEnable, 2028 HostVMMaxNonCachedPageTableLevels, 2029 GPUVMMaxPageTableLevels, 2030 GPUVMMinPageSizeKBytes[k], 2031 HostVMMinPageSize, 2032 PTEBufferSizeInRequestsForChroma[k], 2033 myPipe[k].PitchC, 2034 myPipe[k].DCCMetaPitchC, 2035 myPipe[k].BlockWidthC, 2036 myPipe[k].BlockHeightC, 2037 2038 /* Output */ 2039 &MetaRowByteC[k], 2040 &PixelPTEBytesPerRowC[k], 2041 &dpte_row_width_chroma_ub[k], 2042 &dpte_row_height_chroma[k], 2043 &dpte_row_height_linear_chroma[k], 2044 &PixelPTEBytesPerRowC_one_row_per_frame[k], 2045 
&dpte_row_width_chroma_ub_one_row_per_frame[k], 2046 &dpte_row_height_chroma_one_row_per_frame[k], 2047 &meta_req_width_chroma[k], 2048 &meta_req_height_chroma[k], 2049 &meta_row_width_chroma[k], 2050 &meta_row_height_chroma[k], 2051 &PixelPTEReqWidthC[k], 2052 &PixelPTEReqHeightC[k], 2053 &PTERequestSizeC[k], 2054 &dpde0_bytes_per_frame_ub_c[k], 2055 &meta_pte_bytes_per_frame_ub_c[k]); 2056 2057 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2058 myPipe[k].VRatioChroma, 2059 myPipe[k].VTapsChroma, 2060 myPipe[k].InterlaceEnable, 2061 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2062 myPipe[k].SwathHeightC, 2063 myPipe[k].SourceRotation, 2064 myPipe[k].ViewportStationary, 2065 SwathWidthC[k], 2066 myPipe[k].ViewportHeightChroma, 2067 myPipe[k].ViewportXStartC, 2068 myPipe[k].ViewportYStartC, 2069 2070 /* Output */ 2071 &VInitPreFillC[k], 2072 &MaxNumSwathC[k]); 2073 } else { 2074 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2075 PTEBufferSizeInRequestsForChroma[k] = 0; 2076 PixelPTEBytesPerRowC[k] = 0; 2077 PDEAndMetaPTEBytesFrameC = 0; 2078 MetaRowByteC[k] = 0; 2079 MaxNumSwathC[k] = 0; 2080 PrefetchSourceLinesC[k] = 0; 2081 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2082 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2083 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2084 } 2085 2086 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2087 myPipe[k].ViewportStationary, 2088 myPipe[k].DCCEnable, 2089 myPipe[k].DPPPerSurface, 2090 myPipe[k].BlockHeight256BytesY, 2091 myPipe[k].BlockWidth256BytesY, 2092 myPipe[k].SourcePixelFormat, 2093 myPipe[k].SurfaceTiling, 2094 myPipe[k].BytePerPixelY, 2095 myPipe[k].SourceRotation, 2096 SwathWidthY[k], 2097 myPipe[k].ViewportHeight, 2098 myPipe[k].ViewportXStart, 2099 myPipe[k].ViewportYStart, 2100 GPUVMEnable, 2101 HostVMEnable, 2102 HostVMMaxNonCachedPageTableLevels, 2103 GPUVMMaxPageTableLevels, 2104 GPUVMMinPageSizeKBytes[k], 2105 
HostVMMinPageSize, 2106 PTEBufferSizeInRequestsForLuma[k], 2107 myPipe[k].PitchY, 2108 myPipe[k].DCCMetaPitchY, 2109 myPipe[k].BlockWidthY, 2110 myPipe[k].BlockHeightY, 2111 2112 /* Output */ 2113 &MetaRowByteY[k], 2114 &PixelPTEBytesPerRowY[k], 2115 &dpte_row_width_luma_ub[k], 2116 &dpte_row_height_luma[k], 2117 &dpte_row_height_linear_luma[k], 2118 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2119 &dpte_row_width_luma_ub_one_row_per_frame[k], 2120 &dpte_row_height_luma_one_row_per_frame[k], 2121 &meta_req_width[k], 2122 &meta_req_height[k], 2123 &meta_row_width[k], 2124 &meta_row_height[k], 2125 &PixelPTEReqWidthY[k], 2126 &PixelPTEReqHeightY[k], 2127 &PTERequestSizeY[k], 2128 &dpde0_bytes_per_frame_ub_l[k], 2129 &meta_pte_bytes_per_frame_ub_l[k]); 2130 2131 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2132 myPipe[k].VRatio, 2133 myPipe[k].VTaps, 2134 myPipe[k].InterlaceEnable, 2135 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2136 myPipe[k].SwathHeightY, 2137 myPipe[k].SourceRotation, 2138 myPipe[k].ViewportStationary, 2139 SwathWidthY[k], 2140 myPipe[k].ViewportHeight, 2141 myPipe[k].ViewportXStart, 2142 myPipe[k].ViewportYStart, 2143 2144 /* Output */ 2145 &VInitPreFillY[k], 2146 &MaxNumSwathY[k]); 2147 2148 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2149 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2150 2151 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2152 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2153 PTEBufferSizeNotExceeded[k] = true; 2154 } else { 2155 PTEBufferSizeNotExceeded[k] = false; 2156 } 2157 2158 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2159 PTEBufferSizeInRequestsForLuma[k] && 2160 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2161 } 2162 2163 dml32_CalculateMALLUseForStaticScreen( 2164 NumberOfActiveSurfaces, 2165 
MALLAllocatedForDCN, 2166 UseMALLForStaticScreen, // mode 2167 SurfaceSizeInMALL, 2168 one_row_per_frame_fits_in_buffer, 2169 /* Output */ 2170 UsesMALLForStaticScreen); // boolen 2171 2172 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2173 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2174 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2175 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2176 (GPUVMMinPageSizeKBytes[k] > 64); 2177 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2178 } 2179 2180 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2181 #ifdef __DML_VBA_DEBUG__ 2182 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2183 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2184 #endif 2185 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2186 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2187 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2188 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2189 2190 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2191 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2192 2193 if (use_one_row_for_frame[k]) { 2194 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2195 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2196 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2197 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2198 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2199 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2200 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2201 } 2202 2203 if (MetaRowByte[k] 
<= DCCMetaBufferSizeBytes) 2204 DCCMetaBufferSizeNotExceeded[k] = true; 2205 else 2206 DCCMetaBufferSizeNotExceeded[k] = false; 2207 2208 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2209 if (use_one_row_for_frame[k]) 2210 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2211 2212 dml32_CalculateRowBandwidth( 2213 GPUVMEnable, 2214 myPipe[k].SourcePixelFormat, 2215 myPipe[k].VRatio, 2216 myPipe[k].VRatioChroma, 2217 myPipe[k].DCCEnable, 2218 myPipe[k].HTotal / myPipe[k].PixelClock, 2219 MetaRowByteY[k], MetaRowByteC[k], 2220 meta_row_height[k], 2221 meta_row_height_chroma[k], 2222 PixelPTEBytesPerRowY[k], 2223 PixelPTEBytesPerRowC[k], 2224 dpte_row_height_luma[k], 2225 dpte_row_height_chroma[k], 2226 2227 /* Output */ 2228 &meta_row_bw[k], 2229 &dpte_row_bw[k]); 2230 #ifdef __DML_VBA_DEBUG__ 2231 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2232 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2233 __func__, k, use_one_row_for_frame_flip[k]); 2234 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2235 __func__, k, UseMALLForPStateChange[k]); 2236 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2237 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2238 __func__, k, dpte_row_width_luma_ub[k]); 2239 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2240 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2241 __func__, k, dpte_row_height_chroma[k]); 2242 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2243 __func__, k, dpte_row_width_chroma_ub[k]); 2244 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2246 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2247 __func__, k, 
PTEBufferSizeNotExceeded[k]); 2248 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2249 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2250 #endif 2251 } 2252 } // CalculateVMRowAndSwath 2253 2254 unsigned int dml32_CalculateVMAndRowBytes( 2255 bool ViewportStationary, 2256 bool DCCEnable, 2257 unsigned int NumberOfDPPs, 2258 unsigned int BlockHeight256Bytes, 2259 unsigned int BlockWidth256Bytes, 2260 enum source_format_class SourcePixelFormat, 2261 unsigned int SurfaceTiling, 2262 unsigned int BytePerPixel, 2263 enum dm_rotation_angle SourceRotation, 2264 double SwathWidth, 2265 unsigned int ViewportHeight, 2266 unsigned int ViewportXStart, 2267 unsigned int ViewportYStart, 2268 bool GPUVMEnable, 2269 bool HostVMEnable, 2270 unsigned int HostVMMaxNonCachedPageTableLevels, 2271 unsigned int GPUVMMaxPageTableLevels, 2272 unsigned int GPUVMMinPageSizeKBytes, 2273 unsigned int HostVMMinPageSize, 2274 unsigned int PTEBufferSizeInRequests, 2275 unsigned int Pitch, 2276 unsigned int DCCMetaPitch, 2277 unsigned int MacroTileWidth, 2278 unsigned int MacroTileHeight, 2279 2280 /* Output */ 2281 unsigned int *MetaRowByte, 2282 unsigned int *PixelPTEBytesPerRow, 2283 unsigned int *dpte_row_width_ub, 2284 unsigned int *dpte_row_height, 2285 unsigned int *dpte_row_height_linear, 2286 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2287 unsigned int *dpte_row_width_ub_one_row_per_frame, 2288 unsigned int *dpte_row_height_one_row_per_frame, 2289 unsigned int *MetaRequestWidth, 2290 unsigned int *MetaRequestHeight, 2291 unsigned int *meta_row_width, 2292 unsigned int *meta_row_height, 2293 unsigned int *PixelPTEReqWidth, 2294 unsigned int *PixelPTEReqHeight, 2295 unsigned int *PTERequestSize, 2296 unsigned int *DPDE0BytesFrame, 2297 unsigned int *MetaPTEBytesFrame) 2298 { 2299 unsigned int MPDEBytesFrame; 2300 unsigned int DCCMetaSurfaceBytes; 2301 unsigned int ExtraDPDEBytesFrame; 2302 unsigned 
int PDEAndMetaPTEBytesFrame; 2303 unsigned int HostVMDynamicLevels = 0; 2304 unsigned int MacroTileSizeBytes; 2305 unsigned int vp_height_meta_ub; 2306 unsigned int vp_height_dpte_ub; 2307 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2308 2309 if (GPUVMEnable == true && HostVMEnable == true) { 2310 if (HostVMMinPageSize < 2048) 2311 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2312 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2313 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2314 else 2315 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2316 } 2317 2318 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2319 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2320 if (SurfaceTiling == dm_sw_linear) { 2321 *meta_row_height = 32; 2322 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2323 - dml_floor(ViewportXStart, *MetaRequestWidth); 2324 } else if (!IsVertical(SourceRotation)) { 2325 *meta_row_height = *MetaRequestHeight; 2326 if (ViewportStationary && NumberOfDPPs == 1) { 2327 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2328 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2329 } else { 2330 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2331 } 2332 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2333 } else { 2334 *meta_row_height = *MetaRequestWidth; 2335 if (ViewportStationary && NumberOfDPPs == 1) { 2336 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2337 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2338 } else { 2339 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2340 } 2341 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2342 } 2343 2344 if 
(ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2345 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2346 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2347 } else if (!IsVertical(SourceRotation)) { 2348 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2349 } else { 2350 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2351 } 2352 2353 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2354 2355 if (GPUVMEnable == true) { 2356 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2357 (8 * 4.0 * 1024), 1) + 1) * 64; 2358 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2359 } else { 2360 *MetaPTEBytesFrame = 0; 2361 MPDEBytesFrame = 0; 2362 } 2363 2364 if (DCCEnable != true) { 2365 *MetaPTEBytesFrame = 0; 2366 MPDEBytesFrame = 0; 2367 *MetaRowByte = 0; 2368 } 2369 2370 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2371 2372 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2373 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2374 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2375 MacroTileHeight - 1, MacroTileHeight) - 2376 dml_floor(ViewportYStart, MacroTileHeight); 2377 } else if (!IsVertical(SourceRotation)) { 2378 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2379 } else { 2380 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2381 } 2382 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2383 (8 * 2097152), 1) + 1); 2384 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2385 } else { 2386 *DPDE0BytesFrame = 0; 2387 ExtraDPDEBytesFrame = 0; 2388 vp_height_dpte_ub = 0; 2389 } 2390 2391 
PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2392 2393 #ifdef __DML_VBA_DEBUG__ 2394 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2395 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2396 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2397 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2398 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2399 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2400 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2401 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2402 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2403 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2404 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2405 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2406 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2407 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2408 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2409 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2410 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2411 #endif 2412 2413 if (HostVMEnable == true) 2414 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2415 2416 if (SurfaceTiling == dm_sw_linear) { 2417 *PixelPTEReqHeight = 1; 2418 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2419 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2420 *PTERequestSize = 64; 2421 } else if (GPUVMMinPageSizeKBytes == 4) { 2422 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2423 
*PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2424 *PTERequestSize = 128; 2425 } else { 2426 *PixelPTEReqHeight = MacroTileHeight; 2427 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2428 *PTERequestSize = 64; 2429 } 2430 #ifdef __DML_VBA_DEBUG__ 2431 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2432 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2433 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2434 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2435 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2436 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2437 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2438 #endif 2439 2440 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2441 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2442 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2443 (double) *PixelPTEReqWidth; 2444 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2445 *PTERequestSize; 2446 2447 if (SurfaceTiling == dm_sw_linear) { 2448 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2449 *PixelPTEReqWidth / Pitch), 1)); 2450 #ifdef __DML_VBA_DEBUG__ 2451 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2452 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2453 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2454 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2455 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2456 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2457 dml_print("DML::%s: dpte_row_height = %d (4)\n", 
__func__, 2458 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2459 *PixelPTEReqWidth / Pitch), 1)); 2460 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2461 #endif 2462 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2463 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2464 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2465 2466 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2467 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2468 PixelPTEReqWidth_linear / Pitch), 1); 2469 if (*dpte_row_height_linear > 128) 2470 *dpte_row_height_linear = 128; 2471 2472 } else if (!IsVertical(SourceRotation)) { 2473 *dpte_row_height = *PixelPTEReqHeight; 2474 2475 if (GPUVMMinPageSizeKBytes > 64) { 2476 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2477 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2478 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2479 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2480 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2481 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2482 } else { 2483 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2484 *PixelPTEReqWidth; 2485 } 2486 2487 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2488 } else { 2489 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2490 2491 if (ViewportStationary && (NumberOfDPPs == 1)) { 2492 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2493 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2494 } else { 2495 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2496 * *PixelPTEReqHeight; 2497 } 2498 2499 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * 
*PTERequestSize; 2500 } 2501 2502 if (GPUVMEnable != true) 2503 *PixelPTEBytesPerRow = 0; 2504 if (HostVMEnable == true) 2505 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2506 2507 #ifdef __DML_VBA_DEBUG__ 2508 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2509 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2510 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2511 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2512 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2513 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2514 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2515 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2516 __func__, *dpte_row_width_ub_one_row_per_frame); 2517 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2518 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2519 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2520 *MetaPTEBytesFrame); 2521 #endif 2522 2523 return PDEAndMetaPTEBytesFrame; 2524 } // CalculateVMAndRowBytes 2525 2526 double dml32_CalculatePrefetchSourceLines( 2527 double VRatio, 2528 unsigned int VTaps, 2529 bool Interlace, 2530 bool ProgressiveToInterlaceUnitInOPP, 2531 unsigned int SwathHeight, 2532 enum dm_rotation_angle SourceRotation, 2533 bool ViewportStationary, 2534 double SwathWidth, 2535 unsigned int ViewportHeight, 2536 unsigned int ViewportXStart, 2537 unsigned int ViewportYStart, 2538 2539 /* Output */ 2540 double *VInitPreFill, 2541 unsigned int *MaxNumSwath) 2542 { 2543 2544 unsigned int vp_start_rot; 2545 unsigned int sw0_tmp; 2546 unsigned int MaxPartialSwath; 2547 double numLines; 2548 2549 #ifdef __DML_VBA_DEBUG__ 2550 
dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2551 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2552 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2553 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2554 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2555 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2556 #endif 2557 if (ProgressiveToInterlaceUnitInOPP) 2558 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2559 else 2560 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2561 2562 if (ViewportStationary) { 2563 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2564 vp_start_rot = SwathHeight - 2565 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2566 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2567 vp_start_rot = ViewportXStart; 2568 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2569 vp_start_rot = SwathHeight - 2570 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2571 } else { 2572 vp_start_rot = ViewportYStart; 2573 } 2574 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2575 if (sw0_tmp < *VInitPreFill) 2576 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2577 else 2578 *MaxNumSwath = 1; 2579 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2580 } else { 2581 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2582 if (*VInitPreFill > 1) 2583 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2584 else 2585 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2586 } 2587 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2588 2589 #ifdef __DML_VBA_DEBUG__ 2590 
dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2591 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2592 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2593 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2594 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2595 #endif 2596 return numLines; 2597 2598 } // CalculatePrefetchSourceLines 2599 2600 void dml32_CalculateMALLUseForStaticScreen( 2601 unsigned int NumberOfActiveSurfaces, 2602 unsigned int MALLAllocatedForDCNFinal, 2603 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2604 unsigned int SurfaceSizeInMALL[], 2605 bool one_row_per_frame_fits_in_buffer[], 2606 2607 /* output */ 2608 bool UsesMALLForStaticScreen[]) 2609 { 2610 unsigned int k; 2611 unsigned int SurfaceToAddToMALL; 2612 bool CanAddAnotherSurfaceToMALL; 2613 unsigned int TotalSurfaceSizeInMALL; 2614 2615 TotalSurfaceSizeInMALL = 0; 2616 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2617 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2618 if (UsesMALLForStaticScreen[k]) 2619 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2620 #ifdef __DML_VBA_DEBUG__ 2621 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2622 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2623 #endif 2624 } 2625 2626 SurfaceToAddToMALL = 0; 2627 CanAddAnotherSurfaceToMALL = true; 2628 while (CanAddAnotherSurfaceToMALL) { 2629 CanAddAnotherSurfaceToMALL = false; 2630 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2631 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2632 !UsesMALLForStaticScreen[k] && 2633 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2634 one_row_per_frame_fits_in_buffer[k] && 2635 
(!CanAddAnotherSurfaceToMALL || 2636 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2637 CanAddAnotherSurfaceToMALL = true; 2638 SurfaceToAddToMALL = k; 2639 #ifdef __DML_VBA_DEBUG__ 2640 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2641 __func__, k, UseMALLForStaticScreen[k]); 2642 #endif 2643 } 2644 } 2645 if (CanAddAnotherSurfaceToMALL) { 2646 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2647 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2648 2649 #ifdef __DML_VBA_DEBUG__ 2650 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2651 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2652 #endif 2653 2654 } 2655 } 2656 } 2657 2658 void dml32_CalculateRowBandwidth( 2659 bool GPUVMEnable, 2660 enum source_format_class SourcePixelFormat, 2661 double VRatio, 2662 double VRatioChroma, 2663 bool DCCEnable, 2664 double LineTime, 2665 unsigned int MetaRowByteLuma, 2666 unsigned int MetaRowByteChroma, 2667 unsigned int meta_row_height_luma, 2668 unsigned int meta_row_height_chroma, 2669 unsigned int PixelPTEBytesPerRowLuma, 2670 unsigned int PixelPTEBytesPerRowChroma, 2671 unsigned int dpte_row_height_luma, 2672 unsigned int dpte_row_height_chroma, 2673 /* Output */ 2674 double *meta_row_bw, 2675 double *dpte_row_bw) 2676 { 2677 if (DCCEnable != true) { 2678 *meta_row_bw = 0; 2679 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2680 SourcePixelFormat == dm_rgbe_alpha) { 2681 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2682 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2683 } else { 2684 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2685 } 2686 2687 if (GPUVMEnable != true) { 2688 *dpte_row_bw = 0; 2689 } else if (SourcePixelFormat == dm_420_8 || 
SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2690 SourcePixelFormat == dm_rgbe_alpha) { 2691 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2692 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2693 } else { 2694 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2695 } 2696 } 2697 2698 double dml32_CalculateUrgentLatency( 2699 double UrgentLatencyPixelDataOnly, 2700 double UrgentLatencyPixelMixedWithVMData, 2701 double UrgentLatencyVMDataOnly, 2702 bool DoUrgentLatencyAdjustment, 2703 double UrgentLatencyAdjustmentFabricClockComponent, 2704 double UrgentLatencyAdjustmentFabricClockReference, 2705 double FabricClock) 2706 { 2707 double ret; 2708 2709 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2710 if (DoUrgentLatencyAdjustment == true) { 2711 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2712 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2713 } 2714 return ret; 2715 } 2716 2717 void dml32_CalculateUrgentBurstFactor( 2718 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2719 unsigned int swath_width_luma_ub, 2720 unsigned int swath_width_chroma_ub, 2721 unsigned int SwathHeightY, 2722 unsigned int SwathHeightC, 2723 double LineTime, 2724 double UrgentLatency, 2725 double CursorBufferSize, 2726 unsigned int CursorWidth, 2727 unsigned int CursorBPP, 2728 double VRatio, 2729 double VRatioC, 2730 double BytePerPixelInDETY, 2731 double BytePerPixelInDETC, 2732 unsigned int DETBufferSizeY, 2733 unsigned int DETBufferSizeC, 2734 /* Output */ 2735 double *UrgentBurstFactorCursor, 2736 double *UrgentBurstFactorLuma, 2737 double *UrgentBurstFactorChroma, 2738 bool *NotEnoughUrgentLatencyHiding) 2739 { 2740 double LinesInDETLuma; 2741 double LinesInDETChroma; 2742 unsigned int LinesInCursorBuffer; 2743 double CursorBufferSizeInTime; 2744 double 
DETBufferSizeInTimeLuma; 2745 double DETBufferSizeInTimeChroma; 2746 2747 *NotEnoughUrgentLatencyHiding = 0; 2748 2749 if (CursorWidth > 0) { 2750 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2751 (CursorWidth * CursorBPP / 8.0)), 1.0); 2752 if (VRatio > 0) { 2753 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2754 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2755 *NotEnoughUrgentLatencyHiding = 1; 2756 *UrgentBurstFactorCursor = 0; 2757 } else { 2758 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2759 (CursorBufferSizeInTime - UrgentLatency); 2760 } 2761 } else { 2762 *UrgentBurstFactorCursor = 1; 2763 } 2764 } 2765 2766 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2767 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2768 2769 if (VRatio > 0) { 2770 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2771 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2772 *NotEnoughUrgentLatencyHiding = 1; 2773 *UrgentBurstFactorLuma = 0; 2774 } else { 2775 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2776 } 2777 } else { 2778 *UrgentBurstFactorLuma = 1; 2779 } 2780 2781 if (BytePerPixelInDETC > 0) { 2782 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 
2783 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2784 / swath_width_chroma_ub; 2785 2786 if (VRatio > 0) { 2787 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2788 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2789 *NotEnoughUrgentLatencyHiding = 1; 2790 *UrgentBurstFactorChroma = 0; 2791 } else { 2792 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2793 / (DETBufferSizeInTimeChroma - UrgentLatency); 2794 } 2795 } else { 2796 *UrgentBurstFactorChroma = 1; 2797 } 2798 } 2799 } // CalculateUrgentBurstFactor 2800 2801 void dml32_CalculateDCFCLKDeepSleep( 2802 unsigned int NumberOfActiveSurfaces, 2803 unsigned int BytePerPixelY[], 2804 unsigned int BytePerPixelC[], 2805 double VRatio[], 2806 double VRatioChroma[], 2807 double SwathWidthY[], 2808 double SwathWidthC[], 2809 unsigned int DPPPerSurface[], 2810 double HRatio[], 2811 double HRatioChroma[], 2812 double PixelClock[], 2813 double PSCL_THROUGHPUT[], 2814 double PSCL_THROUGHPUT_CHROMA[], 2815 double Dppclk[], 2816 double ReadBandwidthLuma[], 2817 double ReadBandwidthChroma[], 2818 unsigned int ReturnBusWidth, 2819 2820 /* Output */ 2821 double *DCFClkDeepSleep) 2822 { 2823 unsigned int k; 2824 double DisplayPipeLineDeliveryTimeLuma; 2825 double DisplayPipeLineDeliveryTimeChroma; 2826 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2827 double ReadBandwidth = 0.0; 2828 2829 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2830 2831 if (VRatio[k] <= 1) { 2832 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2833 / PixelClock[k]; 2834 } else { 2835 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2836 } 2837 if (BytePerPixelC[k] == 0) { 2838 DisplayPipeLineDeliveryTimeChroma = 0; 2839 } else { 2840 if (VRatioChroma[k] <= 1) { 2841 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2842 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2843 } else { 2844 
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2845 / Dppclk[k]; 2846 } 2847 } 2848 2849 if (BytePerPixelC[k] > 0) { 2850 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2851 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2852 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2853 32.0 / DisplayPipeLineDeliveryTimeChroma); 2854 } else { 2855 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2856 64.0 / DisplayPipeLineDeliveryTimeLuma; 2857 } 2858 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2859 2860 #ifdef __DML_VBA_DEBUG__ 2861 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2862 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2863 #endif 2864 } 2865 2866 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2867 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2868 2869 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2870 2871 #ifdef __DML_VBA_DEBUG__ 2872 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2873 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2874 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2875 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2876 #endif 2877 2878 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2879 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2880 #ifdef __DML_VBA_DEBUG__ 2881 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2882 #endif 2883 } // CalculateDCFCLKDeepSleep 2884 2885 double dml32_CalculateWriteBackDelay( 2886 enum source_format_class WritebackPixelFormat, 2887 double WritebackHRatio, 2888 double 
WritebackVRatio, 2889 unsigned int WritebackVTaps, 2890 unsigned int WritebackDestinationWidth, 2891 unsigned int WritebackDestinationHeight, 2892 unsigned int WritebackSourceHeight, 2893 unsigned int HTotal) 2894 { 2895 double CalculateWriteBackDelay; 2896 double Line_length; 2897 double Output_lines_last_notclamped; 2898 double WritebackVInit; 2899 2900 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2901 Line_length = dml_max((double) WritebackDestinationWidth, 2902 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2903 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2904 dml_ceil(((double)WritebackSourceHeight - 2905 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2906 if (Output_lines_last_notclamped < 0) { 2907 CalculateWriteBackDelay = 0; 2908 } else { 2909 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2910 (HTotal - WritebackDestinationWidth) + 80; 2911 } 2912 return CalculateWriteBackDelay; 2913 } 2914 2915 void dml32_UseMinimumDCFCLK( 2916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2917 bool DRRDisplay[], 2918 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2919 unsigned int MaxInterDCNTileRepeaters, 2920 unsigned int MaxPrefetchMode, 2921 double DRAMClockChangeLatencyFinal, 2922 double FCLKChangeLatency, 2923 double SREnterPlusExitTime, 2924 unsigned int ReturnBusWidth, 2925 unsigned int RoundTripPingLatencyCycles, 2926 unsigned int ReorderingBytes, 2927 unsigned int PixelChunkSizeInKByte, 2928 unsigned int MetaChunkSize, 2929 bool GPUVMEnable, 2930 unsigned int GPUVMMaxPageTableLevels, 2931 bool HostVMEnable, 2932 unsigned int NumberOfActiveSurfaces, 2933 double HostVMMinPageSize, 2934 unsigned int HostVMMaxNonCachedPageTableLevels, 2935 bool DynamicMetadataVMEnabled, 2936 bool ImmediateFlipRequirement, 2937 bool ProgressiveToInterlaceUnitInOPP, 2938 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2939 double 
PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2940 unsigned int VTotal[], 2941 unsigned int VActive[], 2942 unsigned int DynamicMetadataTransmittedBytes[], 2943 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2944 bool Interlace[], 2945 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2946 double RequiredDISPCLK[][2], 2947 double UrgLatency[], 2948 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2949 double ProjectedDCFClkDeepSleep[][2], 2950 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2951 unsigned int TotalNumberOfActiveDPP[][2], 2952 unsigned int TotalNumberOfDCCActiveDPP[][2], 2953 unsigned int dpte_group_bytes[], 2954 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2955 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2956 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2957 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2958 unsigned int BytePerPixelY[], 2959 unsigned int BytePerPixelC[], 2960 unsigned int HTotal[], 2961 double PixelClock[], 2962 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2963 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2964 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2965 bool DynamicMetadataEnable[], 2966 double ReadBandwidthLuma[], 2967 double ReadBandwidthChroma[], 2968 double DCFCLKPerState[], 2969 /* Output */ 2970 double DCFCLKState[][2]) 2971 { 2972 unsigned int i, j, k; 2973 unsigned int dummy1; 2974 double dummy2, dummy3; 2975 double NormalEfficiency; 2976 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2977 2978 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2979 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2980 for (j = 0; j <= 1; ++j) { 2981 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2982 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2983 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2984 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2985 double MinimumTWait = 
0.0; 2986 double DPTEBandwidth; 2987 double DCFCLKRequiredForAverageBandwidth; 2988 unsigned int ExtraLatencyBytes; 2989 double ExtraLatencyCycles; 2990 double DCFCLKRequiredForPeakBandwidth; 2991 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2992 double MinimumTvmPlus2Tr0; 2993 2994 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 2995 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2996 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 2997 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 2998 / (15.75 * HTotal[k] / PixelClock[k]); 2999 } 3000 3001 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 3002 NoOfDPPState[k] = NoOfDPP[i][j][k]; 3003 3004 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 3005 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 3006 3007 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 3008 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 3009 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 3010 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 3011 HostVMMaxNonCachedPageTableLevels); 3012 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 3013 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 3014 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3015 double DCFCLKCyclesRequiredInPrefetch; 3016 double PrefetchTime; 3017 3018 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 3019 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 3020 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 3021 * BytePerPixelC[k]) / NormalEfficiency 3022 / ReturnBusWidth; 3023 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 3024 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 3025 / NormalEfficiency / ReturnBusWidth 3026 * (GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 3027 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3028 / ReturnBusWidth 3029 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3030 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3031 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3032 * HTotal[k] / PixelClock[k]; 3033 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3034 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3035 UrgLatency[i] * GPUVMMaxPageTableLevels * 3036 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3037 3038 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3039 UseMALLForPStateChange[k], 3040 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3041 DRRDisplay[k], 3042 DRAMClockChangeLatencyFinal, 3043 FCLKChangeLatency, 3044 UrgLatency[i], 3045 SREnterPlusExitTime); 3046 3047 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3048 MinimumTWait - UrgLatency[i] * 3049 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3050 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 
3051 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3052 DynamicMetadataVMExtraLatency[k]; 3053 3054 if (PrefetchTime > 0) { 3055 double ExpectedVRatioPrefetch; 3056 3057 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3058 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3059 DCFCLKCyclesRequiredInPrefetch); 3060 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3061 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3062 PrefetchPixelLinesTime[k] * 3063 dml_max(1.0, ExpectedVRatioPrefetch) * 3064 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3065 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3067 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3068 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3069 NormalEfficiency / ReturnBusWidth; 3070 } 3071 } else { 3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3073 } 3074 if (DynamicMetadataEnable[k] == true) { 3075 double TSetupPipe; 3076 double TdmbfPipe; 3077 double TdmsksPipe; 3078 double TdmecPipe; 3079 double AllowedTimeForUrgentExtraLatency; 3080 3081 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3082 MaxInterDCNTileRepeaters, 3083 RequiredDPPCLKPerSurface[i][j][k], 3084 RequiredDISPCLK[i][j], 3085 ProjectedDCFClkDeepSleep[i][j], 3086 PixelClock[k], 3087 HTotal[k], 3088 VTotal[k] - VActive[k], 3089 DynamicMetadataTransmittedBytes[k], 3090 DynamicMetadataLinesBeforeActiveRequired[k], 3091 Interlace[k], 3092 ProgressiveToInterlaceUnitInOPP, 3093 3094 /* output */ 3095 &TSetupPipe, 3096 &TdmbfPipe, 3097 &TdmecPipe, 3098 &TdmsksPipe, 3099 &dummy1, 3100 &dummy2, 3101 &dummy3); 3102 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3103 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3104 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3105 if (AllowedTimeForUrgentExtraLatency > 0) 3106 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3107 
dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3108 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3109 else 3110 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3111 } 3112 } 3113 DCFCLKRequiredForPeakBandwidth = 0; 3114 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3115 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3116 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3117 } 3118 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3119 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3120 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3121 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3122 double MaximumTvmPlus2Tr0PlusTsw; 3123 3124 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3125 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3126 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3127 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3128 } else { 3129 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3130 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3131 MinimumTvmPlus2Tr0 - 3132 PrefetchPixelLinesTime[k] / 4), 3133 (2 * ExtraLatencyCycles + 3134 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3135 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3136 } 3137 } 3138 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3139 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3140 } 3141 } 3142 }

/*
 * dml32_CalculateExtraLatencyBytes - byte count used for the extra-latency term.
 *
 * The result is the sum of:
 *   - ReorderingBytes,
 *   - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *      TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024, and
 *   - when GPUVMEnable is set, for each of the NumberOfActiveSurfaces surfaces:
 *     NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) *
 *     HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set;
 * otherwise it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (floored at 0) depending on whether HostVMMinPageSize is below 2048, below
 * 1048576, or larger.
 * NOTE(review): the unit of HostVMMinPageSize is not visible here - confirm
 * against the caller before touching the thresholds.
 *
 * NumberOfDPP and dpte_group_bytes are only read, and only for indices
 * [0, NumberOfActiveSurfaces).
 *
 * Return: the accumulated double converted to unsigned int on return (any
 * fractional part is dropped).
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	double ret;
	unsigned int HostVMDynamicLevels;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
			/* cast to int so the subtraction can go negative before the clamp */
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	} else {
		HostVMDynamicLevels = 0;
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
					(1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
		}
	}
	return ret;
}

/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - derive the VUpdate /
 * VReady pixel offsets and the dynamic-metadata timing terms for one pipe.
 *
 * Outputs (all written unconditionally):
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                             TotalRepeaterDelayTime) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk, TotalRepeaterDelayTime +
 *                             20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix)
 *                       / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half of VBlank lines' time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, else
 *                       that many lines' time; halved again when
 *                       InterlaceEnable == 1 and ProgressiveToInterlaceUnitInOPP
 *                       is false.
 * where TotalRepeaterDelayTime =
 *       MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 *
 * The clocks are used as divisors without any zero check in this function.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles: print them with %f */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}

/*
 * dml32_CalculateTWait - pick the wait time for the given prefetch mode.
 *
 * The first matching rule wins:
 *   1. PrefetchMode == 0, UseMALLForPStateChange is none of full_frame /
 *      sub_viewport / phantom_pipe, and the surface is not a DRR display
 *      being synchronized for UCLK p-state change:
 *        max(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime,
 *            UrgentLatency)
 *   2. PrefetchMode <= 1 and not phantom_pipe:
 *        max(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime,
 *            UrgentLatency)
 *   3. PrefetchMode <= 2 and not phantom_pipe:
 *        max(SREnterPlusExitTime, UrgentLatency)
 *   4. otherwise: UrgentLatency
 *
 * Return: the selected wait time, in the same unit as the latency inputs.
 */
double dml32_CalculateTWait(
		unsigned int PrefetchMode,
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
		bool DRRDisplay,
		double DRAMClockChangeLatency,
		double FCLKChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	double TWait = 0.0;

	if (PrefetchMode == 0 &&
			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
	} else {
		TWait = UrgentLatency;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
} // CalculateTWait

// Function: get_return_bw_mbps
// Megabyte per second
/*
 * Returns the minimum of three derated ideal bandwidths:
 *   - SDP port:  soc->return_bus_width_bytes * DCFCLK,
 *                scaled by pct_ideal_sdp_bw_after_urgent
 *   - fabric:    FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes,
 *                scaled by pct_ideal_fabric_bw_after_urgent
 *   - DRAM:      DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes,
 *                scaled by pct_ideal_dram_bw_after_urgent_strobe when
 *                VoltageLevel < 2, else by
 *                pct_ideal_dram_sdp_bw_after_urgent_pixel_only
 *
 * NOTE(review): PixelDataOnlyReturnBW and PixelMixedWithVMDataReturnBW are
 * computed from identical expressions, so HostVMEnable currently has no
 * effect on the result. Confirm whether the mixed case was meant to use a
 * different DRAM percentage before changing it.
 */
double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
		const int VoltageLevel,
		const bool HostVMEnable,
		const double DCFCLK,
		const double FabricClock,
		const double DRAMSpeed)
{
	double ReturnBW = 0.;
	double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
	double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
	double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
	double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);

	if (HostVMEnable != true)
		ReturnBW = PixelDataOnlyReturnBW;
	else
		ReturnBW = PixelMixedWithVMDataReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
	dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
	dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
	dml_print("DML::%s: ReturnBW                  = %f MBps\n", __func__, ReturnBW);
#endif
	return ReturnBW;
}

// Function: get_return_bw_mbps_vm_only
// Megabyte per second
/*
 * Same three-way minimum as dml32_get_return_bw_mbps, but the DRAM term uses
 * pct_ideal_dram_sdp_bw_after_urgent_vm_only when VoltageLevel >= 2.
 *
 * NOTE(review): the fabric term here is scaled by
 * pct_ideal_sdp_bw_after_urgent, whereas dml32_get_return_bw_mbps scales it
 * by pct_ideal_fabric_bw_after_urgent - confirm this difference is intended.
 */
double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
		const int VoltageLevel,
		const double DCFCLK,
		const double FabricClock,
		const double DRAMSpeed)
{
	double VMDataOnlyReturnBW = dml_min3(
			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
					* (VoltageLevel < 2 ?
							soc->pct_ideal_dram_bw_after_urgent_strobe :
							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
#endif
	return VMDataOnlyReturnBW;
}

/*
 * dml32_CalculateExtraLatency - extra latency as a time.
 *
 * Computes the byte count with dml32_CalculateExtraLatencyBytes() (all
 * byte-related arguments are forwarded unchanged) and returns
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *     + ExtraLatencyBytes / ReturnBW
 *
 * DCFCLK and ReturnBW are used as divisors without a zero check.
 */
double dml32_CalculateExtraLatency(
		unsigned int RoundTripPingLatencyCycles,
		unsigned int ReorderingBytes,
		double DCFCLK,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	double ExtraLatencyBytes;
	double ExtraLatency;

	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActiveSurfaces,
			NumberOfDPP,
			dpte_group_bytes,
			HostVMInefficiencyFactor,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);

	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
#endif

	return ExtraLatency;
} // CalculateExtraLatency

3391 bool dml32_CalculatePrefetchSchedule( 3392 struct vba_vars_st *v, 3393 unsigned int k, 3394 double HostVMInefficiencyFactor, 3395 DmlPipe *myPipe, 3396 unsigned int DSCDelay, 3397 unsigned int DPP_RECOUT_WIDTH, 3398 unsigned int VStartup, 3399 unsigned int MaxVStartup, 3400 double UrgentLatency, 3401 double UrgentExtraLatency, 3402 double TCalc, 3403 unsigned int PDEAndMetaPTEBytesFrame, 3404 unsigned int MetaRowByte, 3405 unsigned int PixelPTEBytesPerRow, 3406 double PrefetchSourceLinesY, 3407 unsigned int SwathWidthY, 3408 unsigned int VInitPreFillY, 3409 unsigned int MaxNumSwathY, 3410 double PrefetchSourceLinesC, 3411 unsigned int SwathWidthC, 3412 unsigned int VInitPreFillC, 3413 unsigned int MaxNumSwathC, 3414 unsigned int swath_width_luma_ub, 3415 unsigned int swath_width_chroma_ub, 3416 unsigned int SwathHeightY, 3417 unsigned int SwathHeightC, 3418 double TWait, 3419 /* Output */ 3420 double *DSTXAfterScaler, 3421 double *DSTYAfterScaler, 3422 double *DestinationLinesForPrefetch, 3423 double *PrefetchBandwidth, 3424 double *DestinationLinesToRequestVMInVBlank, 3425 double *DestinationLinesToRequestRowInVBlank, 3426 double *VRatioPrefetchY, 3427 double *VRatioPrefetchC, 3428 double *RequiredPrefetchPixDataBWLuma, 3429 double *RequiredPrefetchPixDataBWChroma, 3430 bool *NotEnoughTimeForDynamicMetadata, 3431 double *Tno_bw, 3432 double *prefetch_vmrow_bw, 3433 double *Tdmdl_vm, 3434 double *Tdmdl, 3435 double *TSetup, 3436 unsigned int *VUpdateOffsetPix, 3437 double *VUpdateWidthPix, 3438 double *VReadyOffsetPix) 3439 { 3440 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 3441 bool MyError = false; 3442 unsigned int DPPCycles, DISPCLKCycles; 3443 double
DSTTotalPixelsAfterScaler; 3444 double LineTime; 3445 double dst_y_prefetch_equ; 3446 double prefetch_bw_oto; 3447 double Tvm_oto; 3448 double Tr0_oto; 3449 double Tvm_oto_lines; 3450 double Tr0_oto_lines; 3451 double dst_y_prefetch_oto; 3452 double TimeForFetchingMetaPTE = 0; 3453 double TimeForFetchingRowInVBlank = 0; 3454 double LinesToRequestPrefetchPixelData = 0; 3455 unsigned int HostVMDynamicLevelsTrips; 3456 double trip_to_mem; 3457 double Tvm_trips; 3458 double Tr0_trips; 3459 double Tvm_trips_rounded; 3460 double Tr0_trips_rounded; 3461 double Lsw_oto; 3462 double Tpre_rounded; 3463 double prefetch_bw_equ; 3464 double Tvm_equ; 3465 double Tr0_equ; 3466 double Tdmbf; 3467 double Tdmec; 3468 double Tdmsks; 3469 double prefetch_sw_bytes; 3470 double bytes_pp; 3471 double dep_bytes; 3472 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3473 double min_Lsw; 3474 double Tsw_est1 = 0; 3475 double Tsw_est3 = 0; 3476 3477 if (v->GPUVMEnable == true && v->HostVMEnable == true) 3478 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3479 else 3480 HostVMDynamicLevelsTrips = 0; 3481 #ifdef __DML_VBA_DEBUG__ 3482 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); 3483 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); 3484 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3485 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3486 __func__, v->HostVMEnable, HostVMInefficiencyFactor); 3487 #endif 3488 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3489 v->MaxInterDCNTileRepeaters, 3490 myPipe->Dppclk, 3491 myPipe->Dispclk, 3492 myPipe->DCFClkDeepSleep, 3493 myPipe->PixelClock, 3494 myPipe->HTotal, 3495 myPipe->VBlank, 3496 v->DynamicMetadataTransmittedBytes[k], 3497 v->DynamicMetadataLinesBeforeActiveRequired[k], 3498 myPipe->InterlaceEnable, 3499 myPipe->ProgressiveToInterlaceUnitInOPP, 3500 TSetup, 3501 3502 /* output */ 3503 &Tdmbf, 3504 
&Tdmec, 3505 &Tdmsks, 3506 VUpdateOffsetPix, 3507 VUpdateWidthPix, 3508 VReadyOffsetPix); 3509 3510 LineTime = myPipe->HTotal / myPipe->PixelClock; 3511 trip_to_mem = UrgentLatency; 3512 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3513 3514 if (v->DynamicMetadataVMEnabled == true) 3515 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3516 else 3517 *Tdmdl = TWait + UrgentExtraLatency; 3518 3519 #ifdef __DML_VBA_ALLOW_DELTA__ 3520 if (v->DynamicMetadataEnable[k] == false) 3521 *Tdmdl = 0.0; 3522 #endif 3523 3524 if (v->DynamicMetadataEnable[k] == true) { 3525 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3526 *NotEnoughTimeForDynamicMetadata = true; 3527 #ifdef __DML_VBA_DEBUG__ 3528 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3529 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3530 __func__, Tdmbf); 3531 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3532 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3533 __func__, Tdmsks); 3534 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3535 __func__, *Tdmdl); 3536 #endif 3537 } else { 3538 *NotEnoughTimeForDynamicMetadata = false; 3539 } 3540 } else { 3541 *NotEnoughTimeForDynamicMetadata = false; 3542 } 3543 3544 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && 3545 v->GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 3546 3547 if (myPipe->ScalerEnabled) 3548 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 3549 else 3550 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 3551 3552 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 3553 3554 DISPCLKCycles = v->DISPCLKDelaySubtotal; 3555 3556 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3557 return true; 3558 3559 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3560 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3561 3562 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3563 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3564 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3565 myPipe->HActive / 2 : 0) 3566 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3567 3568 #ifdef __DML_VBA_DEBUG__ 3569 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3570 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3571 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3572 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3573 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3574 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3575 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3576 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3577 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3578 #endif 3579 3580 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3581 *DSTYAfterScaler = 1; 3582 else 3583 *DSTYAfterScaler = 0; 3584 3585 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3586 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 
1); 3587 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3588 #ifdef __DML_VBA_DEBUG__ 3589 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3590 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3591 #endif 3592 3593 MyError = false; 3594 3595 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3596 3597 if (v->GPUVMEnable == true) { 3598 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3599 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3600 if (v->GPUVMMaxPageTableLevels >= 3) { 3601 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3602 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3603 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { 3604 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3605 4.0 * LineTime; // VBA_ERROR 3606 *Tno_bw = UrgentExtraLatency; 3607 } else { 3608 *Tno_bw = 0; 3609 } 3610 } else if (myPipe->DCCEnable == true) { 3611 Tvm_trips_rounded = LineTime / 4.0; 3612 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3613 *Tno_bw = 0; 3614 } else { 3615 Tvm_trips_rounded = LineTime / 4.0; 3616 Tr0_trips_rounded = LineTime / 2.0; 3617 *Tno_bw = 0; 3618 } 3619 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3620 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3621 3622 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3623 || myPipe->SourcePixelFormat == dm_420_12) { 3624 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3625 } else { 3626 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3627 } 3628 3629 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3630 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3631 
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3632 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3633 3634 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3635 min_Lsw = dml_max(min_Lsw, 1.0); 3636 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3637 3638 if (v->GPUVMEnable == true) { 3639 Tvm_oto = dml_max3( 3640 Tvm_trips, 3641 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3642 LineTime / 4.0); 3643 } else 3644 Tvm_oto = LineTime / 4.0; 3645 3646 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3647 Tr0_oto = dml_max4( 3648 Tr0_trips, 3649 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3650 (LineTime - Tvm_oto)/2.0, 3651 LineTime / 4.0); 3652 #ifdef __DML_VBA_DEBUG__ 3653 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3654 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3655 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3656 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3657 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3658 #endif 3659 } else 3660 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3661 3662 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3663 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3664 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3665 3666 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3667 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3668 3669 #ifdef __DML_VBA_DEBUG__ 3670 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3671 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3672 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3673 dml_print("DML::%s: 
UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3674 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3675 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3676 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3677 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3678 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3679 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3680 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3681 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3682 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3683 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3684 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3685 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3686 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3687 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3688 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3689 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3690 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3691 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3692 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3693 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3694 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3695 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3696 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3697 #endif 3698 3699 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 3700 Tpre_rounded = dst_y_prefetch_equ * LineTime; 
3701 #ifdef __DML_VBA_DEBUG__ 3702 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3703 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3704 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3705 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3706 __func__, VStartup * LineTime); 3707 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3708 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3709 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3710 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3711 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3712 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3713 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3714 __func__, *DSTYAfterScaler); 3715 #endif 3716 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3717 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3718 3719 if (prefetch_sw_bytes < dep_bytes) 3720 prefetch_sw_bytes = 2 * dep_bytes; 3721 3722 *PrefetchBandwidth = 0; 3723 *DestinationLinesToRequestVMInVBlank = 0; 3724 *DestinationLinesToRequestRowInVBlank = 0; 3725 *VRatioPrefetchY = 0; 3726 *VRatioPrefetchC = 0; 3727 *RequiredPrefetchPixDataBWLuma = 0; 3728 if (dst_y_prefetch_equ > 1) { 3729 double PrefetchBandwidth1; 3730 double PrefetchBandwidth2; 3731 double PrefetchBandwidth3; 3732 double PrefetchBandwidth4; 3733 3734 if (Tpre_rounded - *Tno_bw > 0) { 3735 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3736 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3737 + prefetch_sw_bytes) 
/ (Tpre_rounded - *Tno_bw); 3738 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3739 } else 3740 PrefetchBandwidth1 = 0; 3741 3742 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3743 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3744 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3745 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3746 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3747 } 3748 3749 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3750 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3751 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3752 else 3753 PrefetchBandwidth2 = 0; 3754 3755 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3756 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3757 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3758 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3759 } else 3760 PrefetchBandwidth3 = 0; 3761 3762 3763 if (VStartup == MaxVStartup && 3764 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3765 LineTime - Tvm_trips_rounded > 0) { 3766 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3767 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3768 } 3769 3770 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3771 PrefetchBandwidth4 = prefetch_sw_bytes / 3772 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3773 } else { 3774 PrefetchBandwidth4 = 0; 3775 } 3776 3777 #ifdef __DML_VBA_DEBUG__ 3778 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3779 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3780 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3781 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3782 
dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3783 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3784 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3785 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3786 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3787 #endif 3788 { 3789 bool Case1OK; 3790 bool Case2OK; 3791 bool Case3OK; 3792 3793 if (PrefetchBandwidth1 > 0) { 3794 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3795 >= Tvm_trips_rounded 3796 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3797 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3798 Case1OK = true; 3799 } else { 3800 Case1OK = false; 3801 } 3802 } else { 3803 Case1OK = false; 3804 } 3805 3806 if (PrefetchBandwidth2 > 0) { 3807 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3808 >= Tvm_trips_rounded 3809 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3810 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3811 Case2OK = true; 3812 } else { 3813 Case2OK = false; 3814 } 3815 } else { 3816 Case2OK = false; 3817 } 3818 3819 if (PrefetchBandwidth3 > 0) { 3820 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3821 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3822 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3823 Tr0_trips_rounded) { 3824 Case3OK = true; 3825 } else { 3826 Case3OK = false; 3827 } 3828 } else { 3829 Case3OK = false; 3830 } 3831 3832 if (Case1OK) 3833 prefetch_bw_equ = PrefetchBandwidth1; 3834 else if (Case2OK) 3835 prefetch_bw_equ = PrefetchBandwidth2; 3836 else if (Case3OK) 3837 prefetch_bw_equ = PrefetchBandwidth3; 3838 else 3839 prefetch_bw_equ = PrefetchBandwidth4; 3840 3841 #ifdef __DML_VBA_DEBUG__ 3842 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3843 dml_print("DML::%s: Case2OK: 
%d\n", __func__, Case2OK); 3844 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3845 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3846 #endif 3847 3848 if (prefetch_bw_equ > 0) { 3849 if (v->GPUVMEnable == true) { 3850 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3851 HostVMInefficiencyFactor / prefetch_bw_equ, 3852 Tvm_trips, LineTime / 4); 3853 } else { 3854 Tvm_equ = LineTime / 4; 3855 } 3856 3857 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3858 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3859 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3860 (LineTime - Tvm_equ) / 2, LineTime / 4); 3861 } else { 3862 Tr0_equ = (LineTime - Tvm_equ) / 2; 3863 } 3864 } else { 3865 Tvm_equ = 0; 3866 Tr0_equ = 0; 3867 #ifdef __DML_VBA_DEBUG__ 3868 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 3869 #endif 3870 } 3871 } 3872 3873 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3874 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3875 TimeForFetchingMetaPTE = Tvm_oto; 3876 TimeForFetchingRowInVBlank = Tr0_oto; 3877 *PrefetchBandwidth = prefetch_bw_oto; 3878 } else { 3879 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3880 TimeForFetchingMetaPTE = Tvm_equ; 3881 TimeForFetchingRowInVBlank = Tr0_equ; 3882 *PrefetchBandwidth = prefetch_bw_equ; 3883 } 3884 3885 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3886 3887 *DestinationLinesToRequestRowInVBlank = 3888 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3889 3890 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3891 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3892 3893 #ifdef __DML_VBA_DEBUG__ 3894 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3895 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3896 __func__, 
*DestinationLinesToRequestVMInVBlank); 3897 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3898 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3899 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3900 __func__, *DestinationLinesToRequestRowInVBlank); 3901 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3902 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3903 #endif 3904 3905 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3906 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3907 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3908 #ifdef __DML_VBA_DEBUG__ 3909 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3910 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3911 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3912 #endif 3913 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3914 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3915 *VRatioPrefetchY = 3916 dml_max((double) PrefetchSourceLinesY / 3917 LinesToRequestPrefetchPixelData, 3918 (double) MaxNumSwathY * SwathHeightY / 3919 (LinesToRequestPrefetchPixelData - 3920 (VInitPreFillY - 3.0) / 2.0)); 3921 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3922 } else { 3923 MyError = true; 3924 *VRatioPrefetchY = 0; 3925 } 3926 #ifdef __DML_VBA_DEBUG__ 3927 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3928 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3929 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3930 #endif 3931 } 3932 3933 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3934 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3935 3936 #ifdef __DML_VBA_DEBUG__ 3937 
dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3938 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3939 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3940 #endif 3941 if ((SwathHeightC > 4)) { 3942 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3943 *VRatioPrefetchC = 3944 dml_max(*VRatioPrefetchC, 3945 (double) MaxNumSwathC * SwathHeightC / 3946 (LinesToRequestPrefetchPixelData - 3947 (VInitPreFillC - 3.0) / 2.0)); 3948 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3949 } else { 3950 MyError = true; 3951 *VRatioPrefetchC = 0; 3952 } 3953 #ifdef __DML_VBA_DEBUG__ 3954 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3955 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3956 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3957 #endif 3958 } 3959 3960 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3961 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3962 / LineTime; 3963 3964 #ifdef __DML_VBA_DEBUG__ 3965 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3966 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3967 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3968 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3969 __func__, *RequiredPrefetchPixDataBWLuma); 3970 #endif 3971 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3972 LinesToRequestPrefetchPixelData 3973 * myPipe->BytePerPixelC 3974 * swath_width_chroma_ub / LineTime; 3975 } else { 3976 MyError = true; 3977 #ifdef __DML_VBA_DEBUG__ 3978 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3979 __func__, LinesToRequestPrefetchPixelData); 3980 #endif 3981 *VRatioPrefetchY = 0; 3982 *VRatioPrefetchC = 0; 3983 *RequiredPrefetchPixDataBWLuma = 0; 3984 *RequiredPrefetchPixDataBWChroma = 0; 3985 } 3986 #ifdef __DML_VBA_DEBUG__ 3987 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3988 (double)LinesToRequestPrefetchPixelData * LineTime + 3989 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 3990 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 3991 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 3992 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 3993 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 3994 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 3995 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3996 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 3997 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 3998 PixelPTEBytesPerRow); 3999 #endif 4000 } else { 4001 MyError = true; 4002 #ifdef __DML_VBA_DEBUG__ 4003 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 4004 __func__, dst_y_prefetch_equ); 4005 #endif 4006 } 4007 4008 { 4009 double prefetch_vm_bw; 4010 double prefetch_row_bw; 4011 4012 if (PDEAndMetaPTEBytesFrame == 0) { 4013 prefetch_vm_bw = 0; 4014 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4015 #ifdef __DML_VBA_DEBUG__ 4016 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4017 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4018 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4019 __func__, 
*DestinationLinesToRequestVMInVBlank); 4020 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4021 #endif 4022 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4023 (*DestinationLinesToRequestVMInVBlank * LineTime); 4024 #ifdef __DML_VBA_DEBUG__ 4025 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4026 #endif 4027 } else { 4028 prefetch_vm_bw = 0; 4029 MyError = true; 4030 #ifdef __DML_VBA_DEBUG__ 4031 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4032 __func__, *DestinationLinesToRequestVMInVBlank); 4033 #endif 4034 } 4035 4036 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4037 prefetch_row_bw = 0; 4038 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4039 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4040 (*DestinationLinesToRequestRowInVBlank * LineTime); 4041 4042 #ifdef __DML_VBA_DEBUG__ 4043 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4044 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4045 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4046 __func__, *DestinationLinesToRequestRowInVBlank); 4047 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4048 #endif 4049 } else { 4050 prefetch_row_bw = 0; 4051 MyError = true; 4052 #ifdef __DML_VBA_DEBUG__ 4053 dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4054 __func__, *DestinationLinesToRequestRowInVBlank); 4055 #endif 4056 } 4057 4058 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4059 } 4060 4061 if (MyError) { 4062 *PrefetchBandwidth = 0; 4063 TimeForFetchingMetaPTE = 0; 4064 TimeForFetchingRowInVBlank = 0; 4065 *DestinationLinesToRequestVMInVBlank = 0; 4066 *DestinationLinesToRequestRowInVBlank = 0; 4067 *DestinationLinesForPrefetch = 0; 4068 LinesToRequestPrefetchPixelData = 0; 4069 *VRatioPrefetchY = 0; 4070 *VRatioPrefetchC = 0; 4071 *RequiredPrefetchPixDataBWLuma = 0; 4072 *RequiredPrefetchPixDataBWChroma = 0; 4073 } 4074 4075 return MyError; 4076 } // CalculatePrefetchSchedule 4077 4078 void dml32_CalculateFlipSchedule( 4079 double HostVMInefficiencyFactor, 4080 double UrgentExtraLatency, 4081 double UrgentLatency, 4082 unsigned int GPUVMMaxPageTableLevels, 4083 bool HostVMEnable, 4084 unsigned int HostVMMaxNonCachedPageTableLevels, 4085 bool GPUVMEnable, 4086 double HostVMMinPageSize, 4087 double PDEAndMetaPTEBytesPerFrame, 4088 double MetaRowBytes, 4089 double DPTEBytesPerRow, 4090 double BandwidthAvailableForImmediateFlip, 4091 unsigned int TotImmediateFlipBytes, 4092 enum source_format_class SourcePixelFormat, 4093 double LineTime, 4094 double VRatio, 4095 double VRatioChroma, 4096 double Tno_bw, 4097 bool DCCEnable, 4098 unsigned int dpte_row_height, 4099 unsigned int meta_row_height, 4100 unsigned int dpte_row_height_chroma, 4101 unsigned int meta_row_height_chroma, 4102 bool use_one_row_for_frame_flip, 4103 4104 /* Output */ 4105 double *DestinationLinesToRequestVMInImmediateFlip, 4106 double *DestinationLinesToRequestRowInImmediateFlip, 4107 double *final_flip_bw, 4108 bool *ImmediateFlipSupportedForPipe) 4109 { 4110 double min_row_time = 0.0; 4111 unsigned int HostVMDynamicLevelsTrips; 4112 double TimeForFetchingMetaPTEImmediateFlip; 4113 double TimeForFetchingRowInVBlankImmediateFlip; 4114 double ImmediateFlipBW; 4115 
4116 if (GPUVMEnable == true && HostVMEnable == true) 4117 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4118 else 4119 HostVMDynamicLevelsTrips = 0; 4120 4121 #ifdef __DML_VBA_DEBUG__ 4122 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4123 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4124 #endif 4125 4126 if (TotImmediateFlipBytes > 0) { 4127 if (use_one_row_for_frame_flip) { 4128 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4129 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4130 } else { 4131 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4132 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4133 } 4134 if (GPUVMEnable == true) { 4135 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4136 HostVMInefficiencyFactor / ImmediateFlipBW, 4137 UrgentExtraLatency + UrgentLatency * 4138 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4139 LineTime / 4.0); 4140 } else { 4141 TimeForFetchingMetaPTEImmediateFlip = 0; 4142 } 4143 if ((GPUVMEnable == true || DCCEnable == true)) { 4144 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4145 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4146 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4147 } else { 4148 TimeForFetchingRowInVBlankImmediateFlip = 0; 4149 } 4150 4151 *DestinationLinesToRequestVMInImmediateFlip = 4152 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4153 *DestinationLinesToRequestRowInImmediateFlip = 4154 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4155 4156 if (GPUVMEnable == true) { 4157 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4158 (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 
4159 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4160 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4161 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4162 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4163 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4164 } else { 4165 *final_flip_bw = 0; 4166 } 4167 } else { 4168 TimeForFetchingMetaPTEImmediateFlip = 0; 4169 TimeForFetchingRowInVBlankImmediateFlip = 0; 4170 *DestinationLinesToRequestVMInImmediateFlip = 0; 4171 *DestinationLinesToRequestRowInImmediateFlip = 0; 4172 *final_flip_bw = 0; 4173 } 4174 4175 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4176 if (GPUVMEnable == true && DCCEnable != true) { 4177 min_row_time = dml_min(dpte_row_height * 4178 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4179 } else if (GPUVMEnable != true && DCCEnable == true) { 4180 min_row_time = dml_min(meta_row_height * 4181 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4182 } else { 4183 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4184 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4185 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4186 } 4187 } else { 4188 if (GPUVMEnable == true && DCCEnable != true) { 4189 min_row_time = dpte_row_height * LineTime / VRatio; 4190 } else if (GPUVMEnable != true && DCCEnable == true) { 4191 min_row_time = meta_row_height * LineTime / VRatio; 4192 } else { 4193 min_row_time = 4194 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4195 } 4196 } 4197 4198 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4199 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4200 > min_row_time) { 4201 *ImmediateFlipSupportedForPipe = 
false; 4202 } else { 4203 *ImmediateFlipSupportedForPipe = true; 4204 } 4205 4206 #ifdef __DML_VBA_DEBUG__ 4207 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4208 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4209 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4210 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4211 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4212 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4213 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4214 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4215 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4216 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4217 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4218 #endif 4219 } // CalculateFlipSchedule 4220 4221 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4222 struct vba_vars_st *v, 4223 unsigned int PrefetchMode, 4224 double DCFCLK, 4225 double ReturnBW, 4226 SOCParametersList mmSOCParameters, 4227 double SOCCLK, 4228 double DCFClkDeepSleep, 4229 unsigned int DETBufferSizeY[], 4230 unsigned int DETBufferSizeC[], 4231 unsigned int SwathHeightY[], 4232 unsigned int SwathHeightC[], 4233 double SwathWidthY[], 4234 double SwathWidthC[], 4235 unsigned int DPPPerSurface[], 4236 double BytePerPixelDETY[], 4237 double BytePerPixelDETC[], 4238 double DSTXAfterScaler[], 4239 double DSTYAfterScaler[], 4240 bool UnboundedRequestEnabled, 4241 unsigned int CompressedBufferSizeInkByte, 4242 4243 /* Output */ 4244 enum clock_change_support *DRAMClockChangeSupport, 4245 double MaxActiveDRAMClockChangeLatencySupported[], 4246 unsigned int SubViewportLinesNeededInMALL[], 4247 enum dm_fclock_change_support *FCLKChangeSupport, 4248 double *MinActiveFCLKChangeLatencySupported, 4249 bool 
*USRRetrainingSupport, 4250 double ActiveDRAMClockChangeLatencyMargin[]) 4251 { 4252 unsigned int i, j, k; 4253 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4254 unsigned int DRAMClockChangeSupportNumber = 0; 4255 unsigned int LastSurfaceWithoutMargin; 4256 unsigned int DRAMClockChangeMethod = 0; 4257 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4258 double MinActiveFCLKChangeMargin = 0.; 4259 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4260 double ActiveClockChangeLatencyHidingY; 4261 double ActiveClockChangeLatencyHidingC; 4262 double ActiveClockChangeLatencyHiding; 4263 double EffectiveDETBufferSizeY; 4264 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4265 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4266 double TotalPixelBW = 0.0; 4267 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4268 double EffectiveLBLatencyHidingY; 4269 double EffectiveLBLatencyHidingC; 4270 double LinesInDETY[DC__NUM_DPP__MAX]; 4271 double LinesInDETC[DC__NUM_DPP__MAX]; 4272 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4273 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4274 double FullDETBufferingTimeY; 4275 double FullDETBufferingTimeC; 4276 double WritebackDRAMClockChangeLatencyMargin; 4277 double WritebackFCLKChangeLatencyMargin; 4278 double WritebackLatencyHiding; 4279 bool SameTimingForFCLKChange; 4280 4281 unsigned int TotalActiveWriteback = 0; 4282 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4283 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4284 4285 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4286 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4287 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4288 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; 4289 
v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; 4290 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4291 + 10 / DCFClkDeepSleep; 4292 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4293 + 10 / DCFClkDeepSleep; 4294 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4295 + 10 / DCFClkDeepSleep; 4296 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4297 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4298 4299 #ifdef __DML_VBA_DEBUG__ 4300 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4301 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4302 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4303 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); 4304 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); 4305 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); 4306 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); 4307 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); 4308 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); 4309 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); 4310 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4311 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); 4312 #endif 4313 4314 4315 TotalActiveWriteback = 0; 4316 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4317 if (v->WritebackEnable[k] == true) 4318 
TotalActiveWriteback = TotalActiveWriteback + 1; 4319 } 4320 4321 if (TotalActiveWriteback <= 1) { 4322 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4323 } else { 4324 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4325 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4326 } 4327 if (v->USRRetrainingRequiredFinal) 4328 v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark 4329 + mmSOCParameters.USRRetrainingLatency; 4330 4331 if (TotalActiveWriteback <= 1) { 4332 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4333 + mmSOCParameters.WritebackLatency; 4334 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4335 + mmSOCParameters.WritebackLatency; 4336 } else { 4337 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4338 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4339 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4340 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; 4341 } 4342 4343 if (v->USRRetrainingRequiredFinal) 4344 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4345 + mmSOCParameters.USRRetrainingLatency; 4346 4347 if (v->USRRetrainingRequiredFinal) 4348 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark 4349 + mmSOCParameters.USRRetrainingLatency; 4350 4351 #ifdef __DML_VBA_DEBUG__ 4352 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4353 __func__, v->Watermark.WritebackDRAMClockChangeWatermark); 4354 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); 4355 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); 4356 dml_print("DML::%s: v->USRRetrainingRequiredFinal = 
%d\n", __func__, v->USRRetrainingRequiredFinal); 4357 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4358 #endif 4359 4360 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4361 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + 4362 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); 4363 } 4364 4365 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4366 4367 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 4368 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 4369 4370 4371 #ifdef __DML_VBA_DEBUG__ 4372 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); 4373 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); 4374 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); 4375 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); 4376 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); 4377 #endif 4378 4379 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 4380 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 4381 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4382 4383 if (UnboundedRequestEnabled) { 4384 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4385 + CompressedBufferSizeInkByte * 1024 4386 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) 4387 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 4388 } 4389 4390 
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4391 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4392 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 4393 4394 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4395 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; 4396 4397 if (v->NumberOfActiveSurfaces > 1) { 4398 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4399 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] 4400 / v->PixelClock[k] / v->VRatio[k]; 4401 } 4402 4403 if (BytePerPixelDETC[k] > 0) { 4404 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4405 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4406 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) 4407 / v->VRatioChroma[k]; 4408 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4409 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] 4410 / v->PixelClock[k]; 4411 if (v->NumberOfActiveSurfaces > 1) { 4412 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4413 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] 4414 / v->PixelClock[k] / v->VRatioChroma[k]; 4415 } 4416 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4417 ActiveClockChangeLatencyHidingC); 4418 } else { 4419 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4420 } 4421 4422 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4423 - v->Watermark.DRAMClockChangeWatermark; 4424 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4425 - v->Watermark.FCLKChangeWatermark; 
4426 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; 4427 4428 if (v->WritebackEnable[k]) { 4429 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 4430 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4431 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 4432 if (v->WritebackPixelFormat[k] == dm_444_64) 4433 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4434 4435 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4436 - v->Watermark.WritebackDRAMClockChangeWatermark; 4437 4438 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4439 - v->Watermark.WritebackFCLKChangeWatermark; 4440 4441 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4442 WritebackFCLKChangeLatencyMargin); 4443 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4444 WritebackDRAMClockChangeLatencyMargin); 4445 } 4446 MaxActiveDRAMClockChangeLatencySupported[k] = 4447 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
4448 0 : 4449 (ActiveDRAMClockChangeLatencyMargin[k] 4450 + mmSOCParameters.DRAMClockChangeLatency); 4451 } 4452 4453 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { 4454 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { 4455 if (i == j || 4456 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || 4457 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || 4458 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || 4459 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && 4460 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && 4461 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4462 (v->DRRDisplay[i] || v->DRRDisplay[j]))) { 4463 SynchronizedSurfaces[i][j] = true; 4464 } else { 4465 SynchronizedSurfaces[i][j] = false; 4466 } 4467 } 4468 } 4469 4470 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4471 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4472 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4473 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4474 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4475 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4476 SurfaceWithMinActiveFCLKChangeMargin = k; 4477 } 4478 } 4479 4480 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4481 4482 SameTimingForFCLKChange = true; 4483 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4484 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4485 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4486 (SameTimingForFCLKChange || 4487 ActiveFCLKChangeLatencyMargin[k] < 4488 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4489 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4490 } 4491 SameTimingForFCLKChange = false; 4492 } 4493 } 4494 
4495 if (MinActiveFCLKChangeMargin > 0) { 4496 *FCLKChangeSupport = dm_fclock_change_vactive; 4497 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4498 (PrefetchMode <= 1)) { 4499 *FCLKChangeSupport = dm_fclock_change_vblank; 4500 } else { 4501 *FCLKChangeSupport = dm_fclock_change_unsupported; 4502 } 4503 4504 *USRRetrainingSupport = true; 4505 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4506 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4507 (USRRetrainingLatencyMargin[k] < 0)) { 4508 *USRRetrainingSupport = false; 4509 } 4510 } 4511 4512 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4513 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4514 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4515 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4516 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4517 if (PrefetchMode > 0) { 4518 DRAMClockChangeSupportNumber = 2; 4519 } else if (DRAMClockChangeSupportNumber == 0) { 4520 DRAMClockChangeSupportNumber = 1; 4521 LastSurfaceWithoutMargin = k; 4522 } else if (DRAMClockChangeSupportNumber == 1 && 4523 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4524 DRAMClockChangeSupportNumber = 2; 4525 } 4526 } 4527 } 4528 4529 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4530 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4531 DRAMClockChangeMethod = 1; 4532 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4533 DRAMClockChangeMethod = 2; 4534 } 4535 4536 if (DRAMClockChangeMethod == 0) { 4537 if (DRAMClockChangeSupportNumber == 0) 4538 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4539 else if (DRAMClockChangeSupportNumber == 1) 4540 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4541 else 4542 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4543 } else if 
(DRAMClockChangeMethod == 1) { 4544 if (DRAMClockChangeSupportNumber == 0) 4545 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4546 else if (DRAMClockChangeSupportNumber == 1) 4547 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4548 else 4549 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4550 } else { 4551 if (DRAMClockChangeSupportNumber == 0) 4552 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4553 else if (DRAMClockChangeSupportNumber == 1) 4554 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4555 else 4556 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4557 } 4558 4559 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4560 unsigned int dst_y_pstate; 4561 unsigned int src_y_pstate_l; 4562 unsigned int src_y_pstate_c; 4563 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4564 4565 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); 4566 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); 4567 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4568 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; 4569 4570 #ifdef __DML_VBA_DEBUG__ 4571 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4572 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4573 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4574 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4575 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4576 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4577 dml_print("DML::%s: k=%d, src_y_pstate_l 
= %d\n", __func__, k, src_y_pstate_l); 4578 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4579 dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); 4580 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4581 #endif 4582 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4583 4584 if (BytePerPixelDETC[k] > 0) { 4585 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); 4586 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4587 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; 4588 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4589 4590 #ifdef __DML_VBA_DEBUG__ 4591 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4592 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4593 dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); 4594 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4595 #endif 4596 } 4597 } 4598 #ifdef __DML_VBA_DEBUG__ 4599 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4600 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4601 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4602 __func__, *MinActiveFCLKChangeLatencySupported); 4603 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4604 #endif 4605 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 4606 4607 double dml32_CalculateWriteBackDISPCLK( 4608 enum source_format_class WritebackPixelFormat, 4609 double PixelClock, 4610 double WritebackHRatio, 4611 double WritebackVRatio, 4612 unsigned int WritebackHTaps, 4613 unsigned int WritebackVTaps, 4614 unsigned int 
WritebackSourceWidth, 4615 unsigned int WritebackDestinationWidth, 4616 unsigned int HTotal, 4617 unsigned int WritebackLineBufferSize, 4618 double DISPCLKDPPCLKVCOSpeed) 4619 { 4620 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4621 4622 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4623 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4624 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4625 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4626 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4627 } 4628 4629 void dml32_CalculateMinAndMaxPrefetchMode( 4630 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4631 unsigned int *MinPrefetchMode, 4632 unsigned int *MaxPrefetchMode) 4633 { 4634 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4635 *MinPrefetchMode = 3; 4636 *MaxPrefetchMode = 3; 4637 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4638 *MinPrefetchMode = 2; 4639 *MaxPrefetchMode = 2; 4640 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4641 *MinPrefetchMode = 1; 4642 *MaxPrefetchMode = 1; 4643 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4644 *MinPrefetchMode = 0; 4645 *MaxPrefetchMode = 0; 4646 } else { 4647 *MinPrefetchMode = 0; 4648 *MaxPrefetchMode = 3; 4649 } 4650 } // CalculateMinAndMaxPrefetchMode 4651 4652 void dml32_CalculatePixelDeliveryTimes( 4653 unsigned int NumberOfActiveSurfaces, 4654 double VRatio[], 4655 double VRatioChroma[], 4656 double VRatioPrefetchY[], 4657 double VRatioPrefetchC[], 4658 unsigned int swath_width_luma_ub[], 4659 unsigned int swath_width_chroma_ub[], 4660 unsigned int DPPPerSurface[], 4661 double HRatio[], 4662 double HRatioChroma[], 4663 
double PixelClock[], 4664 double PSCL_THROUGHPUT[], 4665 double PSCL_THROUGHPUT_CHROMA[], 4666 double Dppclk[], 4667 unsigned int BytePerPixelC[], 4668 enum dm_rotation_angle SourceRotation[], 4669 unsigned int NumberOfCursors[], 4670 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4671 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4672 unsigned int BlockWidth256BytesY[], 4673 unsigned int BlockHeight256BytesY[], 4674 unsigned int BlockWidth256BytesC[], 4675 unsigned int BlockHeight256BytesC[], 4676 4677 /* Output */ 4678 double DisplayPipeLineDeliveryTimeLuma[], 4679 double DisplayPipeLineDeliveryTimeChroma[], 4680 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4681 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4682 double DisplayPipeRequestDeliveryTimeLuma[], 4683 double DisplayPipeRequestDeliveryTimeChroma[], 4684 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4685 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4686 double CursorRequestDeliveryTime[], 4687 double CursorRequestDeliveryTimePrefetch[]) 4688 { 4689 double req_per_swath_ub; 4690 unsigned int k; 4691 4692 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4693 4694 #ifdef __DML_VBA_DEBUG__ 4695 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4696 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4697 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4698 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4699 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4700 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4701 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4702 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4703 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 4704 
dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4705 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4706 #endif 4707 4708 if (VRatio[k] <= 1) { 4709 DisplayPipeLineDeliveryTimeLuma[k] = 4710 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4711 } else { 4712 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4713 } 4714 4715 if (BytePerPixelC[k] == 0) { 4716 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4717 } else { 4718 if (VRatioChroma[k] <= 1) { 4719 DisplayPipeLineDeliveryTimeChroma[k] = 4720 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4721 } else { 4722 DisplayPipeLineDeliveryTimeChroma[k] = 4723 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4724 } 4725 } 4726 4727 if (VRatioPrefetchY[k] <= 1) { 4728 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4729 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4730 } else { 4731 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4732 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4733 } 4734 4735 if (BytePerPixelC[k] == 0) { 4736 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4737 } else { 4738 if (VRatioPrefetchC[k] <= 1) { 4739 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4740 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4741 } else { 4742 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4743 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4744 } 4745 } 4746 #ifdef __DML_VBA_DEBUG__ 4747 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4748 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4749 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4750 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4751 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4752 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 
4753 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4754 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4755 #endif 4756 } 4757 4758 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4759 if (!IsVertical(SourceRotation[k])) 4760 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4761 else 4762 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4763 #ifdef __DML_VBA_DEBUG__ 4764 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4765 #endif 4766 4767 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4768 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4769 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4770 if (BytePerPixelC[k] == 0) { 4771 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4772 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4773 } else { 4774 if (!IsVertical(SourceRotation[k])) 4775 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4776 else 4777 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4778 #ifdef __DML_VBA_DEBUG__ 4779 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4780 #endif 4781 DisplayPipeRequestDeliveryTimeChroma[k] = 4782 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4783 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4784 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4785 } 4786 #ifdef __DML_VBA_DEBUG__ 4787 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4788 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4789 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4790 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4791 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4792 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 4793 
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4794 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4795 #endif 4796 } 4797 4798 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4799 unsigned int cursor_req_per_width; 4800 4801 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4802 256.0 / 8.0, 1.0); 4803 if (NumberOfCursors[k] > 0) { 4804 if (VRatio[k] <= 1) { 4805 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4806 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4807 } else { 4808 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4809 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4810 } 4811 if (VRatioPrefetchY[k] <= 1) { 4812 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4813 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4814 } else { 4815 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4816 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4817 } 4818 } else { 4819 CursorRequestDeliveryTime[k] = 0; 4820 CursorRequestDeliveryTimePrefetch[k] = 0; 4821 } 4822 #ifdef __DML_VBA_DEBUG__ 4823 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4824 __func__, k, NumberOfCursors[k]); 4825 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4826 __func__, k, CursorRequestDeliveryTime[k]); 4827 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4828 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4829 #endif 4830 } 4831 } // CalculatePixelDeliveryTimes 4832 4833 void dml32_CalculateMetaAndPTETimes( 4834 bool use_one_row_for_frame[], 4835 unsigned int NumberOfActiveSurfaces, 4836 bool GPUVMEnable, 4837 unsigned int MetaChunkSize, 4838 unsigned int MinMetaChunkSizeBytes, 4839 unsigned int HTotal[], 4840 double VRatio[], 4841 double VRatioChroma[], 4842 double DestinationLinesToRequestRowInVBlank[], 4843 double DestinationLinesToRequestRowInImmediateFlip[], 4844 bool 
DCCEnable[], 4845 double PixelClock[], 4846 unsigned int BytePerPixelY[], 4847 unsigned int BytePerPixelC[], 4848 enum dm_rotation_angle SourceRotation[], 4849 unsigned int dpte_row_height[], 4850 unsigned int dpte_row_height_chroma[], 4851 unsigned int meta_row_width[], 4852 unsigned int meta_row_width_chroma[], 4853 unsigned int meta_row_height[], 4854 unsigned int meta_row_height_chroma[], 4855 unsigned int meta_req_width[], 4856 unsigned int meta_req_width_chroma[], 4857 unsigned int meta_req_height[], 4858 unsigned int meta_req_height_chroma[], 4859 unsigned int dpte_group_bytes[], 4860 unsigned int PTERequestSizeY[], 4861 unsigned int PTERequestSizeC[], 4862 unsigned int PixelPTEReqWidthY[], 4863 unsigned int PixelPTEReqHeightY[], 4864 unsigned int PixelPTEReqWidthC[], 4865 unsigned int PixelPTEReqHeightC[], 4866 unsigned int dpte_row_width_luma_ub[], 4867 unsigned int dpte_row_width_chroma_ub[], 4868 4869 /* Output */ 4870 double DST_Y_PER_PTE_ROW_NOM_L[], 4871 double DST_Y_PER_PTE_ROW_NOM_C[], 4872 double DST_Y_PER_META_ROW_NOM_L[], 4873 double DST_Y_PER_META_ROW_NOM_C[], 4874 double TimePerMetaChunkNominal[], 4875 double TimePerChromaMetaChunkNominal[], 4876 double TimePerMetaChunkVBlank[], 4877 double TimePerChromaMetaChunkVBlank[], 4878 double TimePerMetaChunkFlip[], 4879 double TimePerChromaMetaChunkFlip[], 4880 double time_per_pte_group_nom_luma[], 4881 double time_per_pte_group_vblank_luma[], 4882 double time_per_pte_group_flip_luma[], 4883 double time_per_pte_group_nom_chroma[], 4884 double time_per_pte_group_vblank_chroma[], 4885 double time_per_pte_group_flip_chroma[]) 4886 { 4887 unsigned int meta_chunk_width; 4888 unsigned int min_meta_chunk_width; 4889 unsigned int meta_chunk_per_row_int; 4890 unsigned int meta_row_remainder; 4891 unsigned int meta_chunk_threshold; 4892 unsigned int meta_chunks_per_row_ub; 4893 unsigned int meta_chunk_width_chroma; 4894 unsigned int min_meta_chunk_width_chroma; 4895 unsigned int meta_chunk_per_row_int_chroma; 
4896 unsigned int meta_row_remainder_chroma; 4897 unsigned int meta_chunk_threshold_chroma; 4898 unsigned int meta_chunks_per_row_ub_chroma; 4899 unsigned int dpte_group_width_luma; 4900 unsigned int dpte_groups_per_row_luma_ub; 4901 unsigned int dpte_group_width_chroma; 4902 unsigned int dpte_groups_per_row_chroma_ub; 4903 unsigned int k; 4904 4905 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4906 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 4907 if (BytePerPixelC[k] == 0) 4908 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 4909 else 4910 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 4911 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 4912 if (BytePerPixelC[k] == 0) 4913 DST_Y_PER_META_ROW_NOM_C[k] = 0; 4914 else 4915 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 4916 } 4917 4918 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4919 if (DCCEnable[k] == true) { 4920 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 4921 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 4922 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 4923 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 4924 if (!IsVertical(SourceRotation[k])) 4925 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 4926 else 4927 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 4928 4929 if (meta_row_remainder <= meta_chunk_threshold) 4930 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 4931 else 4932 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 4933 4934 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * 4935 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4936 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4937 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4938 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4939 
HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4940 if (BytePerPixelC[k] == 0) { 4941 TimePerChromaMetaChunkNominal[k] = 0; 4942 TimePerChromaMetaChunkVBlank[k] = 0; 4943 TimePerChromaMetaChunkFlip[k] = 0; 4944 } else { 4945 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / 4946 meta_row_height_chroma[k]; 4947 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / 4948 meta_row_height_chroma[k]; 4949 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / 4950 meta_chunk_width_chroma; 4951 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 4952 if (!IsVertical(SourceRotation[k])) { 4953 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4954 meta_req_width_chroma[k]; 4955 } else { 4956 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4957 meta_req_height_chroma[k]; 4958 } 4959 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) 4960 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 4961 else 4962 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 4963 4964 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * 4965 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4966 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4967 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4968 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4969 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4970 } 4971 } else { 4972 TimePerMetaChunkNominal[k] = 0; 4973 TimePerMetaChunkVBlank[k] = 0; 4974 TimePerMetaChunkFlip[k] = 0; 4975 TimePerChromaMetaChunkNominal[k] = 0; 4976 TimePerChromaMetaChunkVBlank[k] = 0; 4977 TimePerChromaMetaChunkFlip[k] = 0; 4978 } 4979 } 4980 4981 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4982 if (GPUVMEnable == true) { 4983 if (!IsVertical(SourceRotation[k])) { 4984 
dpte_group_width_luma = (double) dpte_group_bytes[k] / 4985 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 4986 } else { 4987 dpte_group_width_luma = (double) dpte_group_bytes[k] / 4988 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 4989 } 4990 4991 if (use_one_row_for_frame[k]) { 4992 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 4993 (double) dpte_group_width_luma / 2.0, 1.0); 4994 } else { 4995 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 4996 (double) dpte_group_width_luma, 1.0); 4997 } 4998 #ifdef __DML_VBA_DEBUG__ 4999 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", 5000 __func__, k, use_one_row_for_frame[k]); 5001 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", 5002 __func__, k, dpte_group_bytes[k]); 5003 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", 5004 __func__, k, PTERequestSizeY[k]); 5005 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", 5006 __func__, k, PixelPTEReqWidthY[k]); 5007 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", 5008 __func__, k, PixelPTEReqHeightY[k]); 5009 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", 5010 __func__, k, dpte_row_width_luma_ub[k]); 5011 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", 5012 __func__, k, dpte_group_width_luma); 5013 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", 5014 __func__, k, dpte_groups_per_row_luma_ub); 5015 #endif 5016 5017 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * 5018 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5019 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * 5020 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5021 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5022 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5023 if (BytePerPixelC[k] == 0) { 5024 time_per_pte_group_nom_chroma[k] = 0; 5025 time_per_pte_group_vblank_chroma[k] = 0; 
5026 time_per_pte_group_flip_chroma[k] = 0; 5027 } else { 5028 if (!IsVertical(SourceRotation[k])) { 5029 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5030 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5031 } else { 5032 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5033 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 5034 } 5035 5036 if (use_one_row_for_frame[k]) { 5037 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5038 (double) dpte_group_width_chroma / 2.0, 1.0); 5039 } else { 5040 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5041 (double) dpte_group_width_chroma, 1.0); 5042 } 5043 #ifdef __DML_VBA_DEBUG__ 5044 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", 5045 __func__, k, dpte_row_width_chroma_ub[k]); 5046 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", 5047 __func__, k, dpte_group_width_chroma); 5048 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", 5049 __func__, k, dpte_groups_per_row_chroma_ub); 5050 #endif 5051 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * 5052 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5053 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * 5054 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5055 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5056 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5057 } 5058 } else { 5059 time_per_pte_group_nom_luma[k] = 0; 5060 time_per_pte_group_vblank_luma[k] = 0; 5061 time_per_pte_group_flip_luma[k] = 0; 5062 time_per_pte_group_nom_chroma[k] = 0; 5063 time_per_pte_group_vblank_chroma[k] = 0; 5064 time_per_pte_group_flip_chroma[k] = 0; 5065 } 5066 #ifdef __DML_VBA_DEBUG__ 5067 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", 5068 __func__, k, DestinationLinesToRequestRowInVBlank[k]); 5069 dml_print("DML::%s: k=%0d, 
DestinationLinesToRequestRowInImmediateFlip = %f\n", 5070 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); 5071 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", 5072 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); 5073 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", 5074 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); 5075 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", 5076 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); 5077 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", 5078 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); 5079 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", 5080 __func__, k, TimePerMetaChunkNominal[k]); 5081 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", 5082 __func__, k, TimePerMetaChunkVBlank[k]); 5083 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", 5084 __func__, k, TimePerMetaChunkFlip[k]); 5085 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", 5086 __func__, k, TimePerChromaMetaChunkNominal[k]); 5087 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", 5088 __func__, k, TimePerChromaMetaChunkVBlank[k]); 5089 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", 5090 __func__, k, TimePerChromaMetaChunkFlip[k]); 5091 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", 5092 __func__, k, time_per_pte_group_nom_luma[k]); 5093 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", 5094 __func__, k, time_per_pte_group_vblank_luma[k]); 5095 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", 5096 __func__, k, time_per_pte_group_flip_luma[k]); 5097 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", 5098 __func__, k, time_per_pte_group_nom_chroma[k]); 5099 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", 5100 __func__, k, time_per_pte_group_vblank_chroma[k]); 5101 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", 5102 __func__, k, 
time_per_pte_group_flip_chroma[k]); 5103 #endif 5104 } 5105 } // CalculateMetaAndPTETimes 5106 5107 void dml32_CalculateVMGroupAndRequestTimes( 5108 unsigned int NumberOfActiveSurfaces, 5109 bool GPUVMEnable, 5110 unsigned int GPUVMMaxPageTableLevels, 5111 unsigned int HTotal[], 5112 unsigned int BytePerPixelC[], 5113 double DestinationLinesToRequestVMInVBlank[], 5114 double DestinationLinesToRequestVMInImmediateFlip[], 5115 bool DCCEnable[], 5116 double PixelClock[], 5117 unsigned int dpte_row_width_luma_ub[], 5118 unsigned int dpte_row_width_chroma_ub[], 5119 unsigned int vm_group_bytes[], 5120 unsigned int dpde0_bytes_per_frame_ub_l[], 5121 unsigned int dpde0_bytes_per_frame_ub_c[], 5122 unsigned int meta_pte_bytes_per_frame_ub_l[], 5123 unsigned int meta_pte_bytes_per_frame_ub_c[], 5124 5125 /* Output */ 5126 double TimePerVMGroupVBlank[], 5127 double TimePerVMGroupFlip[], 5128 double TimePerVMRequestVBlank[], 5129 double TimePerVMRequestFlip[]) 5130 { 5131 unsigned int k; 5132 unsigned int num_group_per_lower_vm_stage; 5133 unsigned int num_req_per_lower_vm_stage; 5134 5135 #ifdef __DML_VBA_DEBUG__ 5136 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 5137 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 5138 #endif 5139 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5140 5141 #ifdef __DML_VBA_DEBUG__ 5142 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); 5143 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); 5144 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", 5145 __func__, k, dpde0_bytes_per_frame_ub_l[k]); 5146 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", 5147 __func__, k, dpde0_bytes_per_frame_ub_c[k]); 5148 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", 5149 __func__, k, meta_pte_bytes_per_frame_ub_l[k]); 5150 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", 5151 __func__, k, 
/* Number of VM groups for a byte count: ceil(bytes / group_bytes). */
static double dml32_vm_groups_for_bytes(unsigned int bytes, unsigned int group_bytes)
{
	return dml_ceil((double) bytes / (double) group_bytes, 1.0);
}

/* Evaluates dst_lines * htotal / pixel_clock / unit_count, left to right. */
static double dml32_vm_time_per_unit(
		double dst_lines,
		unsigned int htotal,
		double pixel_clock,
		unsigned int unit_count)
{
	return dst_lines * htotal / pixel_clock / unit_count;
}

/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface time available for each
 * VM group and each VM request, in vblank and in immediate flip.
 *
 * For every k < NumberOfActiveSurfaces the four outputs are 0 unless
 * GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels > 1.  Otherwise the group and request counts are
 * built from:
 *   - the dpde0 byte counts only, when DCC is off;
 *   - the meta PTE byte counts only, when DCC is on with one page table level;
 *   - both, plus a constant (2 with chroma, 1 without) added to the group
 *     count, when DCC is on with more than one page table level.
 * Chroma byte counts contribute only when BytePerPixelC[k] > 0.  Requests are
 * counted in units of 64 bytes.  All four times are halved when
 * GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			double group_sum;
			unsigned int group_count;
			unsigned int request_count;

			if (!DCCEnable[k]) {
				/* Page directory entries only. */
				group_sum = dml32_vm_groups_for_bytes(
						dpde0_bytes_per_frame_ub_l[k], vm_group_bytes[k]);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum = group_sum + dml32_vm_groups_for_bytes(
							dpde0_bytes_per_frame_ub_c[k], vm_group_bytes[k]);
					request_count = request_count +
							dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			} else if (GPUVMMaxPageTableLevels == 1) {
				/* Meta PTEs only. */
				group_sum = dml32_vm_groups_for_bytes(
						meta_pte_bytes_per_frame_ub_l[k], vm_group_bytes[k]);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum = group_sum + dml32_vm_groups_for_bytes(
							meta_pte_bytes_per_frame_ub_c[k], vm_group_bytes[k]);
					request_count = request_count +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			} else if (has_chroma) {
				/* Both kinds, both planes, plus a constant of 2 groups. */
				group_sum = 2 +
						dml32_vm_groups_for_bytes(
							dpde0_bytes_per_frame_ub_l[k], vm_group_bytes[k]) +
						dml32_vm_groups_for_bytes(
							dpde0_bytes_per_frame_ub_c[k], vm_group_bytes[k]) +
						dml32_vm_groups_for_bytes(
							meta_pte_bytes_per_frame_ub_l[k], vm_group_bytes[k]) +
						dml32_vm_groups_for_bytes(
							meta_pte_bytes_per_frame_ub_c[k], vm_group_bytes[k]);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
						dpde0_bytes_per_frame_ub_c[k] / 64 +
						meta_pte_bytes_per_frame_ub_l[k] / 64 +
						meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				/* Both kinds, luma only, plus a constant of 1 group. */
				group_sum = 1 +
						dml32_vm_groups_for_bytes(
							dpde0_bytes_per_frame_ub_l[k], vm_group_bytes[k]) +
						dml32_vm_groups_for_bytes(
							meta_pte_bytes_per_frame_ub_l[k], vm_group_bytes[k]);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
						meta_pte_bytes_per_frame_ub_l[k] / 64;
			}

			/* The floating-point sum is truncated to an unsigned count. */
			group_count = group_sum;

			TimePerVMGroupVBlank[k] = dml32_vm_time_per_unit(
					DestinationLinesToRequestVMInVBlank[k],
					HTotal[k], PixelClock[k], group_count);
			TimePerVMGroupFlip[k] = dml32_vm_time_per_unit(
					DestinationLinesToRequestVMInImmediateFlip[k],
					HTotal[k], PixelClock[k], group_count);
			TimePerVMRequestVBlank[k] = dml32_vm_time_per_unit(
					DestinationLinesToRequestVMInVBlank[k],
					HTotal[k], PixelClock[k], request_count);
			TimePerVMRequestFlip[k] = dml32_vm_time_per_unit(
					DestinationLinesToRequestVMInImmediateFlip[k],
					HTotal[k], PixelClock[k], request_count);

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5308 unsigned int eff_surf_width_l; 5309 unsigned int eff_surf_width_c; 5310 unsigned int eff_surf_height_l; 5311 unsigned int eff_surf_height_c; 5312 unsigned int full_swath_bytes_horz_wc_l; 5313 unsigned int full_swath_bytes_horz_wc_c; 5314 unsigned int full_swath_bytes_vert_wc_l; 5315 unsigned int full_swath_bytes_vert_wc_c; 5316 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5317 5318 unsigned int yuv420; 5319 unsigned int horz_div_l; 5320 unsigned int horz_div_c; 5321 unsigned int vert_div_l; 5322 unsigned int vert_div_c; 5323 5324 unsigned int swath_buf_size; 5325 double detile_buf_vp_horz_limit; 5326 double detile_buf_vp_vert_limit; 5327 5328 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5329 SourcePixelFormat == dm_420_12) ? 1 : 0); 5330 horz_div_l = 1; 5331 horz_div_c = 1; 5332 vert_div_l = 1; 5333 vert_div_c = 1; 5334 5335 if (BytePerPixelY == 1) 5336 vert_div_l = 0; 5337 if (BytePerPixelC == 1) 5338 vert_div_c = 0; 5339 5340 if (BytePerPixelC == 0) { 5341 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5342 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5343 BytePerPixelY / (1 + horz_div_l)); 5344 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5345 (1 + vert_div_l)); 5346 } else { 5347 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5348 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5349 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5350 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5351 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5352 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5353 (1 + vert_div_c) / (1 + yuv420)); 5354 } 5355 5356 if (SourcePixelFormat == dm_420_10) { 5357 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5358 detile_buf_vp_vert_limit = 1.5 * 
detile_buf_vp_vert_limit; 5359 } 5360 5361 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5362 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5363 5364 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5365 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5366 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5367 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5368 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5369 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5370 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5371 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5372 5373 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5374 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5375 if (BytePerPixelC > 0) { 5376 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5377 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5378 } else { 5379 full_swath_bytes_horz_wc_c = 0; 5380 full_swath_bytes_vert_wc_c = 0; 5381 } 5382 5383 if (SourcePixelFormat == dm_420_10) { 5384 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5385 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5386 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5387 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5388 } 5389 5390 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5391 req128_horz_wc_l = 0; 5392 req128_horz_wc_c = 0; 5393 } 
else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5394 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5395 req128_horz_wc_l = 0; 5396 req128_horz_wc_c = 1; 5397 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5398 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5399 req128_horz_wc_l = 1; 5400 req128_horz_wc_c = 0; 5401 } else { 5402 req128_horz_wc_l = 1; 5403 req128_horz_wc_c = 1; 5404 } 5405 5406 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5407 req128_vert_wc_l = 0; 5408 req128_vert_wc_c = 0; 5409 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5410 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5411 req128_vert_wc_l = 0; 5412 req128_vert_wc_c = 1; 5413 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5414 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5415 req128_vert_wc_l = 1; 5416 req128_vert_wc_c = 0; 5417 } else { 5418 req128_vert_wc_l = 1; 5419 req128_vert_wc_c = 1; 5420 } 5421 5422 if (BytePerPixelY == 2) { 5423 segment_order_horz_contiguous_luma = 0; 5424 segment_order_vert_contiguous_luma = 1; 5425 } else { 5426 segment_order_horz_contiguous_luma = 1; 5427 segment_order_vert_contiguous_luma = 0; 5428 } 5429 5430 if (BytePerPixelC == 2) { 5431 segment_order_horz_contiguous_chroma = 0; 5432 segment_order_vert_contiguous_chroma = 1; 5433 } else { 5434 segment_order_horz_contiguous_chroma = 1; 5435 segment_order_vert_contiguous_chroma = 0; 5436 } 5437 #ifdef __DML_VBA_DEBUG__ 5438 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5439 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5440 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5441 dml_print("DML::%s: req128_horz_wc_l = %d\n", 
__func__, req128_horz_wc_l); 5442 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5443 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5444 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5445 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5446 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5447 __func__, segment_order_horz_contiguous_chroma); 5448 #endif 5449 5450 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5451 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5452 RequestLuma = REQ_256Bytes; 5453 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5454 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5455 RequestLuma = REQ_128BytesNonContiguous; 5456 else 5457 RequestLuma = REQ_128BytesContiguous; 5458 5459 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5460 RequestChroma = REQ_256Bytes; 5461 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5462 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5463 RequestChroma = REQ_128BytesNonContiguous; 5464 else 5465 RequestChroma = REQ_128BytesContiguous; 5466 5467 } else if (!IsVertical(SourceRotation)) { 5468 if (req128_horz_wc_l == 0) 5469 RequestLuma = REQ_256Bytes; 5470 else if (segment_order_horz_contiguous_luma == 0) 5471 RequestLuma = REQ_128BytesNonContiguous; 5472 else 5473 RequestLuma = REQ_128BytesContiguous; 5474 5475 if (req128_horz_wc_c == 0) 5476 RequestChroma = REQ_256Bytes; 5477 else if (segment_order_horz_contiguous_chroma == 0) 5478 RequestChroma = REQ_128BytesNonContiguous; 5479 else 5480 RequestChroma = REQ_128BytesContiguous; 5481 5482 } else { 5483 if (req128_vert_wc_l == 0) 5484 RequestLuma = REQ_256Bytes; 5485 else if (segment_order_vert_contiguous_luma == 0) 5486 RequestLuma = 
REQ_128BytesNonContiguous; 5487 else 5488 RequestLuma = REQ_128BytesContiguous; 5489 5490 if (req128_vert_wc_c == 0) 5491 RequestChroma = REQ_256Bytes; 5492 else if (segment_order_vert_contiguous_chroma == 0) 5493 RequestChroma = REQ_128BytesNonContiguous; 5494 else 5495 RequestChroma = REQ_128BytesContiguous; 5496 } 5497 5498 if (RequestLuma == REQ_256Bytes) { 5499 *MaxUncompressedBlockLuma = 256; 5500 *MaxCompressedBlockLuma = 256; 5501 *IndependentBlockLuma = 0; 5502 } else if (RequestLuma == REQ_128BytesContiguous) { 5503 *MaxUncompressedBlockLuma = 256; 5504 *MaxCompressedBlockLuma = 128; 5505 *IndependentBlockLuma = 128; 5506 } else { 5507 *MaxUncompressedBlockLuma = 256; 5508 *MaxCompressedBlockLuma = 64; 5509 *IndependentBlockLuma = 64; 5510 } 5511 5512 if (RequestChroma == REQ_256Bytes) { 5513 *MaxUncompressedBlockChroma = 256; 5514 *MaxCompressedBlockChroma = 256; 5515 *IndependentBlockChroma = 0; 5516 } else if (RequestChroma == REQ_128BytesContiguous) { 5517 *MaxUncompressedBlockChroma = 256; 5518 *MaxCompressedBlockChroma = 128; 5519 *IndependentBlockChroma = 128; 5520 } else { 5521 *MaxUncompressedBlockChroma = 256; 5522 *MaxCompressedBlockChroma = 64; 5523 *IndependentBlockChroma = 64; 5524 } 5525 5526 if (DCCEnabled != true || BytePerPixelC == 0) { 5527 *MaxUncompressedBlockChroma = 0; 5528 *MaxCompressedBlockChroma = 0; 5529 *IndependentBlockChroma = 0; 5530 } 5531 5532 if (DCCEnabled != true) { 5533 *MaxUncompressedBlockLuma = 0; 5534 *MaxCompressedBlockLuma = 0; 5535 *IndependentBlockLuma = 0; 5536 } 5537 5538 #ifdef __DML_VBA_DEBUG__ 5539 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5540 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5541 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5542 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5543 dml_print("DML::%s: 
MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5544 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5545 #endif 5546 5547 } // CalculateDCCConfiguration 5548 5549 void dml32_CalculateStutterEfficiency( 5550 unsigned int CompressedBufferSizeInkByte, 5551 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5552 bool UnboundedRequestEnabled, 5553 unsigned int MetaFIFOSizeInKEntries, 5554 unsigned int ZeroSizeBufferEntries, 5555 unsigned int PixelChunkSizeInKByte, 5556 unsigned int NumberOfActiveSurfaces, 5557 unsigned int ROBBufferSizeInKByte, 5558 double TotalDataReadBandwidth, 5559 double DCFCLK, 5560 double ReturnBW, 5561 unsigned int CompbufReservedSpace64B, 5562 unsigned int CompbufReservedSpaceZs, 5563 double SRExitTime, 5564 double SRExitZ8Time, 5565 bool SynchronizeTimingsFinal, 5566 unsigned int BlendingAndTiming[], 5567 double StutterEnterPlusExitWatermark, 5568 double Z8StutterEnterPlusExitWatermark, 5569 bool ProgressiveToInterlaceUnitInOPP, 5570 bool Interlace[], 5571 double MinTTUVBlank[], 5572 unsigned int DPPPerSurface[], 5573 unsigned int DETBufferSizeY[], 5574 unsigned int BytePerPixelY[], 5575 double BytePerPixelDETY[], 5576 double SwathWidthY[], 5577 unsigned int SwathHeightY[], 5578 unsigned int SwathHeightC[], 5579 double NetDCCRateLuma[], 5580 double NetDCCRateChroma[], 5581 double DCCFractionOfZeroSizeRequestsLuma[], 5582 double DCCFractionOfZeroSizeRequestsChroma[], 5583 unsigned int HTotal[], 5584 unsigned int VTotal[], 5585 double PixelClock[], 5586 double VRatio[], 5587 enum dm_rotation_angle SourceRotation[], 5588 unsigned int BlockHeight256BytesY[], 5589 unsigned int BlockWidth256BytesY[], 5590 unsigned int BlockHeight256BytesC[], 5591 unsigned int BlockWidth256BytesC[], 5592 unsigned int DCCYMaxUncompressedBlock[], 5593 unsigned int DCCCMaxUncompressedBlock[], 5594 unsigned int VActive[], 5595 bool DCCEnable[], 5596 bool WritebackEnable[], 5597 double 
ReadBandwidthSurfaceLuma[], 5598 double ReadBandwidthSurfaceChroma[], 5599 double meta_row_bw[], 5600 double dpte_row_bw[], 5601 5602 /* Output */ 5603 double *StutterEfficiencyNotIncludingVBlank, 5604 double *StutterEfficiency, 5605 unsigned int *NumberOfStutterBurstsPerFrame, 5606 double *Z8StutterEfficiencyNotIncludingVBlank, 5607 double *Z8StutterEfficiency, 5608 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5609 double *StutterPeriod, 5610 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5611 { 5612 5613 bool FoundCriticalSurface = false; 5614 unsigned int SwathSizeCriticalSurface = 0; 5615 unsigned int LastChunkOfSwathSize; 5616 unsigned int MissingPartOfLastSwathOfDETSize; 5617 double LastZ8StutterPeriod = 0.0; 5618 double LastStutterPeriod = 0.0; 5619 unsigned int TotalNumberOfActiveOTG = 0; 5620 double doublePixelClock; 5621 unsigned int doubleHTotal; 5622 unsigned int doubleVTotal; 5623 bool SameTiming = true; 5624 double DETBufferingTimeY; 5625 double SwathWidthYCriticalSurface = 0.0; 5626 double SwathHeightYCriticalSurface = 0.0; 5627 double VActiveTimeCriticalSurface = 0.0; 5628 double FrameTimeCriticalSurface = 0.0; 5629 unsigned int BytePerPixelYCriticalSurface = 0; 5630 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5631 unsigned int DETBufferSizeYCriticalSurface = 0; 5632 double MinTTUVBlankCriticalSurface = 0.0; 5633 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5634 bool doublePlaneCriticalSurface = 0; 5635 bool doublePipeCriticalSurface = 0; 5636 double TotalCompressedReadBandwidth; 5637 double TotalRowReadBandwidth; 5638 double AverageDCCCompressionRate; 5639 double EffectiveCompressedBufferSize; 5640 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5641 double StutterBurstTime; 5642 unsigned int TotalActiveWriteback; 5643 double LinesInDETY; 5644 double LinesInDETYRoundedDownToSwath; 5645 double MaximumEffectiveCompressionLuma; 5646 double MaximumEffectiveCompressionChroma; 5647 double 
TotalZeroSizeRequestReadBandwidth; 5648 double TotalZeroSizeCompressedReadBandwidth; 5649 double AverageDCCZeroSizeFraction; 5650 double AverageZeroSizeCompressionRate; 5651 unsigned int k; 5652 5653 TotalZeroSizeRequestReadBandwidth = 0; 5654 TotalZeroSizeCompressedReadBandwidth = 0; 5655 TotalRowReadBandwidth = 0; 5656 TotalCompressedReadBandwidth = 0; 5657 5658 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5659 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5660 if (DCCEnable[k] == true) { 5661 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5662 || (!IsVertical(SourceRotation[k]) 5663 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5664 || DCCYMaxUncompressedBlock[k] < 256) { 5665 MaximumEffectiveCompressionLuma = 2; 5666 } else { 5667 MaximumEffectiveCompressionLuma = 4; 5668 } 5669 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5670 + ReadBandwidthSurfaceLuma[k] 5671 / dml_min(NetDCCRateLuma[k], 5672 MaximumEffectiveCompressionLuma); 5673 #ifdef __DML_VBA_DEBUG__ 5674 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5675 __func__, k, ReadBandwidthSurfaceLuma[k]); 5676 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5677 __func__, k, NetDCCRateLuma[k]); 5678 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5679 __func__, k, MaximumEffectiveCompressionLuma); 5680 #endif 5681 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5682 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5683 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5684 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5685 / MaximumEffectiveCompressionLuma; 5686 5687 if (ReadBandwidthSurfaceChroma[k] > 0) { 5688 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5689 || (!IsVertical(SourceRotation[k]) 5690 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5691 || 
DCCCMaxUncompressedBlock[k] < 256) { 5692 MaximumEffectiveCompressionChroma = 2; 5693 } else { 5694 MaximumEffectiveCompressionChroma = 4; 5695 } 5696 TotalCompressedReadBandwidth = 5697 TotalCompressedReadBandwidth 5698 + ReadBandwidthSurfaceChroma[k] 5699 / dml_min(NetDCCRateChroma[k], 5700 MaximumEffectiveCompressionChroma); 5701 #ifdef __DML_VBA_DEBUG__ 5702 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5703 __func__, k, ReadBandwidthSurfaceChroma[k]); 5704 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5705 __func__, k, NetDCCRateChroma[k]); 5706 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5707 __func__, k, MaximumEffectiveCompressionChroma); 5708 #endif 5709 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5710 + ReadBandwidthSurfaceChroma[k] 5711 * DCCFractionOfZeroSizeRequestsChroma[k]; 5712 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5713 + ReadBandwidthSurfaceChroma[k] 5714 * DCCFractionOfZeroSizeRequestsChroma[k] 5715 / MaximumEffectiveCompressionChroma; 5716 } 5717 } else { 5718 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5719 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5720 } 5721 TotalRowReadBandwidth = TotalRowReadBandwidth 5722 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5723 } 5724 } 5725 5726 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5727 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5728 5729 #ifdef __DML_VBA_DEBUG__ 5730 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5731 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5732 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5733 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5734 __func__, 
TotalZeroSizeCompressedReadBandwidth); 5735 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5736 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5737 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5738 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5739 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5740 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5741 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5742 #endif 5743 if (AverageDCCZeroSizeFraction == 1) { 5744 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5745 / TotalZeroSizeCompressedReadBandwidth; 5746 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5747 * AverageZeroSizeCompressionRate 5748 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5749 * AverageZeroSizeCompressionRate; 5750 } else if (AverageDCCZeroSizeFraction > 0) { 5751 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5752 / TotalZeroSizeCompressedReadBandwidth; 5753 EffectiveCompressedBufferSize = dml_min( 5754 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5755 (double) MetaFIFOSizeInKEntries * 1024 * 64 5756 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5757 + 1 / AverageDCCCompressionRate)) 5758 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5759 * AverageDCCCompressionRate, 5760 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5761 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5762 5763 #ifdef __DML_VBA_DEBUG__ 5764 dml_print("DML::%s: min 1 = %f\n", __func__, 5765 CompressedBufferSizeInkByte * 1024 * 
AverageDCCCompressionRate); 5766 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5767 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5768 AverageDCCCompressionRate)); 5769 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5770 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5771 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5772 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5773 #endif 5774 } else { 5775 EffectiveCompressedBufferSize = dml_min( 5776 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5777 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5778 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5779 * AverageDCCCompressionRate; 5780 5781 #ifdef __DML_VBA_DEBUG__ 5782 dml_print("DML::%s: min 1 = %f\n", __func__, 5783 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5784 dml_print("DML::%s: min 2 = %f\n", __func__, 5785 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5786 #endif 5787 } 5788 5789 #ifdef __DML_VBA_DEBUG__ 5790 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5791 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5792 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5793 #endif 5794 5795 *StutterPeriod = 0; 5796 5797 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5798 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5799 LinesInDETY = ((double) DETBufferSizeY[k] 5800 + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) 5801 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5802 / BytePerPixelDETY[k] / SwathWidthY[k]; 5803 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5804 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5805 / VRatio[k]; 5806 #ifdef __DML_VBA_DEBUG__ 5807 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5808 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5809 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5810 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5811 __func__, k, ReadBandwidthSurfaceLuma[k]); 5812 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5813 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5814 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5815 __func__, k, LinesInDETYRoundedDownToSwath); 5816 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5817 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5818 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5819 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5820 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5821 #endif 5822 5823 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5824 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5825 5826 FoundCriticalSurface = true; 5827 *StutterPeriod = DETBufferingTimeY; 5828 FrameTimeCriticalSurface = ( 5829 isInterlaceTiming ? 5830 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5831 * (double) HTotal[k] / PixelClock[k]; 5832 VActiveTimeCriticalSurface = ( 5833 isInterlaceTiming ? 
5834 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5835 * (double) HTotal[k] / PixelClock[k]; 5836 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5837 SwathWidthYCriticalSurface = SwathWidthY[k]; 5838 SwathHeightYCriticalSurface = SwathHeightY[k]; 5839 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5840 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5841 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5842 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5843 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5844 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5845 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5846 5847 #ifdef __DML_VBA_DEBUG__ 5848 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5849 __func__, k, FoundCriticalSurface); 5850 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5851 __func__, k, *StutterPeriod); 5852 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5853 __func__, k, MinTTUVBlankCriticalSurface); 5854 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5855 __func__, k, FrameTimeCriticalSurface); 5856 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5857 __func__, k, VActiveTimeCriticalSurface); 5858 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5859 __func__, k, BytePerPixelYCriticalSurface); 5860 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5861 __func__, k, SwathWidthYCriticalSurface); 5862 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5863 __func__, k, SwathHeightYCriticalSurface); 5864 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5865 __func__, k, BlockWidth256BytesYCriticalSurface); 5866 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5867 __func__, k, doublePlaneCriticalSurface); 5868 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5869 __func__, k, doublePipeCriticalSurface); 5870 dml_print("DML::%s: 
k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5871 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5872 #endif 5873 } 5874 } 5875 } 5876 5877 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5878 EffectiveCompressedBufferSize); 5879 #ifdef __DML_VBA_DEBUG__ 5880 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5881 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5882 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5883 __func__, *StutterPeriod * TotalDataReadBandwidth); 5884 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5885 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5886 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5887 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5888 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5889 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5890 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5891 #endif 5892 5893 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5894 / ReturnBW 5895 + (*StutterPeriod * TotalDataReadBandwidth 5896 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5897 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5898 #ifdef __DML_VBA_DEBUG__ 5899 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5900 AverageDCCCompressionRate / ReturnBW); 5901 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5902 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5903 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * 
TotalDataReadBandwidth - 5904 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5905 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5906 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5907 #endif 5908 StutterBurstTime = dml_max(StutterBurstTime, 5909 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5910 * SwathWidthYCriticalSurface / ReturnBW); 5911 5912 #ifdef __DML_VBA_DEBUG__ 5913 dml_print("DML::%s: Time to finish residue swath=%f\n", 5914 __func__, 5915 LinesToFinishSwathTransferStutterCriticalSurface * 5916 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5917 #endif 5918 5919 TotalActiveWriteback = 0; 5920 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5921 if (WritebackEnable[k]) 5922 TotalActiveWriteback = TotalActiveWriteback + 1; 5923 } 5924 5925 if (TotalActiveWriteback == 0) { 5926 #ifdef __DML_VBA_DEBUG__ 5927 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5928 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5929 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5930 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5931 #endif 5932 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5933 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5934 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5935 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5936 *NumberOfStutterBurstsPerFrame = ( 5937 *StutterEfficiencyNotIncludingVBlank > 0 ? 5938 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5939 *Z8NumberOfStutterBurstsPerFrame = ( 5940 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
5941 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5942 } else { 5943 *StutterEfficiencyNotIncludingVBlank = 0.; 5944 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5945 *NumberOfStutterBurstsPerFrame = 0; 5946 *Z8NumberOfStutterBurstsPerFrame = 0; 5947 } 5948 #ifdef __DML_VBA_DEBUG__ 5949 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5950 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5951 __func__, *StutterEfficiencyNotIncludingVBlank); 5952 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5953 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5954 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5955 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5956 #endif 5957 5958 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5959 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5960 if (BlendingAndTiming[k] == k) { 5961 if (TotalNumberOfActiveOTG == 0) { 5962 doublePixelClock = PixelClock[k]; 5963 doubleHTotal = HTotal[k]; 5964 doubleVTotal = VTotal[k]; 5965 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5966 || doubleVTotal != VTotal[k]) { 5967 SameTiming = false; 5968 } 5969 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5970 } 5971 } 5972 } 5973 5974 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5975 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5976 5977 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5978 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 5979 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 5980 + StutterBurstTime * VActiveTimeCriticalSurface 5981 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5982 } else { 5983 *StutterEfficiency = 
*StutterEfficiencyNotIncludingVBlank; 5984 } 5985 } else { 5986 *StutterEfficiency = 0; 5987 } 5988 5989 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 5990 LastZ8StutterPeriod = VActiveTimeCriticalSurface 5991 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5992 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 5993 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 5994 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 5995 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5996 } else { 5997 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 5998 } 5999 } else { 6000 *Z8StutterEfficiency = 0.; 6001 } 6002 6003 #ifdef __DML_VBA_DEBUG__ 6004 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6005 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6006 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6007 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6008 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6009 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6010 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6011 __func__, *StutterEfficiencyNotIncludingVBlank); 6012 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6013 #endif 6014 6015 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6016 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6017 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6018 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6019 - DETBufferSizeYCriticalSurface; 6020 6021 
*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6022 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6023 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6024 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6025 6026 #ifdef __DML_VBA_DEBUG__ 6027 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6028 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6029 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6030 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6031 #endif 6032 } // CalculateStutterEfficiency 6033 6034 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6035 unsigned int ConfigReturnBufferSizeInKByte, 6036 unsigned int ROBBufferSizeInKByte, 6037 unsigned int MaxNumDPP, 6038 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6039 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6040 6041 /* Output */ 6042 unsigned int *MaxTotalDETInKByte, 6043 unsigned int *nomDETInKByte, 6044 unsigned int *MinCompressedBufferSizeInKByte) 6045 { 6046 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6047 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6048 6049 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6050 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6051 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6052 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6053 6054 #ifdef __DML_VBA_DEBUG__ 6055 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6056 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, 
ROBBufferSizeInKByte); 6057 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6058 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6059 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6060 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6061 #endif 6062 6063 if (det_buff_size_override_en) { 6064 *nomDETInKByte = det_buff_size_override_val; 6065 #ifdef __DML_VBA_DEBUG__ 6066 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6067 #endif 6068 } 6069 } // CalculateMaxDETAndMinCompressedBufferSize 6070 6071 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6072 double ReturnBW, 6073 bool NotUrgentLatencyHiding[], 6074 double ReadBandwidthLuma[], 6075 double ReadBandwidthChroma[], 6076 double cursor_bw[], 6077 double meta_row_bandwidth[], 6078 double dpte_row_bandwidth[], 6079 unsigned int NumberOfDPP[], 6080 double UrgentBurstFactorLuma[], 6081 double UrgentBurstFactorChroma[], 6082 double UrgentBurstFactorCursor[]) 6083 { 6084 unsigned int k; 6085 bool NotEnoughUrgentLatencyHiding = false; 6086 bool CalculateVActiveBandwithSupport_val = false; 6087 double VActiveBandwith = 0; 6088 6089 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6090 if (NotUrgentLatencyHiding[k]) { 6091 NotEnoughUrgentLatencyHiding = true; 6092 } 6093 } 6094 6095 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6096 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6097 } 6098 6099 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6100 6101 #ifdef __DML_VBA_DEBUG__ 6102 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, 
NotEnoughUrgentLatencyHiding); 6103 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6104 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6105 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6106 #endif 6107 return CalculateVActiveBandwithSupport_val; 6108 } 6109 6110 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6111 double ReturnBW, 6112 bool NotUrgentLatencyHiding[], 6113 double ReadBandwidthLuma[], 6114 double ReadBandwidthChroma[], 6115 double PrefetchBandwidthLuma[], 6116 double PrefetchBandwidthChroma[], 6117 double cursor_bw[], 6118 double meta_row_bandwidth[], 6119 double dpte_row_bandwidth[], 6120 double cursor_bw_pre[], 6121 double prefetch_vmrow_bw[], 6122 unsigned int NumberOfDPP[], 6123 double UrgentBurstFactorLuma[], 6124 double UrgentBurstFactorChroma[], 6125 double UrgentBurstFactorCursor[], 6126 double UrgentBurstFactorLumaPre[], 6127 double UrgentBurstFactorChromaPre[], 6128 double UrgentBurstFactorCursorPre[], 6129 6130 /* output */ 6131 double *PrefetchBandwidth, 6132 double *FractionOfUrgentBandwidth, 6133 bool *PrefetchBandwidthSupport) 6134 { 6135 unsigned int k; 6136 bool NotEnoughUrgentLatencyHiding = false; 6137 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6138 if (NotUrgentLatencyHiding[k]) { 6139 NotEnoughUrgentLatencyHiding = true; 6140 } 6141 } 6142 6143 *PrefetchBandwidth = 0; 6144 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6145 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6146 ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), 6147 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
UrgentBurstFactorCursorPre[k]); 6148 } 6149 6150 *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6151 *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; 6152 } 6153 6154 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6155 double ReturnBW, 6156 double ReadBandwidthLuma[], 6157 double ReadBandwidthChroma[], 6158 double PrefetchBandwidthLuma[], 6159 double PrefetchBandwidthChroma[], 6160 double cursor_bw[], 6161 double cursor_bw_pre[], 6162 unsigned int NumberOfDPP[], 6163 double UrgentBurstFactorLuma[], 6164 double UrgentBurstFactorChroma[], 6165 double UrgentBurstFactorCursor[], 6166 double UrgentBurstFactorLumaPre[], 6167 double UrgentBurstFactorChromaPre[], 6168 double UrgentBurstFactorCursorPre[]) 6169 { 6170 unsigned int k; 6171 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6172 6173 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6174 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6175 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6176 } 6177 6178 return CalculateBandwidthAvailableForImmediateFlip_val; 6179 } 6180 6181 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6182 double ReturnBW, 6183 enum immediate_flip_requirement ImmediateFlipRequirement[], 6184 double final_flip_bw[], 6185 double ReadBandwidthLuma[], 6186 double ReadBandwidthChroma[], 6187 double PrefetchBandwidthLuma[], 6188 double PrefetchBandwidthChroma[], 6189 double cursor_bw[], 6190 double meta_row_bandwidth[], 6191 double dpte_row_bandwidth[], 6192 double cursor_bw_pre[], 6193 double 
prefetch_vmrow_bw[], 6194 unsigned int NumberOfDPP[], 6195 double UrgentBurstFactorLuma[], 6196 double UrgentBurstFactorChroma[], 6197 double UrgentBurstFactorCursor[], 6198 double UrgentBurstFactorLumaPre[], 6199 double UrgentBurstFactorChromaPre[], 6200 double UrgentBurstFactorCursorPre[], 6201 6202 /* output */ 6203 double *TotalBandwidth, 6204 double *FractionOfUrgentBandwidth, 6205 bool *ImmediateFlipBandwidthSupport) 6206 { 6207 unsigned int k; 6208 *TotalBandwidth = 0; 6209 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6210 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6211 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6212 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6213 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6214 } else { 6215 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6216 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6217 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6218 } 6219 } 6220 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6221 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6222 } 6223