1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dc.h" 27 #include "dc_link.h" 28 #include "../display_mode_lib.h" 29 #include "../dcn30/display_mode_vba_30.h" 30 #include "display_mode_vba_31.h" 31 #include "../dml_inline_defs.h" 32 33 /* 34 * NOTE: 35 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 36 * 37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 38 * ways. Unless there is something clearly wrong with it the code should 39 * remain as-is as it provides us with a guarantee from HW that it is correct. 
40 */ 41 42 #define BPP_INVALID 0 43 #define BPP_BLENDED_PIPE 0xffffffff 44 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184 45 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 46 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 47 #define DCN3_15_MAX_DET_SIZE 384 48 49 // For DML-C changes that hasn't been propagated to VBA yet 50 //#define __DML_VBA_ALLOW_DELTA__ 51 52 // Move these to ip paramaters/constant 53 54 // At which vstartup the DML start to try if the mode can be supported 55 #define __DML_VBA_MIN_VSTARTUP__ 9 56 57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 59 60 // fudge factor for min dcfclk calclation 61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 62 63 typedef struct { 64 double DPPCLK; 65 double DISPCLK; 66 double PixelClock; 67 double DCFCLKDeepSleep; 68 unsigned int DPPPerPlane; 69 bool ScalerEnabled; 70 double VRatio; 71 double VRatioChroma; 72 enum scan_direction_class SourceScan; 73 unsigned int BlockWidth256BytesY; 74 unsigned int BlockHeight256BytesY; 75 unsigned int BlockWidth256BytesC; 76 unsigned int BlockHeight256BytesC; 77 unsigned int InterlaceEnable; 78 unsigned int NumberOfCursors; 79 unsigned int VBlank; 80 unsigned int HTotal; 81 unsigned int DCCEnable; 82 bool ODMCombineIsEnabled; 83 enum source_format_class SourcePixelFormat; 84 int BytePerPixelY; 85 int BytePerPixelC; 86 bool ProgressiveToInterlaceUnitInOPP; 87 } Pipe; 88 89 #define BPP_INVALID 0 90 #define BPP_BLENDED_PIPE 0xffffffff 91 92 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 93 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 94 static unsigned int dscceComputeDelay( 95 unsigned int bpc, 96 double BPP, 97 unsigned int sliceWidth, 98 unsigned int numSlices, 99 enum output_format_class pixelFormat, 100 enum output_encoder_class Output); 101 static unsigned int dscComputeDelay(enum output_format_class 
pixelFormat, enum output_encoder_class Output); 102 static bool CalculatePrefetchSchedule( 103 struct display_mode_lib *mode_lib, 104 double HostVMInefficiencyFactor, 105 Pipe *myPipe, 106 unsigned int DSCDelay, 107 double DPPCLKDelaySubtotalPlusCNVCFormater, 108 double DPPCLKDelaySCL, 109 double DPPCLKDelaySCLLBOnly, 110 double DPPCLKDelayCNVCCursor, 111 double DISPCLKDelaySubtotal, 112 unsigned int DPP_RECOUT_WIDTH, 113 enum output_format_class OutputFormat, 114 unsigned int MaxInterDCNTileRepeaters, 115 unsigned int VStartup, 116 unsigned int MaxVStartup, 117 unsigned int GPUVMPageTableLevels, 118 bool GPUVMEnable, 119 bool HostVMEnable, 120 unsigned int HostVMMaxNonCachedPageTableLevels, 121 double HostVMMinPageSize, 122 bool DynamicMetadataEnable, 123 bool DynamicMetadataVMEnabled, 124 int DynamicMetadataLinesBeforeActiveRequired, 125 unsigned int DynamicMetadataTransmittedBytes, 126 double UrgentLatency, 127 double UrgentExtraLatency, 128 double TCalc, 129 unsigned int PDEAndMetaPTEBytesFrame, 130 unsigned int MetaRowByte, 131 unsigned int PixelPTEBytesPerRow, 132 double PrefetchSourceLinesY, 133 unsigned int SwathWidthY, 134 double VInitPreFillY, 135 unsigned int MaxNumSwathY, 136 double PrefetchSourceLinesC, 137 unsigned int SwathWidthC, 138 double VInitPreFillC, 139 unsigned int MaxNumSwathC, 140 int swath_width_luma_ub, 141 int swath_width_chroma_ub, 142 unsigned int SwathHeightY, 143 unsigned int SwathHeightC, 144 double TWait, 145 double *DSTXAfterScaler, 146 double *DSTYAfterScaler, 147 double *DestinationLinesForPrefetch, 148 double *PrefetchBandwidth, 149 double *DestinationLinesToRequestVMInVBlank, 150 double *DestinationLinesToRequestRowInVBlank, 151 double *VRatioPrefetchY, 152 double *VRatioPrefetchC, 153 double *RequiredPrefetchPixDataBWLuma, 154 double *RequiredPrefetchPixDataBWChroma, 155 bool *NotEnoughTimeForDynamicMetadata, 156 double *Tno_bw, 157 double *prefetch_vmrow_bw, 158 double *Tdmdl_vm, 159 double *Tdmdl, 160 double *TSetup, 161 
int *VUpdateOffsetPix, 162 double *VUpdateWidthPix, 163 double *VReadyOffsetPix); 164 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 165 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 166 static void CalculateDCCConfiguration( 167 bool DCCEnabled, 168 bool DCCProgrammingAssumesScanDirectionUnknown, 169 enum source_format_class SourcePixelFormat, 170 unsigned int SurfaceWidthLuma, 171 unsigned int SurfaceWidthChroma, 172 unsigned int SurfaceHeightLuma, 173 unsigned int SurfaceHeightChroma, 174 double DETBufferSize, 175 unsigned int RequestHeight256ByteLuma, 176 unsigned int RequestHeight256ByteChroma, 177 enum dm_swizzle_mode TilingFormat, 178 unsigned int BytePerPixelY, 179 unsigned int BytePerPixelC, 180 double BytePerPixelDETY, 181 double BytePerPixelDETC, 182 enum scan_direction_class ScanOrientation, 183 unsigned int *MaxUncompressedBlockLuma, 184 unsigned int *MaxUncompressedBlockChroma, 185 unsigned int *MaxCompressedBlockLuma, 186 unsigned int *MaxCompressedBlockChroma, 187 unsigned int *IndependentBlockLuma, 188 unsigned int *IndependentBlockChroma); 189 static double CalculatePrefetchSourceLines( 190 struct display_mode_lib *mode_lib, 191 double VRatio, 192 double vtaps, 193 bool Interlace, 194 bool ProgressiveToInterlaceUnitInOPP, 195 unsigned int SwathHeight, 196 unsigned int ViewportYStart, 197 double *VInitPreFill, 198 unsigned int *MaxNumSwath); 199 static unsigned int CalculateVMAndRowBytes( 200 struct display_mode_lib *mode_lib, 201 bool DCCEnable, 202 unsigned int BlockHeight256Bytes, 203 unsigned int BlockWidth256Bytes, 204 enum source_format_class SourcePixelFormat, 205 unsigned int SurfaceTiling, 206 unsigned int BytePerPixel, 207 enum scan_direction_class ScanDirection, 208 unsigned int SwathWidth, 209 unsigned int ViewportHeight, 210 bool GPUVMEnable, 211 bool HostVMEnable, 212 unsigned int HostVMMaxNonCachedPageTableLevels, 213 unsigned int GPUVMMinPageSize, 214 unsigned int HostVMMinPageSize, 
215 unsigned int PTEBufferSizeInRequests, 216 unsigned int Pitch, 217 unsigned int DCCMetaPitch, 218 unsigned int *MacroTileWidth, 219 unsigned int *MetaRowByte, 220 unsigned int *PixelPTEBytesPerRow, 221 bool *PTEBufferSizeNotExceeded, 222 int *dpte_row_width_ub, 223 unsigned int *dpte_row_height, 224 unsigned int *MetaRequestWidth, 225 unsigned int *MetaRequestHeight, 226 unsigned int *meta_row_width, 227 unsigned int *meta_row_height, 228 int *vm_group_bytes, 229 unsigned int *dpte_group_bytes, 230 unsigned int *PixelPTEReqWidth, 231 unsigned int *PixelPTEReqHeight, 232 unsigned int *PTERequestSize, 233 int *DPDE0BytesFrame, 234 int *MetaPTEBytesFrame); 235 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 236 static void CalculateRowBandwidth( 237 bool GPUVMEnable, 238 enum source_format_class SourcePixelFormat, 239 double VRatio, 240 double VRatioChroma, 241 bool DCCEnable, 242 double LineTime, 243 unsigned int MetaRowByteLuma, 244 unsigned int MetaRowByteChroma, 245 unsigned int meta_row_height_luma, 246 unsigned int meta_row_height_chroma, 247 unsigned int PixelPTEBytesPerRowLuma, 248 unsigned int PixelPTEBytesPerRowChroma, 249 unsigned int dpte_row_height_luma, 250 unsigned int dpte_row_height_chroma, 251 double *meta_row_bw, 252 double *dpte_row_bw); 253 254 static void CalculateFlipSchedule( 255 struct display_mode_lib *mode_lib, 256 unsigned int k, 257 double HostVMInefficiencyFactor, 258 double UrgentExtraLatency, 259 double UrgentLatency, 260 double PDEAndMetaPTEBytesPerFrame, 261 double MetaRowBytes, 262 double DPTEBytesPerRow); 263 static double CalculateWriteBackDelay( 264 enum source_format_class WritebackPixelFormat, 265 double WritebackHRatio, 266 double WritebackVRatio, 267 unsigned int WritebackVTaps, 268 int WritebackDestinationWidth, 269 int WritebackDestinationHeight, 270 int WritebackSourceHeight, 271 unsigned int HTotal); 272 273 static void 
CalculateVupdateAndDynamicMetadataParameters( 274 int MaxInterDCNTileRepeaters, 275 double DPPCLK, 276 double DISPCLK, 277 double DCFClkDeepSleep, 278 double PixelClock, 279 int HTotal, 280 int VBlank, 281 int DynamicMetadataTransmittedBytes, 282 int DynamicMetadataLinesBeforeActiveRequired, 283 int InterlaceEnable, 284 bool ProgressiveToInterlaceUnitInOPP, 285 double *TSetup, 286 double *Tdmbf, 287 double *Tdmec, 288 double *Tdmsks, 289 int *VUpdateOffsetPix, 290 double *VUpdateWidthPix, 291 double *VReadyOffsetPix); 292 293 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 294 struct display_mode_lib *mode_lib, 295 unsigned int PrefetchMode, 296 double DCFCLK, 297 double ReturnBW, 298 double UrgentLatency, 299 double ExtraLatency, 300 double SOCCLK, 301 double DCFCLKDeepSleep, 302 unsigned int DETBufferSizeY[], 303 unsigned int DETBufferSizeC[], 304 unsigned int SwathHeightY[], 305 unsigned int SwathHeightC[], 306 double SwathWidthY[], 307 double SwathWidthC[], 308 unsigned int DPPPerPlane[], 309 double BytePerPixelDETY[], 310 double BytePerPixelDETC[], 311 bool UnboundedRequestEnabled, 312 int unsigned CompressedBufferSizeInkByte, 313 enum clock_change_support *DRAMClockChangeSupport, 314 double *StutterExitWatermark, 315 double *StutterEnterPlusExitWatermark, 316 double *Z8StutterExitWatermark, 317 double *Z8StutterEnterPlusExitWatermark); 318 319 static void CalculateDCFCLKDeepSleep( 320 struct display_mode_lib *mode_lib, 321 unsigned int NumberOfActivePlanes, 322 int BytePerPixelY[], 323 int BytePerPixelC[], 324 double VRatio[], 325 double VRatioChroma[], 326 double SwathWidthY[], 327 double SwathWidthC[], 328 unsigned int DPPPerPlane[], 329 double HRatio[], 330 double HRatioChroma[], 331 double PixelClock[], 332 double PSCL_THROUGHPUT[], 333 double PSCL_THROUGHPUT_CHROMA[], 334 double DPPCLK[], 335 double ReadBandwidthLuma[], 336 double ReadBandwidthChroma[], 337 int ReturnBusWidth, 338 double *DCFCLKDeepSleep); 339 340 static void 
CalculateUrgentBurstFactor( 341 int swath_width_luma_ub, 342 int swath_width_chroma_ub, 343 unsigned int SwathHeightY, 344 unsigned int SwathHeightC, 345 double LineTime, 346 double UrgentLatency, 347 double CursorBufferSize, 348 unsigned int CursorWidth, 349 unsigned int CursorBPP, 350 double VRatio, 351 double VRatioC, 352 double BytePerPixelInDETY, 353 double BytePerPixelInDETC, 354 double DETBufferSizeY, 355 double DETBufferSizeC, 356 double *UrgentBurstFactorCursor, 357 double *UrgentBurstFactorLuma, 358 double *UrgentBurstFactorChroma, 359 bool *NotEnoughUrgentLatencyHiding); 360 361 static void UseMinimumDCFCLK( 362 struct display_mode_lib *mode_lib, 363 int MaxPrefetchMode, 364 int ReorderingBytes); 365 366 static void CalculatePixelDeliveryTimes( 367 unsigned int NumberOfActivePlanes, 368 double VRatio[], 369 double VRatioChroma[], 370 double VRatioPrefetchY[], 371 double VRatioPrefetchC[], 372 unsigned int swath_width_luma_ub[], 373 unsigned int swath_width_chroma_ub[], 374 unsigned int DPPPerPlane[], 375 double HRatio[], 376 double HRatioChroma[], 377 double PixelClock[], 378 double PSCL_THROUGHPUT[], 379 double PSCL_THROUGHPUT_CHROMA[], 380 double DPPCLK[], 381 int BytePerPixelC[], 382 enum scan_direction_class SourceScan[], 383 unsigned int NumberOfCursors[], 384 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 385 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 386 unsigned int BlockWidth256BytesY[], 387 unsigned int BlockHeight256BytesY[], 388 unsigned int BlockWidth256BytesC[], 389 unsigned int BlockHeight256BytesC[], 390 double DisplayPipeLineDeliveryTimeLuma[], 391 double DisplayPipeLineDeliveryTimeChroma[], 392 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 393 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 394 double DisplayPipeRequestDeliveryTimeLuma[], 395 double DisplayPipeRequestDeliveryTimeChroma[], 396 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 397 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 398 double 
CursorRequestDeliveryTime[], 399 double CursorRequestDeliveryTimePrefetch[]); 400 401 static void CalculateMetaAndPTETimes( 402 int NumberOfActivePlanes, 403 bool GPUVMEnable, 404 int MetaChunkSize, 405 int MinMetaChunkSizeBytes, 406 int HTotal[], 407 double VRatio[], 408 double VRatioChroma[], 409 double DestinationLinesToRequestRowInVBlank[], 410 double DestinationLinesToRequestRowInImmediateFlip[], 411 bool DCCEnable[], 412 double PixelClock[], 413 int BytePerPixelY[], 414 int BytePerPixelC[], 415 enum scan_direction_class SourceScan[], 416 int dpte_row_height[], 417 int dpte_row_height_chroma[], 418 int meta_row_width[], 419 int meta_row_width_chroma[], 420 int meta_row_height[], 421 int meta_row_height_chroma[], 422 int meta_req_width[], 423 int meta_req_width_chroma[], 424 int meta_req_height[], 425 int meta_req_height_chroma[], 426 int dpte_group_bytes[], 427 int PTERequestSizeY[], 428 int PTERequestSizeC[], 429 int PixelPTEReqWidthY[], 430 int PixelPTEReqHeightY[], 431 int PixelPTEReqWidthC[], 432 int PixelPTEReqHeightC[], 433 int dpte_row_width_luma_ub[], 434 int dpte_row_width_chroma_ub[], 435 double DST_Y_PER_PTE_ROW_NOM_L[], 436 double DST_Y_PER_PTE_ROW_NOM_C[], 437 double DST_Y_PER_META_ROW_NOM_L[], 438 double DST_Y_PER_META_ROW_NOM_C[], 439 double TimePerMetaChunkNominal[], 440 double TimePerChromaMetaChunkNominal[], 441 double TimePerMetaChunkVBlank[], 442 double TimePerChromaMetaChunkVBlank[], 443 double TimePerMetaChunkFlip[], 444 double TimePerChromaMetaChunkFlip[], 445 double time_per_pte_group_nom_luma[], 446 double time_per_pte_group_vblank_luma[], 447 double time_per_pte_group_flip_luma[], 448 double time_per_pte_group_nom_chroma[], 449 double time_per_pte_group_vblank_chroma[], 450 double time_per_pte_group_flip_chroma[]); 451 452 static void CalculateVMGroupAndRequestTimes( 453 unsigned int NumberOfActivePlanes, 454 bool GPUVMEnable, 455 unsigned int GPUVMMaxPageTableLevels, 456 unsigned int HTotal[], 457 int BytePerPixelC[], 458 double 
DestinationLinesToRequestVMInVBlank[], 459 double DestinationLinesToRequestVMInImmediateFlip[], 460 bool DCCEnable[], 461 double PixelClock[], 462 int dpte_row_width_luma_ub[], 463 int dpte_row_width_chroma_ub[], 464 int vm_group_bytes[], 465 unsigned int dpde0_bytes_per_frame_ub_l[], 466 unsigned int dpde0_bytes_per_frame_ub_c[], 467 int meta_pte_bytes_per_frame_ub_l[], 468 int meta_pte_bytes_per_frame_ub_c[], 469 double TimePerVMGroupVBlank[], 470 double TimePerVMGroupFlip[], 471 double TimePerVMRequestVBlank[], 472 double TimePerVMRequestFlip[]); 473 474 static void CalculateStutterEfficiency( 475 struct display_mode_lib *mode_lib, 476 int CompressedBufferSizeInkByte, 477 bool UnboundedRequestEnabled, 478 int ConfigReturnBufferSizeInKByte, 479 int MetaFIFOSizeInKEntries, 480 int ZeroSizeBufferEntries, 481 int NumberOfActivePlanes, 482 int ROBBufferSizeInKByte, 483 double TotalDataReadBandwidth, 484 double DCFCLK, 485 double ReturnBW, 486 double COMPBUF_RESERVED_SPACE_64B, 487 double COMPBUF_RESERVED_SPACE_ZS, 488 double SRExitTime, 489 double SRExitZ8Time, 490 bool SynchronizedVBlank, 491 double Z8StutterEnterPlusExitWatermark, 492 double StutterEnterPlusExitWatermark, 493 bool ProgressiveToInterlaceUnitInOPP, 494 bool Interlace[], 495 double MinTTUVBlank[], 496 int DPPPerPlane[], 497 unsigned int DETBufferSizeY[], 498 int BytePerPixelY[], 499 double BytePerPixelDETY[], 500 double SwathWidthY[], 501 int SwathHeightY[], 502 int SwathHeightC[], 503 double NetDCCRateLuma[], 504 double NetDCCRateChroma[], 505 double DCCFractionOfZeroSizeRequestsLuma[], 506 double DCCFractionOfZeroSizeRequestsChroma[], 507 int HTotal[], 508 int VTotal[], 509 double PixelClock[], 510 double VRatio[], 511 enum scan_direction_class SourceScan[], 512 int BlockHeight256BytesY[], 513 int BlockWidth256BytesY[], 514 int BlockHeight256BytesC[], 515 int BlockWidth256BytesC[], 516 int DCCYMaxUncompressedBlock[], 517 int DCCCMaxUncompressedBlock[], 518 int VActive[], 519 bool DCCEnable[], 520 
bool WritebackEnable[], 521 double ReadBandwidthPlaneLuma[], 522 double ReadBandwidthPlaneChroma[], 523 double meta_row_bw[], 524 double dpte_row_bw[], 525 double *StutterEfficiencyNotIncludingVBlank, 526 double *StutterEfficiency, 527 int *NumberOfStutterBurstsPerFrame, 528 double *Z8StutterEfficiencyNotIncludingVBlank, 529 double *Z8StutterEfficiency, 530 int *Z8NumberOfStutterBurstsPerFrame, 531 double *StutterPeriod); 532 533 static void CalculateSwathAndDETConfiguration( 534 bool ForceSingleDPP, 535 int NumberOfActivePlanes, 536 unsigned int DETBufferSizeInKByte, 537 double MaximumSwathWidthLuma[], 538 double MaximumSwathWidthChroma[], 539 enum scan_direction_class SourceScan[], 540 enum source_format_class SourcePixelFormat[], 541 enum dm_swizzle_mode SurfaceTiling[], 542 int ViewportWidth[], 543 int ViewportHeight[], 544 int SurfaceWidthY[], 545 int SurfaceWidthC[], 546 int SurfaceHeightY[], 547 int SurfaceHeightC[], 548 int Read256BytesBlockHeightY[], 549 int Read256BytesBlockHeightC[], 550 int Read256BytesBlockWidthY[], 551 int Read256BytesBlockWidthC[], 552 enum odm_combine_mode ODMCombineEnabled[], 553 int BlendingAndTiming[], 554 int BytePerPixY[], 555 int BytePerPixC[], 556 double BytePerPixDETY[], 557 double BytePerPixDETC[], 558 int HActive[], 559 double HRatio[], 560 double HRatioChroma[], 561 int DPPPerPlane[], 562 int swath_width_luma_ub[], 563 int swath_width_chroma_ub[], 564 double SwathWidth[], 565 double SwathWidthChroma[], 566 int SwathHeightY[], 567 int SwathHeightC[], 568 unsigned int DETBufferSizeY[], 569 unsigned int DETBufferSizeC[], 570 bool ViewportSizeSupportPerPlane[], 571 bool *ViewportSizeSupport); 572 static void CalculateSwathWidth( 573 bool ForceSingleDPP, 574 int NumberOfActivePlanes, 575 enum source_format_class SourcePixelFormat[], 576 enum scan_direction_class SourceScan[], 577 int ViewportWidth[], 578 int ViewportHeight[], 579 int SurfaceWidthY[], 580 int SurfaceWidthC[], 581 int SurfaceHeightY[], 582 int SurfaceHeightC[], 
583 enum odm_combine_mode ODMCombineEnabled[], 584 int BytePerPixY[], 585 int BytePerPixC[], 586 int Read256BytesBlockHeightY[], 587 int Read256BytesBlockHeightC[], 588 int Read256BytesBlockWidthY[], 589 int Read256BytesBlockWidthC[], 590 int BlendingAndTiming[], 591 int HActive[], 592 double HRatio[], 593 int DPPPerPlane[], 594 double SwathWidthSingleDPPY[], 595 double SwathWidthSingleDPPC[], 596 double SwathWidthY[], 597 double SwathWidthC[], 598 int MaximumSwathHeightY[], 599 int MaximumSwathHeightC[], 600 int swath_width_luma_ub[], 601 int swath_width_chroma_ub[]); 602 603 static double CalculateExtraLatency( 604 int RoundTripPingLatencyCycles, 605 int ReorderingBytes, 606 double DCFCLK, 607 int TotalNumberOfActiveDPP, 608 int PixelChunkSizeInKByte, 609 int TotalNumberOfDCCActiveDPP, 610 int MetaChunkSize, 611 double ReturnBW, 612 bool GPUVMEnable, 613 bool HostVMEnable, 614 int NumberOfActivePlanes, 615 int NumberOfDPP[], 616 int dpte_group_bytes[], 617 double HostVMInefficiencyFactor, 618 double HostVMMinPageSize, 619 int HostVMMaxNonCachedPageTableLevels); 620 621 static double CalculateExtraLatencyBytes( 622 int ReorderingBytes, 623 int TotalNumberOfActiveDPP, 624 int PixelChunkSizeInKByte, 625 int TotalNumberOfDCCActiveDPP, 626 int MetaChunkSize, 627 bool GPUVMEnable, 628 bool HostVMEnable, 629 int NumberOfActivePlanes, 630 int NumberOfDPP[], 631 int dpte_group_bytes[], 632 double HostVMInefficiencyFactor, 633 double HostVMMinPageSize, 634 int HostVMMaxNonCachedPageTableLevels); 635 636 static double CalculateUrgentLatency( 637 double UrgentLatencyPixelDataOnly, 638 double UrgentLatencyPixelMixedWithVMData, 639 double UrgentLatencyVMDataOnly, 640 bool DoUrgentLatencyAdjustment, 641 double UrgentLatencyAdjustmentFabricClockComponent, 642 double UrgentLatencyAdjustmentFabricClockReference, 643 double FabricClockSingle); 644 645 static void CalculateUnboundedRequestAndCompressedBufferSize( 646 unsigned int DETBufferSizeInKByte, 647 int 
ConfigReturnBufferSizeInKByte, 648 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 649 int TotalActiveDPP, 650 bool NoChromaPlanes, 651 int MaxNumDPP, 652 int CompressedBufferSegmentSizeInkByteFinal, 653 enum output_encoder_class *Output, 654 bool *UnboundedRequestEnabled, 655 int *CompressedBufferSizeInkByte); 656 657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 658 659 void dml31_recalculate(struct display_mode_lib *mode_lib) 660 { 661 ModeSupportAndSystemConfiguration(mode_lib); 662 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 663 DisplayPipeConfiguration(mode_lib); 664 #ifdef __DML_VBA_DEBUG__ 665 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 666 #endif 667 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 668 } 669 670 static unsigned int dscceComputeDelay( 671 unsigned int bpc, 672 double BPP, 673 unsigned int sliceWidth, 674 unsigned int numSlices, 675 enum output_format_class pixelFormat, 676 enum output_encoder_class Output) 677 { 678 // valid bpc = source bits per component in the set of {8, 10, 12} 679 // valid bpp = increments of 1/16 of a bit 680 // min = 6/7/8 in N420/N422/444, respectively 681 // max = such that compression is 1:1 682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 685 686 // fixed value 687 unsigned int rcModelSize = 8192; 688 689 // N422/N420 operate at 2 pixels per clock 690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, 
pixels; 691 692 if (pixelFormat == dm_420) 693 pixelsPerClock = 2; 694 else if (pixelFormat == dm_444) 695 pixelsPerClock = 1; 696 else if (pixelFormat == dm_n422) 697 pixelsPerClock = 2; 698 // #all other modes operate at 1 pixel per clock 699 else 700 pixelsPerClock = 1; 701 702 //initial transmit delay as per PPS 703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 704 705 //compute ssm delay 706 if (bpc == 8) 707 D = 81; 708 else if (bpc == 10) 709 D = 89; 710 else 711 D = 113; 712 713 //divide by pixel per cycle to compute slice width as seen by DSC 714 w = sliceWidth / pixelsPerClock; 715 716 //422 mode has an additional cycle of delay 717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 718 s = 0; 719 else 720 s = 1; 721 722 //main calculation for the dscce 723 ix = initalXmitDelay + 45; 724 wx = (w + 2) / 3; 725 P = 3 * wx - w; 726 l0 = ix / w; 727 a = ix + P * l0; 728 ax = (a + 2) / 3 + D + 6 + 1; 729 L = (ax + wx - 1) / wx; 730 if ((ix % w) == 0 && P != 0) 731 lstall = 1; 732 else 733 lstall = 0; 734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 735 736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 737 pixels = Delay * 3 * pixelsPerClock; 738 return pixels; 739 } 740 741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 742 { 743 unsigned int Delay = 0; 744 745 if (pixelFormat == dm_420) { 746 // sfr 747 Delay = Delay + 2; 748 // dsccif 749 Delay = Delay + 0; 750 // dscc - input deserializer 751 Delay = Delay + 3; 752 // dscc gets pixels every other cycle 753 Delay = Delay + 2; 754 // dscc - input cdc fifo 755 Delay = Delay + 12; 756 // dscc gets pixels every other cycle 757 Delay = Delay + 13; 758 // dscc - cdc uncertainty 759 Delay = Delay + 2; 760 // dscc - output cdc fifo 761 Delay = Delay + 7; 762 // dscc gets pixels every other cycle 763 Delay = Delay + 3; 764 // dscc - cdc uncertainty 765 
Delay = Delay + 2; 766 // dscc - output serializer 767 Delay = Delay + 1; 768 // sft 769 Delay = Delay + 1; 770 } else if (pixelFormat == dm_n422) { 771 // sfr 772 Delay = Delay + 2; 773 // dsccif 774 Delay = Delay + 1; 775 // dscc - input deserializer 776 Delay = Delay + 5; 777 // dscc - input cdc fifo 778 Delay = Delay + 25; 779 // dscc - cdc uncertainty 780 Delay = Delay + 2; 781 // dscc - output cdc fifo 782 Delay = Delay + 10; 783 // dscc - cdc uncertainty 784 Delay = Delay + 2; 785 // dscc - output serializer 786 Delay = Delay + 1; 787 // sft 788 Delay = Delay + 1; 789 } else { 790 // sfr 791 Delay = Delay + 2; 792 // dsccif 793 Delay = Delay + 0; 794 // dscc - input deserializer 795 Delay = Delay + 3; 796 // dscc - input cdc fifo 797 Delay = Delay + 12; 798 // dscc - cdc uncertainty 799 Delay = Delay + 2; 800 // dscc - output cdc fifo 801 Delay = Delay + 7; 802 // dscc - output serializer 803 Delay = Delay + 1; 804 // dscc - cdc uncertainty 805 Delay = Delay + 2; 806 // sft 807 Delay = Delay + 1; 808 } 809 810 return Delay; 811 } 812 813 static bool CalculatePrefetchSchedule( 814 struct display_mode_lib *mode_lib, 815 double HostVMInefficiencyFactor, 816 Pipe *myPipe, 817 unsigned int DSCDelay, 818 double DPPCLKDelaySubtotalPlusCNVCFormater, 819 double DPPCLKDelaySCL, 820 double DPPCLKDelaySCLLBOnly, 821 double DPPCLKDelayCNVCCursor, 822 double DISPCLKDelaySubtotal, 823 unsigned int DPP_RECOUT_WIDTH, 824 enum output_format_class OutputFormat, 825 unsigned int MaxInterDCNTileRepeaters, 826 unsigned int VStartup, 827 unsigned int MaxVStartup, 828 unsigned int GPUVMPageTableLevels, 829 bool GPUVMEnable, 830 bool HostVMEnable, 831 unsigned int HostVMMaxNonCachedPageTableLevels, 832 double HostVMMinPageSize, 833 bool DynamicMetadataEnable, 834 bool DynamicMetadataVMEnabled, 835 int DynamicMetadataLinesBeforeActiveRequired, 836 unsigned int DynamicMetadataTransmittedBytes, 837 double UrgentLatency, 838 double UrgentExtraLatency, 839 double TCalc, 840 unsigned int 
PDEAndMetaPTEBytesFrame, 841 unsigned int MetaRowByte, 842 unsigned int PixelPTEBytesPerRow, 843 double PrefetchSourceLinesY, 844 unsigned int SwathWidthY, 845 double VInitPreFillY, 846 unsigned int MaxNumSwathY, 847 double PrefetchSourceLinesC, 848 unsigned int SwathWidthC, 849 double VInitPreFillC, 850 unsigned int MaxNumSwathC, 851 int swath_width_luma_ub, 852 int swath_width_chroma_ub, 853 unsigned int SwathHeightY, 854 unsigned int SwathHeightC, 855 double TWait, 856 double *DSTXAfterScaler, 857 double *DSTYAfterScaler, 858 double *DestinationLinesForPrefetch, 859 double *PrefetchBandwidth, 860 double *DestinationLinesToRequestVMInVBlank, 861 double *DestinationLinesToRequestRowInVBlank, 862 double *VRatioPrefetchY, 863 double *VRatioPrefetchC, 864 double *RequiredPrefetchPixDataBWLuma, 865 double *RequiredPrefetchPixDataBWChroma, 866 bool *NotEnoughTimeForDynamicMetadata, 867 double *Tno_bw, 868 double *prefetch_vmrow_bw, 869 double *Tdmdl_vm, 870 double *Tdmdl, 871 double *TSetup, 872 int *VUpdateOffsetPix, 873 double *VUpdateWidthPix, 874 double *VReadyOffsetPix) 875 { 876 bool MyError = false; 877 unsigned int DPPCycles, DISPCLKCycles; 878 double DSTTotalPixelsAfterScaler; 879 double LineTime; 880 double dst_y_prefetch_equ; 881 double Tsw_oto; 882 double prefetch_bw_oto; 883 double prefetch_bw_pr; 884 double Tvm_oto; 885 double Tr0_oto; 886 double Tvm_oto_lines; 887 double Tr0_oto_lines; 888 double dst_y_prefetch_oto; 889 double TimeForFetchingMetaPTE = 0; 890 double TimeForFetchingRowInVBlank = 0; 891 double LinesToRequestPrefetchPixelData = 0; 892 unsigned int HostVMDynamicLevelsTrips; 893 double trip_to_mem; 894 double Tvm_trips; 895 double Tr0_trips; 896 double Tvm_trips_rounded; 897 double Tr0_trips_rounded; 898 double Lsw_oto; 899 double Tpre_rounded; 900 double prefetch_bw_equ; 901 double Tvm_equ; 902 double Tr0_equ; 903 double Tdmbf; 904 double Tdmec; 905 double Tdmsks; 906 double prefetch_sw_bytes; 907 double bytes_pp; 908 double dep_bytes; 909 
int max_vratio_pre = 4; 910 double min_Lsw; 911 double Tsw_est1 = 0; 912 double Tsw_est3 = 0; 913 double max_Tsw = 0; 914 915 if (GPUVMEnable == true && HostVMEnable == true) { 916 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 917 } else { 918 HostVMDynamicLevelsTrips = 0; 919 } 920 #ifdef __DML_VBA_DEBUG__ 921 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 922 #endif 923 CalculateVupdateAndDynamicMetadataParameters( 924 MaxInterDCNTileRepeaters, 925 myPipe->DPPCLK, 926 myPipe->DISPCLK, 927 myPipe->DCFCLKDeepSleep, 928 myPipe->PixelClock, 929 myPipe->HTotal, 930 myPipe->VBlank, 931 DynamicMetadataTransmittedBytes, 932 DynamicMetadataLinesBeforeActiveRequired, 933 myPipe->InterlaceEnable, 934 myPipe->ProgressiveToInterlaceUnitInOPP, 935 TSetup, 936 &Tdmbf, 937 &Tdmec, 938 &Tdmsks, 939 VUpdateOffsetPix, 940 VUpdateWidthPix, 941 VReadyOffsetPix); 942 943 LineTime = myPipe->HTotal / myPipe->PixelClock; 944 trip_to_mem = UrgentLatency; 945 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 946 947 #ifdef __DML_VBA_ALLOW_DELTA__ 948 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 949 #else 950 if (DynamicMetadataVMEnabled == true) { 951 #endif 952 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 953 } else { 954 *Tdmdl = TWait + UrgentExtraLatency; 955 } 956 957 #ifdef __DML_VBA_ALLOW_DELTA__ 958 if (DynamicMetadataEnable == false) { 959 *Tdmdl = 0.0; 960 } 961 #endif 962 963 if (DynamicMetadataEnable == true) { 964 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 965 *NotEnoughTimeForDynamicMetadata = true; 966 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 967 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 968 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", 
__func__, Tdmec); 969 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 970 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); 971 } else { 972 *NotEnoughTimeForDynamicMetadata = false; 973 } 974 } else { 975 *NotEnoughTimeForDynamicMetadata = false; 976 } 977 978 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 979 980 if (myPipe->ScalerEnabled) 981 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 982 else 983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 984 985 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 986 987 DISPCLKCycles = DISPCLKDelaySubtotal; 988 989 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 990 return true; 991 992 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 993 994 #ifdef __DML_VBA_DEBUG__ 995 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 996 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 997 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 998 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 999 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1000 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1001 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1002 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1003 #endif 1004 1005 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1006 1007 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1008 *DSTYAfterScaler = 1; 1009 else 1010 *DSTYAfterScaler = 0; 1011 1012 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1013 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1014 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1015 1016 #ifdef __DML_VBA_DEBUG__ 1017 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1018 #endif 1019 1020 MyError = false; 1021 1022 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1023 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1024 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1025 1026 #ifdef __DML_VBA_ALLOW_DELTA__ 1027 if (!myPipe->DCCEnable) { 1028 Tr0_trips = 0.0; 1029 Tr0_trips_rounded = 0.0; 1030 } 1031 #endif 1032 1033 if (!GPUVMEnable) { 1034 Tvm_trips = 0.0; 1035 Tvm_trips_rounded = 0.0; 1036 } 1037 1038 if (GPUVMEnable) { 1039 if (GPUVMPageTableLevels >= 3) { 1040 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1041 } else { 1042 *Tno_bw = 0; 1043 } 1044 } else if (!myPipe->DCCEnable) { 1045 *Tno_bw = LineTime; 1046 } else { 1047 *Tno_bw = LineTime / 4; 1048 } 1049 1050 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1051 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1052 else 1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1054 /*rev 99*/ 1055 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); 1056 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1057 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + 
PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1058 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 1059 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1060 1061 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1062 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1063 Tsw_oto = Lsw_oto * LineTime; 1064 1065 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; 1066 1067 #ifdef __DML_VBA_DEBUG__ 1068 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1069 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1070 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1071 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1072 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1073 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1074 #endif 1075 1076 if (GPUVMEnable == true) 1077 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1078 else 1079 Tvm_oto = LineTime / 4.0; 1080 1081 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1082 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1083 LineTime - Tvm_oto, 1084 LineTime / 4); 1085 } else { 1086 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1087 } 1088 1089 #ifdef __DML_VBA_DEBUG__ 1090 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1091 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1092 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1093 dml_print("DML::%s: MetaRowByte = %d\n", 
__func__, MetaRowByte); 1094 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1095 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1096 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 1097 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1098 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1099 #endif 1100 1101 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1102 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1103 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1104 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1105 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1106 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1107 1108 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1109 1110 if (prefetch_sw_bytes < dep_bytes) 1111 prefetch_sw_bytes = 2 * dep_bytes; 1112 1113 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1114 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1115 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1116 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1117 dml_print("DML: LineTime: %f\n", LineTime); 1118 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1119 1120 dml_print("DML: LineTime: %f\n", LineTime); 1121 dml_print("DML: VStartup: %d\n", VStartup); 1122 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1123 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1124 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1125 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate 
enter/exit, urgent latency) after TCalc\n", TWait); 1126 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1127 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1128 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1129 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 1130 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 1131 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 1132 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); 1133 1134 *PrefetchBandwidth = 0; 1135 *DestinationLinesToRequestVMInVBlank = 0; 1136 *DestinationLinesToRequestRowInVBlank = 0; 1137 *VRatioPrefetchY = 0; 1138 *VRatioPrefetchC = 0; 1139 *RequiredPrefetchPixDataBWLuma = 0; 1140 if (dst_y_prefetch_equ > 1) { 1141 double PrefetchBandwidth1; 1142 double PrefetchBandwidth2; 1143 double PrefetchBandwidth3; 1144 double PrefetchBandwidth4; 1145 1146 if (Tpre_rounded - *Tno_bw > 0) { 1147 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1148 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1149 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1150 } else { 1151 PrefetchBandwidth1 = 0; 1152 } 1153 1154 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1155 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1156 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1157 } 1158 1159 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1160 PrefetchBandwidth2 = 
(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1161 else 1162 PrefetchBandwidth2 = 0; 1163 1164 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1165 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1166 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1167 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1168 } else { 1169 PrefetchBandwidth3 = 0; 1170 } 1171 1172 #ifdef __DML_VBA_DEBUG__ 1173 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1174 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1175 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1176 #endif 1177 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1178 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1179 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1180 } 1181 1182 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1183 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1184 else 1185 PrefetchBandwidth4 = 0; 1186 1187 { 1188 bool Case1OK; 1189 bool Case2OK; 1190 bool Case3OK; 1191 1192 if (PrefetchBandwidth1 > 0) { 1193 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1194 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1195 Case1OK = true; 1196 } else { 1197 Case1OK = false; 1198 } 1199 } else { 1200 Case1OK = false; 1201 } 1202 1203 if (PrefetchBandwidth2 > 0) { 1204 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1205 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 
PrefetchBandwidth2 < Tr0_trips_rounded) { 1206 Case2OK = true; 1207 } else { 1208 Case2OK = false; 1209 } 1210 } else { 1211 Case2OK = false; 1212 } 1213 1214 if (PrefetchBandwidth3 > 0) { 1215 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded 1216 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1217 Case3OK = true; 1218 } else { 1219 Case3OK = false; 1220 } 1221 } else { 1222 Case3OK = false; 1223 } 1224 1225 if (Case1OK) { 1226 prefetch_bw_equ = PrefetchBandwidth1; 1227 } else if (Case2OK) { 1228 prefetch_bw_equ = PrefetchBandwidth2; 1229 } else if (Case3OK) { 1230 prefetch_bw_equ = PrefetchBandwidth3; 1231 } else { 1232 prefetch_bw_equ = PrefetchBandwidth4; 1233 } 1234 1235 #ifdef __DML_VBA_DEBUG__ 1236 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1237 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1238 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1239 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1240 #endif 1241 1242 if (prefetch_bw_equ > 0) { 1243 if (GPUVMEnable == true) { 1244 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1245 } else { 1246 Tvm_equ = LineTime / 4; 1247 } 1248 1249 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1250 Tr0_equ = dml_max4( 1251 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1252 Tr0_trips, 1253 (LineTime - Tvm_equ) / 2, 1254 LineTime / 4); 1255 } else { 1256 Tr0_equ = (LineTime - Tvm_equ) / 2; 1257 } 1258 } else { 1259 Tvm_equ = 0; 1260 Tr0_equ = 0; 1261 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1262 } 1263 } 1264 1265 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1266 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1267 TimeForFetchingMetaPTE = Tvm_oto; 1268 TimeForFetchingRowInVBlank = Tr0_oto; 1269 *PrefetchBandwidth = prefetch_bw_oto; 1270 } else { 1271 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1272 TimeForFetchingMetaPTE = Tvm_equ; 1273 TimeForFetchingRowInVBlank = Tr0_equ; 1274 *PrefetchBandwidth = prefetch_bw_equ; 1275 } 1276 1277 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1278 1279 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1280 1281 #ifdef __DML_VBA_ALLOW_DELTA__ 1282 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1283 // See note above dated 5/30/2018 1284 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1285 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1286 #else 1287 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1288 #endif 1289 1290 #ifdef __DML_VBA_DEBUG__ 1291 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1292 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1293 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1294 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1295 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1296 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1297 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1298 #endif 1299 1300 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1301 1302 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1303 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1304 #ifdef __DML_VBA_DEBUG__ 1305 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1306 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1307 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1308 #endif 1309 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1310 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1311 *VRatioPrefetchY = dml_max( 1312 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1313 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1314 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1315 } else { 1316 MyError = true; 1317 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1318 *VRatioPrefetchY = 0; 1319 } 1320 #ifdef __DML_VBA_DEBUG__ 1321 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1322 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1323 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1324 #endif 1325 } 1326 1327 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1328 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1329 1330 #ifdef __DML_VBA_DEBUG__ 1331 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1332 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1333 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1334 #endif 1335 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1336 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1337 *VRatioPrefetchC = dml_max( 1338 *VRatioPrefetchC, 1339 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1340 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1341 } else { 1342 MyError = true; 1343 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1344 *VRatioPrefetchC = 0; 1345 } 1346 #ifdef __DML_VBA_DEBUG__ 1347 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1348 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1349 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1350 #endif 1351 } 1352 1353 #ifdef __DML_VBA_DEBUG__ 1354 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1355 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1356 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1357 #endif 1358 1359 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1360 1361 #ifdef __DML_VBA_DEBUG__ 1362 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1363 #endif 1364 1365 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1366 / LineTime; 1367 } else { 1368 MyError = true; 1369 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1370 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1371 *VRatioPrefetchY = 0; 1372 *VRatioPrefetchC = 0; 1373 *RequiredPrefetchPixDataBWLuma = 0; 1374 *RequiredPrefetchPixDataBWChroma = 0; 1375 } 1376 1377 dml_print( 1378 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1379 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1380 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1381 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1382 dml_print( 1383 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1384 (double) LinesToRequestPrefetchPixelData * LineTime); 1385 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 1386 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / 1387 (double) myPipe->HTotal)) * LineTime); 1388 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1389 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", 1390 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1391 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1392 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1393 1394 } else { 1395 MyError = true; 1396 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1397 } 1398 1399 { 1400 double prefetch_vm_bw; 1401 double prefetch_row_bw; 1402 1403 if (PDEAndMetaPTEBytesFrame == 0) { 1404 prefetch_vm_bw = 0; 1405 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1406 #ifdef __DML_VBA_DEBUG__ 1407 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1408 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1409 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1410 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1411 #endif 1412 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1413 #ifdef __DML_VBA_DEBUG__ 1414 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1415 #endif 1416 } else { 1417 prefetch_vm_bw = 0; 1418 MyError = true; 1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1420 } 1421 1422 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1423 prefetch_row_bw = 0; 1424 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1425 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1426 1427 #ifdef __DML_VBA_DEBUG__ 1428 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1429 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1430 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1431 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1432 #endif 1433 } else { 1434 prefetch_row_bw = 0; 1435 MyError = true; 1436 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1437 } 1438 1439 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1440 } 1441 1442 if (MyError) { 1443 *PrefetchBandwidth = 0; 1444 TimeForFetchingMetaPTE = 0; 1445 
/*
 * Round Clock up: divide 4 * VCOSpeed by the floor of (4 * VCOSpeed / Clock).
 * The integer divider is rounded down, so the result is >= Clock.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}

/*
 * Round Clock down: divide 4 * VCOSpeed by the ceiling of
 * (4 * VCOSpeed / Clock).  The integer divider is rounded up, so the
 * result is <= Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_ceil(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * CalculateDCCConfiguration - choose the DCC block sizes for luma and chroma.
 *
 * For both scan directions the function estimates the bytes of a full swath
 * (worst case, "wc") and checks whether two luma plus two chroma swaths fit
 * in DETBufferSize.  When they do not, luma and/or chroma are flagged for
 * 128-byte requests (req128_*).  Combined with whether the tiling keeps the
 * segments contiguous, this selects one of three request types per plane,
 * which is then translated into the six output values.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced by this body.
 *
 * Outputs (per plane): MaxUncompressedBlock*, MaxCompressedBlock* and
 * IndependentBlock*.  The chroma outputs are 0 when DCC is disabled or
 * BytePerPixelC is 0; the luma outputs are 0 when DCC is disabled.
 */
static void CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		double DETBufferSize,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	int yuv420;
	int horz_div_l;
	int horz_div_c;
	int vert_div_l;
	int vert_div_c;

	int swath_buf_size;
	double detile_buf_vp_horz_limit;
	double detile_buf_vp_vert_limit;

	int MAS_vp_horz_limit;
	int MAS_vp_vert_limit;
	int max_vp_horz_width;
	int max_vp_vert_height;
	int eff_surf_width_l;
	int eff_surf_width_c;
	int eff_surf_height_l;
	int eff_surf_height_c;

	int full_swath_bytes_horz_wc_l;
	int full_swath_bytes_horz_wc_c;
	int full_swath_bytes_vert_wc_l;
	int full_swath_bytes_vert_wc_c;
	int req128_horz_wc_l;
	int req128_horz_wc_c;
	int req128_vert_wc_l;
	int req128_vert_wc_c;
	int segment_order_horz_contiguous_luma;
	int segment_order_horz_contiguous_chroma;
	int segment_order_vert_contiguous_luma;
	int segment_order_vert_contiguous_chroma;

	typedef enum {
		REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
	} RequestType;
	RequestType RequestLuma;
	RequestType RequestChroma;

	/* yuv420 is used below as 0/1 in "(1 + yuv420)" divisors. */
	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
	/* The *_div_* flags become "(1 + flag)" divisors; they default to 1. */
	horz_div_l = 1;
	horz_div_c = 1;
	vert_div_l = 1;
	vert_div_c = 1;

	if (BytePerPixelY == 1)
		vert_div_l = 0;
	if (BytePerPixelC == 1)
		vert_div_c = 0;
	if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
		horz_div_l = 0;
	if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
		horz_div_c = 0;

	/*
	 * Viewport size limits derived from half the DET buffer minus a fixed
	 * reserve (2 * 256 for a single plane, 2 * 2 * 256 for two planes).
	 * The double result is truncated on assignment to the int swath_buf_size.
	 */
	if (BytePerPixelC == 0) {
		swath_buf_size = DETBufferSize / 2 - 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
	} else {
		swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
						+ (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
		detile_buf_vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
	}

	/* dm_420_10 gets 1.5x the limit. */
	if (SourcePixelFormat == dm_420_10) {
		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
	}

	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);

	/* Clamp the surface size to the smaller of the fixed and the DET-derived limit. */
	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
	MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
	/* Note: these compare an unsigned surface size against an int limit. */
	eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
	eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
	eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
	eff_surf_height_c = eff_surf_height_l / (1 + yuv420);

	/* Bytes of one full swath for each scan direction and plane. */
	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
	if (BytePerPixelC > 0) {
		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
	} else {
		full_swath_bytes_horz_wc_c = 0;
		full_swath_bytes_vert_wc_c = 0;
	}

	/*
	 * dm_420_10 swaths are scaled by 2/3 and rounded up to 256 bytes.
	 * NOTE(review): "x * 2 / 3" is evaluated in integer arithmetic before
	 * dml_ceil() sees it - confirm the truncation is intended.
	 */
	if (SourcePixelFormat == dm_420_10) {
		full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
		full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
		full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
		full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
	}

	/*
	 * Horizontal scan: if two swaths of each plane fit, no 128-byte
	 * requests are needed.  Otherwise a single plane is flagged when the
	 * remaining combination fits, and both planes are flagged when not.
	 */
	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 0;
	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 1;
	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 0;
	} else {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 1;
	}

	/* Same decision for vertical scan. */
	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 0;
	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 1;
	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 0;
	} else {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 1;
	}

	/* Segment contiguity flags, by bytes per pixel and tiling format. */
	if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_luma = 0;
	} else {
		segment_order_horz_contiguous_luma = 1;
	}
	if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_luma = 0;
	} else {
		segment_order_vert_contiguous_luma = 1;
	}
	if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_chroma = 0;
	} else {
		segment_order_horz_contiguous_chroma = 1;
	}
	if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_chroma = 0;
	} else {
		segment_order_vert_contiguous_chroma = 1;
	}

	/*
	 * Pick the request type per plane.  With an unknown scan direction
	 * both directions must allow 256-byte requests; otherwise only the
	 * direction selected by ScanOrientation is considered.
	 */
	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else if (ScanOrientation != dm_vert) {
		if (req128_horz_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else {
		if (req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	}

	/*
	 * Translate the request type into block sizes:
	 *   256 bytes            -> 256 / 256 / 0
	 *   128 contiguous       -> 256 / 128 / 128
	 *   anything else        -> 256 /  64 /  64
	 * (uncompressed / compressed / independent).
	 */
	if (RequestLuma == REQ_256Bytes) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
	} else if (RequestLuma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
	} else {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
	}

	if (RequestChroma == REQ_256Bytes) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
	} else if (RequestChroma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
	} else {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
	}

	/* No chroma plane or no DCC: report zeros for chroma. */
	if (DCCEnabled != true || BytePerPixelC == 0) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	/* No DCC: report zeros for luma as well. */
	if (DCCEnabled != true) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}
}
#ifdef __DML_VBA_DEBUG__ 1775 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1776 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1777 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1778 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1779 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1780 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1781 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1782 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1783 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1784 #endif 1785 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1786 } 1787 1788 static unsigned int CalculateVMAndRowBytes( 1789 struct display_mode_lib *mode_lib, 1790 bool DCCEnable, 1791 unsigned int BlockHeight256Bytes, 1792 unsigned int BlockWidth256Bytes, 1793 enum source_format_class SourcePixelFormat, 1794 unsigned int SurfaceTiling, 1795 unsigned int BytePerPixel, 1796 enum scan_direction_class ScanDirection, 1797 unsigned int SwathWidth, 1798 unsigned int ViewportHeight, 1799 bool GPUVMEnable, 1800 bool HostVMEnable, 1801 unsigned int HostVMMaxNonCachedPageTableLevels, 1802 unsigned int GPUVMMinPageSize, 1803 unsigned int HostVMMinPageSize, 1804 unsigned int PTEBufferSizeInRequests, 1805 unsigned int Pitch, 1806 unsigned int DCCMetaPitch, 1807 unsigned int *MacroTileWidth, 1808 unsigned int *MetaRowByte, 1809 unsigned int *PixelPTEBytesPerRow, 1810 bool *PTEBufferSizeNotExceeded, 1811 int *dpte_row_width_ub, 1812 unsigned int *dpte_row_height, 1813 unsigned int *MetaRequestWidth, 1814 unsigned int *MetaRequestHeight, 1815 unsigned int *meta_row_width, 1816 unsigned int *meta_row_height, 1817 int *vm_group_bytes, 1818 unsigned int *dpte_group_bytes, 1819 unsigned int *PixelPTEReqWidth, 1820 unsigned int 
*PixelPTEReqHeight, 1821 unsigned int *PTERequestSize, 1822 int *DPDE0BytesFrame, 1823 int *MetaPTEBytesFrame) 1824 { 1825 struct vba_vars_st *v = &mode_lib->vba; 1826 unsigned int MPDEBytesFrame; 1827 unsigned int DCCMetaSurfaceBytes; 1828 unsigned int MacroTileSizeBytes; 1829 unsigned int MacroTileHeight; 1830 unsigned int ExtraDPDEBytesFrame; 1831 unsigned int PDEAndMetaPTEBytesFrame; 1832 unsigned int PixelPTEReqHeightPTEs = 0; 1833 unsigned int HostVMDynamicLevels = 0; 1834 double FractionOfPTEReturnDrop; 1835 1836 if (GPUVMEnable == true && HostVMEnable == true) { 1837 if (HostVMMinPageSize < 2048) { 1838 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1839 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1840 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1841 } else { 1842 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1843 } 1844 } 1845 1846 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1847 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1848 if (ScanDirection != dm_vert) { 1849 *meta_row_height = *MetaRequestHeight; 1850 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1851 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1852 } else { 1853 *meta_row_height = *MetaRequestWidth; 1854 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1855 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1856 } 1857 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1858 if (GPUVMEnable == true) { 1859 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1860 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1861 } else { 1862 *MetaPTEBytesFrame = 0; 1863 MPDEBytesFrame = 0; 1864 } 1865 1866 
if (DCCEnable != true) { 1867 *MetaPTEBytesFrame = 0; 1868 MPDEBytesFrame = 0; 1869 *MetaRowByte = 0; 1870 } 1871 1872 if (SurfaceTiling == dm_sw_linear) { 1873 MacroTileSizeBytes = 256; 1874 MacroTileHeight = BlockHeight256Bytes; 1875 } else { 1876 MacroTileSizeBytes = 65536; 1877 MacroTileHeight = 16 * BlockHeight256Bytes; 1878 } 1879 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1880 1881 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1882 if (ScanDirection != dm_vert) { 1883 *DPDE0BytesFrame = 64 1884 * (dml_ceil( 1885 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1886 / (8 * 2097152), 1887 1) + 1); 1888 } else { 1889 *DPDE0BytesFrame = 64 1890 * (dml_ceil( 1891 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1892 / (8 * 2097152), 1893 1) + 1); 1894 } 1895 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1896 } else { 1897 *DPDE0BytesFrame = 0; 1898 ExtraDPDEBytesFrame = 0; 1899 } 1900 1901 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1902 1903 #ifdef __DML_VBA_DEBUG__ 1904 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1905 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1906 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1907 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1908 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1909 #endif 1910 1911 if (HostVMEnable == true) { 1912 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1913 } 1914 #ifdef __DML_VBA_DEBUG__ 1915 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1916 #endif 1917 1918 if (SurfaceTiling == dm_sw_linear) { 1919 
PixelPTEReqHeightPTEs = 1; 1920 *PixelPTEReqHeight = 1; 1921 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1922 *PTERequestSize = 64; 1923 FractionOfPTEReturnDrop = 0; 1924 } else if (MacroTileSizeBytes == 4096) { 1925 PixelPTEReqHeightPTEs = 1; 1926 *PixelPTEReqHeight = MacroTileHeight; 1927 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1928 *PTERequestSize = 64; 1929 if (ScanDirection != dm_vert) 1930 FractionOfPTEReturnDrop = 0; 1931 else 1932 FractionOfPTEReturnDrop = 7 / 8; 1933 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1934 PixelPTEReqHeightPTEs = 16; 1935 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 1936 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 1937 *PTERequestSize = 128; 1938 FractionOfPTEReturnDrop = 0; 1939 } else { 1940 PixelPTEReqHeightPTEs = 1; 1941 *PixelPTEReqHeight = MacroTileHeight; 1942 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1943 *PTERequestSize = 64; 1944 FractionOfPTEReturnDrop = 0; 1945 } 1946 1947 if (SurfaceTiling == dm_sw_linear) { 1948 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 1949 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1950 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1951 } else if (ScanDirection != dm_vert) { 1952 *dpte_row_height = *PixelPTEReqHeight; 1953 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1954 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1955 } else { 1956 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 1957 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 1958 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 1959 } 1960 1961 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 1962 *PTEBufferSizeNotExceeded = true; 1963 } else { 1964 *PTEBufferSizeNotExceeded = false; 1965 } 1966 1967 if (GPUVMEnable != true) { 1968 *PixelPTEBytesPerRow = 0; 1969 *PTEBufferSizeNotExceeded = true; 1970 } 1971 1972 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 1973 1974 if (HostVMEnable == true) { 1975 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 1976 } 1977 1978 if (HostVMEnable == true) { 1979 *vm_group_bytes = 512; 1980 *dpte_group_bytes = 512; 1981 } else if (GPUVMEnable == true) { 1982 *vm_group_bytes = 2048; 1983 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 1984 *dpte_group_bytes = 512; 1985 } else { 1986 *dpte_group_bytes = 2048; 1987 } 1988 } else { 1989 *vm_group_bytes = 0; 1990 *dpte_group_bytes = 0; 1991 } 1992 return PDEAndMetaPTEBytesFrame; 1993 } 1994 1995 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 1996 { 1997 struct vba_vars_st *v = &mode_lib->vba; 1998 unsigned int j, k; 1999 double HostVMInefficiencyFactor = 1.0; 2000 bool NoChromaPlanes = true; 2001 int ReorderBytes; 2002 double VMDataOnlyReturnBW; 2003 double MaxTotalRDBandwidth = 0; 2004 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2005 2006 v->WritebackDISPCLK = 0.0; 2007 v->DISPCLKWithRamping = 0; 2008 v->DISPCLKWithoutRamping = 0; 2009 v->GlobalDPPCLK = 0.0; 2010 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ 2011 { 2012 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2013 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2014 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2015 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2016 if 
(v->HostVMEnable != true) { 2017 v->ReturnBW = dml_min( 2018 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2019 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2020 } else { 2021 v->ReturnBW = dml_min( 2022 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2023 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2024 } 2025 } 2026 /* End DAL custom code */ 2027 2028 // DISPCLK and DPPCLK Calculation 2029 // 2030 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2031 if (v->WritebackEnable[k]) { 2032 v->WritebackDISPCLK = dml_max( 2033 v->WritebackDISPCLK, 2034 dml31_CalculateWriteBackDISPCLK( 2035 v->WritebackPixelFormat[k], 2036 v->PixelClock[k], 2037 v->WritebackHRatio[k], 2038 v->WritebackVRatio[k], 2039 v->WritebackHTaps[k], 2040 v->WritebackVTaps[k], 2041 v->WritebackSourceWidth[k], 2042 v->WritebackDestinationWidth[k], 2043 v->HTotal[k], 2044 v->WritebackLineBufferSize)); 2045 } 2046 } 2047 2048 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2049 if (v->HRatio[k] > 1) { 2050 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2051 v->MaxDCHUBToPSCLThroughput, 2052 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2053 } else { 2054 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2055 } 2056 2057 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2058 * dml_max( 2059 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2060 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2061 2062 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2063 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2064 } 2065 2066 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 
2067 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2068 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2069 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2070 } else { 2071 if (v->HRatioChroma[k] > 1) { 2072 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2073 v->MaxDCHUBToPSCLThroughput, 2074 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2075 } else { 2076 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2077 } 2078 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2079 * dml_max3( 2080 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2081 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2082 1.0); 2083 2084 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2085 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2086 } 2087 2088 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2089 } 2090 } 2091 2092 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2093 if (v->BlendingAndTiming[k] != k) 2094 continue; 2095 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2096 v->DISPCLKWithRamping = dml_max( 2097 v->DISPCLKWithRamping, 2098 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2099 * (1 + v->DISPCLKRampingMargin / 100)); 2100 v->DISPCLKWithoutRamping = dml_max( 2101 v->DISPCLKWithoutRamping, 2102 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2103 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2104 v->DISPCLKWithRamping = dml_max( 2105 v->DISPCLKWithRamping, 2106 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2107 * (1 + v->DISPCLKRampingMargin / 100)); 2108 v->DISPCLKWithoutRamping = dml_max( 2109 v->DISPCLKWithoutRamping, 2110 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2111 } else { 2112 v->DISPCLKWithRamping = 
dml_max( 2113 v->DISPCLKWithRamping, 2114 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2115 v->DISPCLKWithoutRamping = dml_max( 2116 v->DISPCLKWithoutRamping, 2117 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2118 } 2119 } 2120 2121 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2122 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2123 2124 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2125 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2126 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2127 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2128 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2129 v->DISPCLKDPPCLKVCOSpeed); 2130 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2131 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2132 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2133 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2134 } else { 2135 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2136 } 2137 v->DISPCLK = v->DISPCLK_calculated; 2138 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2139 2140 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2141 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2142 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2143 } 2144 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2145 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2146 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2147 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2148 } 2149 2150 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2151 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2152 } 2153 2154 // Urgent and B P-State/DRAM Clock Change Watermark 2155 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2156 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2157 2158 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2159 dml30_CalculateBytePerPixelAnd256BBlockSizes( 2160 v->SourcePixelFormat[k], 2161 v->SurfaceTiling[k], 2162 &v->BytePerPixelY[k], 2163 &v->BytePerPixelC[k], 2164 &v->BytePerPixelDETY[k], 2165 &v->BytePerPixelDETC[k], 2166 &v->BlockHeight256BytesY[k], 2167 &v->BlockHeight256BytesC[k], 2168 &v->BlockWidth256BytesY[k], 2169 &v->BlockWidth256BytesC[k]); 2170 } 2171 2172 CalculateSwathWidth( 2173 false, 2174 v->NumberOfActivePlanes, 2175 v->SourcePixelFormat, 2176 v->SourceScan, 2177 v->ViewportWidth, 2178 v->ViewportHeight, 2179 v->SurfaceWidthY, 2180 v->SurfaceWidthC, 2181 v->SurfaceHeightY, 2182 v->SurfaceHeightC, 2183 v->ODMCombineEnabled, 2184 v->BytePerPixelY, 2185 v->BytePerPixelC, 2186 v->BlockHeight256BytesY, 2187 v->BlockHeight256BytesC, 2188 v->BlockWidth256BytesY, 2189 v->BlockWidth256BytesC, 2190 v->BlendingAndTiming, 2191 v->HActive, 2192 v->HRatio, 2193 v->DPPPerPlane, 2194 v->SwathWidthSingleDPPY, 2195 v->SwathWidthSingleDPPC, 2196 v->SwathWidthY, 2197 v->SwathWidthC, 2198 v->dummyinteger3, 2199 v->dummyinteger4, 2200 v->swath_width_luma_ub, 2201 v->swath_width_chroma_ub); 2202 2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2204 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2205 * v->VRatio[k]; 2206 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2207 * v->VRatioChroma[k]; 2208 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2209 } 2210 2211 // DCFCLK Deep Sleep 2212 CalculateDCFCLKDeepSleep( 2213 mode_lib, 2214 v->NumberOfActivePlanes, 2215 v->BytePerPixelY, 2216 v->BytePerPixelC, 2217 v->VRatio, 2218 v->VRatioChroma, 2219 v->SwathWidthY, 2220 v->SwathWidthC, 2221 v->DPPPerPlane, 2222 v->HRatio, 2223 v->HRatioChroma, 2224 v->PixelClock, 2225 v->PSCL_THROUGHPUT_LUMA, 2226 v->PSCL_THROUGHPUT_CHROMA, 2227 v->DPPCLK, 2228 v->ReadBandwidthPlaneLuma, 2229 v->ReadBandwidthPlaneChroma, 2230 v->ReturnBusWidth, 2231 &v->DCFCLKDeepSleep); 2232 2233 // DSCCLK 2234 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2235 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2236 v->DSCCLK_calculated[k] = 0.0; 2237 } else { 2238 if (v->OutputFormat[k] == dm_420) 2239 v->DSCFormatFactor = 2; 2240 else if (v->OutputFormat[k] == dm_444) 2241 v->DSCFormatFactor = 1; 2242 else if (v->OutputFormat[k] == dm_n422) 2243 v->DSCFormatFactor = 2; 2244 else 2245 v->DSCFormatFactor = 1; 2246 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2247 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2248 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2249 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2250 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2251 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2252 else 2253 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2254 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2255 } 2256 } 2257 2258 // DSC Delay 2259 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2260 double BPP = v->OutputBpp[k]; 2261 2262 if (v->DSCEnabled[k] && BPP != 0) { 2263 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2264 v->DSCDelay[k] = dscceComputeDelay( 2265 v->DSCInputBitPerComponent[k], 2266 BPP, 2267 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2268 v->NumberOfDSCSlices[k], 2269 v->OutputFormat[k], 2270 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2271 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2272 v->DSCDelay[k] = 2 2273 * (dscceComputeDelay( 2274 v->DSCInputBitPerComponent[k], 2275 BPP, 2276 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2277 v->NumberOfDSCSlices[k] / 2.0, 2278 v->OutputFormat[k], 2279 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2280 } else { 2281 v->DSCDelay[k] = 4 2282 * (dscceComputeDelay( 2283 v->DSCInputBitPerComponent[k], 2284 BPP, 2285 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2286 v->NumberOfDSCSlices[k] / 4.0, 2287 v->OutputFormat[k], 2288 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2289 } 2290 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2291 } else { 2292 v->DSCDelay[k] = 0; 2293 } 2294 } 2295 2296 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2297 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2298 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2299 v->DSCDelay[k] = v->DSCDelay[j]; 2300 2301 // Prefetch 2302 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2303 unsigned int PDEAndMetaPTEBytesFrameY; 2304 unsigned int PixelPTEBytesPerRowY; 2305 unsigned int MetaRowByteY; 2306 unsigned int MetaRowByteC; 2307 unsigned int PDEAndMetaPTEBytesFrameC; 2308 unsigned int PixelPTEBytesPerRowC; 2309 bool PTEBufferSizeNotExceededY; 2310 bool PTEBufferSizeNotExceededC; 2311 2312 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2313 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2314 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2315 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2316 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2317 } else { 2318 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2319 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2320 } 2321 2322 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2323 mode_lib, 2324 v->DCCEnable[k], 2325 v->BlockHeight256BytesC[k], 2326 v->BlockWidth256BytesC[k], 2327 v->SourcePixelFormat[k], 2328 v->SurfaceTiling[k], 2329 v->BytePerPixelC[k], 2330 v->SourceScan[k], 2331 v->SwathWidthC[k], 2332 v->ViewportHeightChroma[k], 2333 v->GPUVMEnable, 2334 v->HostVMEnable, 2335 v->HostVMMaxNonCachedPageTableLevels, 2336 v->GPUVMMinPageSize, 2337 v->HostVMMinPageSize, 2338 v->PTEBufferSizeInRequestsForChroma, 2339 v->PitchC[k], 2340 v->DCCMetaPitchC[k], 2341 &v->MacroTileWidthC[k], 2342 &MetaRowByteC, 2343 &PixelPTEBytesPerRowC, 2344 &PTEBufferSizeNotExceededC, 2345 &v->dpte_row_width_chroma_ub[k], 2346 &v->dpte_row_height_chroma[k], 2347 &v->meta_req_width_chroma[k], 2348 &v->meta_req_height_chroma[k], 2349 &v->meta_row_width_chroma[k], 2350 &v->meta_row_height_chroma[k], 2351 &v->dummyinteger1, 2352 &v->dummyinteger2, 2353 &v->PixelPTEReqWidthC[k], 2354 &v->PixelPTEReqHeightC[k], 2355 &v->PTERequestSizeC[k], 2356 &v->dpde0_bytes_per_frame_ub_c[k], 2357 &v->meta_pte_bytes_per_frame_ub_c[k]); 2358 2359 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2360 mode_lib, 2361 v->VRatioChroma[k], 2362 v->VTAPsChroma[k], 2363 v->Interlace[k], 2364 v->ProgressiveToInterlaceUnitInOPP, 2365 v->SwathHeightC[k], 2366 v->ViewportYStartC[k], 2367 &v->VInitPreFillC[k], 2368 &v->MaxNumSwathC[k]); 2369 } else { 2370 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2371 v->PTEBufferSizeInRequestsForChroma = 0; 2372 PixelPTEBytesPerRowC = 0; 2373 PDEAndMetaPTEBytesFrameC = 0; 2374 MetaRowByteC = 0; 2375 v->MaxNumSwathC[k] = 0; 2376 v->PrefetchSourceLinesC[k] = 0; 2377 } 2378 2379 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2380 mode_lib, 2381 v->DCCEnable[k], 2382 v->BlockHeight256BytesY[k], 2383 v->BlockWidth256BytesY[k], 2384 v->SourcePixelFormat[k], 2385 v->SurfaceTiling[k], 2386 v->BytePerPixelY[k], 2387 v->SourceScan[k], 2388 v->SwathWidthY[k], 2389 v->ViewportHeight[k], 2390 v->GPUVMEnable, 2391 v->HostVMEnable, 2392 v->HostVMMaxNonCachedPageTableLevels, 2393 v->GPUVMMinPageSize, 2394 v->HostVMMinPageSize, 2395 v->PTEBufferSizeInRequestsForLuma, 2396 v->PitchY[k], 2397 v->DCCMetaPitchY[k], 2398 &v->MacroTileWidthY[k], 2399 &MetaRowByteY, 2400 &PixelPTEBytesPerRowY, 2401 &PTEBufferSizeNotExceededY, 2402 &v->dpte_row_width_luma_ub[k], 2403 &v->dpte_row_height[k], 2404 &v->meta_req_width[k], 2405 &v->meta_req_height[k], 2406 &v->meta_row_width[k], 2407 &v->meta_row_height[k], 2408 &v->vm_group_bytes[k], 2409 &v->dpte_group_bytes[k], 2410 &v->PixelPTEReqWidthY[k], 2411 &v->PixelPTEReqHeightY[k], 2412 &v->PTERequestSizeY[k], 2413 &v->dpde0_bytes_per_frame_ub_l[k], 2414 &v->meta_pte_bytes_per_frame_ub_l[k]); 2415 2416 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2417 mode_lib, 2418 v->VRatio[k], 2419 v->vtaps[k], 2420 v->Interlace[k], 2421 v->ProgressiveToInterlaceUnitInOPP, 2422 v->SwathHeightY[k], 2423 v->ViewportYStartY[k], 2424 &v->VInitPreFillY[k], 2425 &v->MaxNumSwathY[k]); 2426 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2427 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2428 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2429 2430 CalculateRowBandwidth( 2431 v->GPUVMEnable, 2432 v->SourcePixelFormat[k], 2433 v->VRatio[k], 2434 v->VRatioChroma[k], 2435 v->DCCEnable[k], 2436 v->HTotal[k] / v->PixelClock[k], 2437 MetaRowByteY, 2438 MetaRowByteC, 2439 v->meta_row_height[k], 2440 v->meta_row_height_chroma[k], 2441 PixelPTEBytesPerRowY, 2442 PixelPTEBytesPerRowC, 2443 v->dpte_row_height[k], 2444 v->dpte_row_height_chroma[k], 2445 &v->meta_row_bw[k], 2446 &v->dpte_row_bw[k]); 
2447 } 2448 2449 v->TotalDCCActiveDPP = 0; 2450 v->TotalActiveDPP = 0; 2451 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2452 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2453 if (v->DCCEnable[k]) 2454 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2455 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2456 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2457 NoChromaPlanes = false; 2458 } 2459 2460 ReorderBytes = v->NumberOfChannels 2461 * dml_max3( 2462 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2463 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2464 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2465 2466 VMDataOnlyReturnBW = dml_min( 2467 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2468 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2469 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2470 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2471 2472 #ifdef __DML_VBA_DEBUG__ 2473 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2474 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2475 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2476 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2477 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2478 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2479 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2480 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2481 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2482 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2483 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2484 #endif 2485 2486 if (v->GPUVMEnable && v->HostVMEnable) 2487 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2488 2489 v->UrgentExtraLatency = CalculateExtraLatency( 2490 v->RoundTripPingLatencyCycles, 2491 ReorderBytes, 2492 v->DCFCLK, 2493 v->TotalActiveDPP, 2494 v->PixelChunkSizeInKByte, 2495 v->TotalDCCActiveDPP, 2496 v->MetaChunkSize, 2497 v->ReturnBW, 2498 v->GPUVMEnable, 2499 v->HostVMEnable, 2500 v->NumberOfActivePlanes, 2501 v->DPPPerPlane, 2502 v->dpte_group_bytes, 2503 HostVMInefficiencyFactor, 2504 v->HostVMMinPageSize, 2505 v->HostVMMaxNonCachedPageTableLevels); 2506 2507 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2508 2509 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2510 if (v->BlendingAndTiming[k] == k) { 2511 if (v->WritebackEnable[k] == true) { 2512 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2513 + CalculateWriteBackDelay( 2514 v->WritebackPixelFormat[k], 2515 v->WritebackHRatio[k], 2516 v->WritebackVRatio[k], 2517 v->WritebackVTaps[k], 2518 v->WritebackDestinationWidth[k], 2519 v->WritebackDestinationHeight[k], 2520 v->WritebackSourceHeight[k], 2521 v->HTotal[k]) / v->DISPCLK; 2522 } else 2523 v->WritebackDelay[v->VoltageLevel][k] = 0; 2524 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2525 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2526 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2527 v->WritebackDelay[v->VoltageLevel][k], 2528 v->WritebackLatency 2529 + CalculateWriteBackDelay( 2530 v->WritebackPixelFormat[j], 2531 v->WritebackHRatio[j], 2532 v->WritebackVRatio[j], 2533 v->WritebackVTaps[j], 2534 v->WritebackDestinationWidth[j], 2535 v->WritebackDestinationHeight[j], 2536 v->WritebackSourceHeight[j], 2537 v->HTotal[k]) / v->DISPCLK); 2538 } 2539 } 2540 } 2541 } 2542 2543 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2544 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2545 if (v->BlendingAndTiming[k] == j) 2546 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2547 2548 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2549 v->MaxVStartupLines[k] = 2550 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 2551 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 2552 v->VTotal[k] - v->VActive[k] 2553 - dml_max( 2554 1.0, 2555 dml_ceil( 2556 (double) v->WritebackDelay[v->VoltageLevel][k] 2557 / (v->HTotal[k] / v->PixelClock[k]), 2558 1)); 2559 if (v->MaxVStartupLines[k] > 1023) 2560 v->MaxVStartupLines[k] = 1023; 2561 2562 #ifdef __DML_VBA_DEBUG__ 2563 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2564 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2565 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2566 #endif 2567 } 2568 2569 v->MaximumMaxVStartupLines = 0; 2570 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2571 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2572 2573 // VBA_DELTA 2574 // We don't really care to iterate between the various prefetch modes 2575 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2576 2577 v->UrgentLatency = CalculateUrgentLatency( 2578 v->UrgentLatencyPixelDataOnly, 2579 v->UrgentLatencyPixelMixedWithVMData, 2580 v->UrgentLatencyVMDataOnly, 2581 v->DoUrgentLatencyAdjustment, 2582 v->UrgentLatencyAdjustmentFabricClockComponent, 2583 v->UrgentLatencyAdjustmentFabricClockReference, 2584 v->FabricClock); 2585 2586 v->FractionOfUrgentBandwidth = 0.0; 2587 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2588 2589 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2590 2591 do { 2592 double 
MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2593 bool DestinationLineTimesForPrefetchLessThan2 = false; 2594 bool VRatioPrefetchMoreThan4 = false; 2595 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2596 MaxTotalRDBandwidth = 0; 2597 2598 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2599 2600 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2601 Pipe myPipe; 2602 2603 myPipe.DPPCLK = v->DPPCLK[k]; 2604 myPipe.DISPCLK = v->DISPCLK; 2605 myPipe.PixelClock = v->PixelClock[k]; 2606 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2607 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2608 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2609 myPipe.VRatio = v->VRatio[k]; 2610 myPipe.VRatioChroma = v->VRatioChroma[k]; 2611 myPipe.SourceScan = v->SourceScan[k]; 2612 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2613 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2614 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2615 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2616 myPipe.InterlaceEnable = v->Interlace[k]; 2617 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2618 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2619 myPipe.HTotal = v->HTotal[k]; 2620 myPipe.DCCEnable = v->DCCEnable[k]; 2621 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2622 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2623 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2624 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2625 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2626 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2627 v->ErrorResult[k] = CalculatePrefetchSchedule( 2628 mode_lib, 2629 HostVMInefficiencyFactor, 2630 &myPipe, 2631 v->DSCDelay[k], 2632 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2633 v->DPPCLKDelaySCL, 2634 v->DPPCLKDelaySCLLBOnly, 2635 v->DPPCLKDelayCNVCCursor, 2636 
v->DISPCLKDelaySubtotal, 2637 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2638 v->OutputFormat[k], 2639 v->MaxInterDCNTileRepeaters, 2640 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2641 v->MaxVStartupLines[k], 2642 v->GPUVMMaxPageTableLevels, 2643 v->GPUVMEnable, 2644 v->HostVMEnable, 2645 v->HostVMMaxNonCachedPageTableLevels, 2646 v->HostVMMinPageSize, 2647 v->DynamicMetadataEnable[k], 2648 v->DynamicMetadataVMEnabled, 2649 v->DynamicMetadataLinesBeforeActiveRequired[k], 2650 v->DynamicMetadataTransmittedBytes[k], 2651 v->UrgentLatency, 2652 v->UrgentExtraLatency, 2653 v->TCalc, 2654 v->PDEAndMetaPTEBytesFrame[k], 2655 v->MetaRowByte[k], 2656 v->PixelPTEBytesPerRow[k], 2657 v->PrefetchSourceLinesY[k], 2658 v->SwathWidthY[k], 2659 v->VInitPreFillY[k], 2660 v->MaxNumSwathY[k], 2661 v->PrefetchSourceLinesC[k], 2662 v->SwathWidthC[k], 2663 v->VInitPreFillC[k], 2664 v->MaxNumSwathC[k], 2665 v->swath_width_luma_ub[k], 2666 v->swath_width_chroma_ub[k], 2667 v->SwathHeightY[k], 2668 v->SwathHeightC[k], 2669 TWait, 2670 &v->DSTXAfterScaler[k], 2671 &v->DSTYAfterScaler[k], 2672 &v->DestinationLinesForPrefetch[k], 2673 &v->PrefetchBandwidth[k], 2674 &v->DestinationLinesToRequestVMInVBlank[k], 2675 &v->DestinationLinesToRequestRowInVBlank[k], 2676 &v->VRatioPrefetchY[k], 2677 &v->VRatioPrefetchC[k], 2678 &v->RequiredPrefetchPixDataBWLuma[k], 2679 &v->RequiredPrefetchPixDataBWChroma[k], 2680 &v->NotEnoughTimeForDynamicMetadata[k], 2681 &v->Tno_bw[k], 2682 &v->prefetch_vmrow_bw[k], 2683 &v->Tdmdl_vm[k], 2684 &v->Tdmdl[k], 2685 &v->TSetup[k], 2686 &v->VUpdateOffsetPix[k], 2687 &v->VUpdateWidthPix[k], 2688 &v->VReadyOffsetPix[k]); 2689 2690 #ifdef __DML_VBA_DEBUG__ 2691 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2692 #endif 2693 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2694 } 2695 2696 v->NoEnoughUrgentLatencyHiding = false; 2697 v->NoEnoughUrgentLatencyHidingPre = false; 2698 2699 for (k = 0; k 
< v->NumberOfActivePlanes; ++k) { 2700 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2701 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2702 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2703 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2704 2705 CalculateUrgentBurstFactor( 2706 v->swath_width_luma_ub[k], 2707 v->swath_width_chroma_ub[k], 2708 v->SwathHeightY[k], 2709 v->SwathHeightC[k], 2710 v->HTotal[k] / v->PixelClock[k], 2711 v->UrgentLatency, 2712 v->CursorBufferSize, 2713 v->CursorWidth[k][0], 2714 v->CursorBPP[k][0], 2715 v->VRatio[k], 2716 v->VRatioChroma[k], 2717 v->BytePerPixelDETY[k], 2718 v->BytePerPixelDETC[k], 2719 v->DETBufferSizeY[k], 2720 v->DETBufferSizeC[k], 2721 &v->UrgBurstFactorCursor[k], 2722 &v->UrgBurstFactorLuma[k], 2723 &v->UrgBurstFactorChroma[k], 2724 &v->NoUrgentLatencyHiding[k]); 2725 2726 CalculateUrgentBurstFactor( 2727 v->swath_width_luma_ub[k], 2728 v->swath_width_chroma_ub[k], 2729 v->SwathHeightY[k], 2730 v->SwathHeightC[k], 2731 v->HTotal[k] / v->PixelClock[k], 2732 v->UrgentLatency, 2733 v->CursorBufferSize, 2734 v->CursorWidth[k][0], 2735 v->CursorBPP[k][0], 2736 v->VRatioPrefetchY[k], 2737 v->VRatioPrefetchC[k], 2738 v->BytePerPixelDETY[k], 2739 v->BytePerPixelDETC[k], 2740 v->DETBufferSizeY[k], 2741 v->DETBufferSizeC[k], 2742 &v->UrgBurstFactorCursorPre[k], 2743 &v->UrgBurstFactorLumaPre[k], 2744 &v->UrgBurstFactorChromaPre[k], 2745 &v->NoUrgentLatencyHidingPre[k]); 2746 2747 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2748 + dml_max3( 2749 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2750 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2751 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2752 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2753 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2754 v->DPPPerPlane[k] 2755 * (v->RequiredPrefetchPixDataBWLuma[k] * 
v->UrgBurstFactorLumaPre[k] 2756 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2757 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2758 2759 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2760 + dml_max3( 2761 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2762 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2763 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2764 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2765 + v->cursor_bw_pre[k]); 2766 2767 #ifdef __DML_VBA_DEBUG__ 2768 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2769 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2770 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2771 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2772 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2773 2774 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2775 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2776 2777 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2778 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2779 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2780 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2781 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2782 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2783 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2784 dml_print("DML::%s: 
k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2785 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2786 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2787 #endif 2788 2789 if (v->DestinationLinesForPrefetch[k] < 2) 2790 DestinationLineTimesForPrefetchLessThan2 = true; 2791 2792 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2793 VRatioPrefetchMoreThan4 = true; 2794 2795 if (v->NoUrgentLatencyHiding[k] == true) 2796 v->NoEnoughUrgentLatencyHiding = true; 2797 2798 if (v->NoUrgentLatencyHidingPre[k] == true) 2799 v->NoEnoughUrgentLatencyHidingPre = true; 2800 } 2801 2802 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2803 2804 #ifdef __DML_VBA_DEBUG__ 2805 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2806 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); 2807 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); 2808 #endif 2809 2810 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2811 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2812 v->PrefetchModeSupported = true; 2813 else { 2814 v->PrefetchModeSupported = false; 2815 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2816 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2817 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2818 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? 
"is" : "is not"); 2819 } 2820 2821 // PREVIOUS_ERROR 2822 // This error result check was done after the PrefetchModeSupported. So we will 2823 // still try to calculate flip schedule even prefetch mode not supported 2824 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2825 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2826 v->PrefetchModeSupported = false; 2827 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2828 } 2829 } 2830 2831 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2832 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2833 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2834 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2835 - dml_max( 2836 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2837 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2838 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2839 v->DPPPerPlane[k] 2840 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2841 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2842 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2843 } 2844 2845 v->TotImmediateFlipBytes = 0; 2846 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2847 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2848 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2849 } 2850 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2851 CalculateFlipSchedule( 2852 mode_lib, 2853 k, 2854 HostVMInefficiencyFactor, 2855 v->UrgentExtraLatency, 2856 v->UrgentLatency, 2857 v->PDEAndMetaPTEBytesFrame[k], 2858 v->MetaRowByte[k], 2859 v->PixelPTEBytesPerRow[k]); 2860 } 2861 2862 v->total_dcn_read_bw_with_flip = 0.0; 2863 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2864 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2865 v->total_dcn_read_bw_with_flip = 
v->total_dcn_read_bw_with_flip 2866 + dml_max3( 2867 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2868 v->DPPPerPlane[k] * v->final_flip_bw[k] 2869 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2870 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2871 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2872 v->DPPPerPlane[k] 2873 * (v->final_flip_bw[k] 2874 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2875 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2876 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2877 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2878 + dml_max3( 2879 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2880 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2881 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2882 v->DPPPerPlane[k] 2883 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2884 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2885 } 2886 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2887 2888 v->ImmediateFlipSupported = true; 2889 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2890 #ifdef __DML_VBA_DEBUG__ 2891 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2892 #endif 2893 v->ImmediateFlipSupported = false; 2894 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2895 } 2896 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2897 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2898 #ifdef __DML_VBA_DEBUG__ 2899 dml_print("DML::%s: Pipe %0d not supporting iflip\n", 2900 __func__, k); 2901 #endif 2902 v->ImmediateFlipSupported = false; 2903 } 2904 } 2905 } else { 2906 v->ImmediateFlipSupported = false; 2907 } 2908 2909 v->PrefetchAndImmediateFlipSupported = 2910 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && 
!v->HostVMEnable 2911 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2912 v->ImmediateFlipSupported)) ? true : false; 2913 #ifdef __DML_VBA_DEBUG__ 2914 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2915 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required); 2916 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 2917 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 2918 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 2919 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 2920 #endif 2921 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 2922 2923 v->VStartupLines = v->VStartupLines + 1; 2924 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 2925 ASSERT(v->PrefetchAndImmediateFlipSupported); 2926 2927 // Unbounded Request Enabled 2928 CalculateUnboundedRequestAndCompressedBufferSize( 2929 v->DETBufferSizeInKByte[0], 2930 v->ConfigReturnBufferSizeInKByte, 2931 v->UseUnboundedRequesting, 2932 v->TotalActiveDPP, 2933 NoChromaPlanes, 2934 v->MaxNumDPP, 2935 v->CompressedBufferSegmentSizeInkByte, 2936 v->Output, 2937 &v->UnboundedRequestEnabled, 2938 &v->CompressedBufferSizeInkByte); 2939 2940 //Watermarks and NB P-State/DRAM Clock Change Support 2941 { 2942 enum clock_change_support DRAMClockChangeSupport; // dummy 2943 CalculateWatermarksAndDRAMSpeedChangeSupport( 2944 mode_lib, 2945 PrefetchMode, 2946 v->DCFCLK, 2947 v->ReturnBW, 2948 v->UrgentLatency, 2949 v->UrgentExtraLatency, 2950 v->SOCCLK, 2951 v->DCFCLKDeepSleep, 2952 v->DETBufferSizeY, 2953 v->DETBufferSizeC, 2954 v->SwathHeightY, 2955 v->SwathHeightC, 2956 v->SwathWidthY, 2957 v->SwathWidthC, 2958 
v->DPPPerPlane, 2959 v->BytePerPixelDETY, 2960 v->BytePerPixelDETC, 2961 v->UnboundedRequestEnabled, 2962 v->CompressedBufferSizeInkByte, 2963 &DRAMClockChangeSupport, 2964 &v->StutterExitWatermark, 2965 &v->StutterEnterPlusExitWatermark, 2966 &v->Z8StutterExitWatermark, 2967 &v->Z8StutterEnterPlusExitWatermark); 2968 2969 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2970 if (v->WritebackEnable[k] == true) { 2971 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 2972 0, 2973 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 2974 } else { 2975 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 2976 } 2977 } 2978 } 2979 2980 //Display Pipeline Delivery Time in Prefetch, Groups 2981 CalculatePixelDeliveryTimes( 2982 v->NumberOfActivePlanes, 2983 v->VRatio, 2984 v->VRatioChroma, 2985 v->VRatioPrefetchY, 2986 v->VRatioPrefetchC, 2987 v->swath_width_luma_ub, 2988 v->swath_width_chroma_ub, 2989 v->DPPPerPlane, 2990 v->HRatio, 2991 v->HRatioChroma, 2992 v->PixelClock, 2993 v->PSCL_THROUGHPUT_LUMA, 2994 v->PSCL_THROUGHPUT_CHROMA, 2995 v->DPPCLK, 2996 v->BytePerPixelC, 2997 v->SourceScan, 2998 v->NumberOfCursors, 2999 v->CursorWidth, 3000 v->CursorBPP, 3001 v->BlockWidth256BytesY, 3002 v->BlockHeight256BytesY, 3003 v->BlockWidth256BytesC, 3004 v->BlockHeight256BytesC, 3005 v->DisplayPipeLineDeliveryTimeLuma, 3006 v->DisplayPipeLineDeliveryTimeChroma, 3007 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3008 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3009 v->DisplayPipeRequestDeliveryTimeLuma, 3010 v->DisplayPipeRequestDeliveryTimeChroma, 3011 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3012 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3013 v->CursorRequestDeliveryTime, 3014 v->CursorRequestDeliveryTimePrefetch); 3015 3016 CalculateMetaAndPTETimes( 3017 v->NumberOfActivePlanes, 3018 v->GPUVMEnable, 3019 v->MetaChunkSize, 3020 v->MinMetaChunkSizeBytes, 3021 v->HTotal, 3022 v->VRatio, 3023 v->VRatioChroma, 3024 
v->DestinationLinesToRequestRowInVBlank, 3025 v->DestinationLinesToRequestRowInImmediateFlip, 3026 v->DCCEnable, 3027 v->PixelClock, 3028 v->BytePerPixelY, 3029 v->BytePerPixelC, 3030 v->SourceScan, 3031 v->dpte_row_height, 3032 v->dpte_row_height_chroma, 3033 v->meta_row_width, 3034 v->meta_row_width_chroma, 3035 v->meta_row_height, 3036 v->meta_row_height_chroma, 3037 v->meta_req_width, 3038 v->meta_req_width_chroma, 3039 v->meta_req_height, 3040 v->meta_req_height_chroma, 3041 v->dpte_group_bytes, 3042 v->PTERequestSizeY, 3043 v->PTERequestSizeC, 3044 v->PixelPTEReqWidthY, 3045 v->PixelPTEReqHeightY, 3046 v->PixelPTEReqWidthC, 3047 v->PixelPTEReqHeightC, 3048 v->dpte_row_width_luma_ub, 3049 v->dpte_row_width_chroma_ub, 3050 v->DST_Y_PER_PTE_ROW_NOM_L, 3051 v->DST_Y_PER_PTE_ROW_NOM_C, 3052 v->DST_Y_PER_META_ROW_NOM_L, 3053 v->DST_Y_PER_META_ROW_NOM_C, 3054 v->TimePerMetaChunkNominal, 3055 v->TimePerChromaMetaChunkNominal, 3056 v->TimePerMetaChunkVBlank, 3057 v->TimePerChromaMetaChunkVBlank, 3058 v->TimePerMetaChunkFlip, 3059 v->TimePerChromaMetaChunkFlip, 3060 v->time_per_pte_group_nom_luma, 3061 v->time_per_pte_group_vblank_luma, 3062 v->time_per_pte_group_flip_luma, 3063 v->time_per_pte_group_nom_chroma, 3064 v->time_per_pte_group_vblank_chroma, 3065 v->time_per_pte_group_flip_chroma); 3066 3067 CalculateVMGroupAndRequestTimes( 3068 v->NumberOfActivePlanes, 3069 v->GPUVMEnable, 3070 v->GPUVMMaxPageTableLevels, 3071 v->HTotal, 3072 v->BytePerPixelC, 3073 v->DestinationLinesToRequestVMInVBlank, 3074 v->DestinationLinesToRequestVMInImmediateFlip, 3075 v->DCCEnable, 3076 v->PixelClock, 3077 v->dpte_row_width_luma_ub, 3078 v->dpte_row_width_chroma_ub, 3079 v->vm_group_bytes, 3080 v->dpde0_bytes_per_frame_ub_l, 3081 v->dpde0_bytes_per_frame_ub_c, 3082 v->meta_pte_bytes_per_frame_ub_l, 3083 v->meta_pte_bytes_per_frame_ub_c, 3084 v->TimePerVMGroupVBlank, 3085 v->TimePerVMGroupFlip, 3086 v->TimePerVMRequestVBlank, 3087 v->TimePerVMRequestFlip); 3088 3089 // Min 
TTUVBlank 3090 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3091 if (PrefetchMode == 0) { 3092 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3093 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3094 v->MinTTUVBlank[k] = dml_max( 3095 v->DRAMClockChangeWatermark, 3096 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3097 } else if (PrefetchMode == 1) { 3098 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3099 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3100 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3101 } else { 3102 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3103 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3104 v->MinTTUVBlank[k] = v->UrgentWatermark; 3105 } 3106 if (!v->DynamicMetadataEnable[k]) 3107 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3108 } 3109 3110 // DCC Configuration 3111 v->ActiveDPPs = 0; 3112 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3113 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3114 v->SourcePixelFormat[k], 3115 v->SurfaceWidthY[k], 3116 v->SurfaceWidthC[k], 3117 v->SurfaceHeightY[k], 3118 v->SurfaceHeightC[k], 3119 v->DETBufferSizeInKByte[0] * 1024, 3120 v->BlockHeight256BytesY[k], 3121 v->BlockHeight256BytesC[k], 3122 v->SurfaceTiling[k], 3123 v->BytePerPixelY[k], 3124 v->BytePerPixelC[k], 3125 v->BytePerPixelDETY[k], 3126 v->BytePerPixelDETC[k], 3127 v->SourceScan[k], 3128 &v->DCCYMaxUncompressedBlock[k], 3129 &v->DCCCMaxUncompressedBlock[k], 3130 &v->DCCYMaxCompressedBlock[k], 3131 &v->DCCCMaxCompressedBlock[k], 3132 &v->DCCYIndependentBlock[k], 3133 &v->DCCCIndependentBlock[k]); 3134 } 3135 3136 // VStartup Adjustment 3137 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3138 bool isInterlaceTiming; 3139 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3140 #ifdef __DML_VBA_DEBUG__ 3141 dml_print("DML::%s: k=%d, 
MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3142 #endif 3143 3144 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3145 3146 #ifdef __DML_VBA_DEBUG__ 3147 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3148 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3149 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3150 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3151 #endif 3152 3153 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3154 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3155 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3156 } 3157 3158 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3159 3160 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) 3161 - v->VFrontPorch[k]) 3162 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) 3163 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3164 3165 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3166 3167 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3168 <= (isInterlaceTiming ? 
3169 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3170 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3171 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3172 } else { 3173 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3174 } 3175 #ifdef __DML_VBA_DEBUG__ 3176 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3177 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3178 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3179 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3180 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3181 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3182 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3183 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3184 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3185 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3186 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3187 #endif 3188 } 3189 3190 { 3191 //Maximum Bandwidth Used 3192 double TotalWRBandwidth = 0; 3193 double MaxPerPlaneVActiveWRBandwidth = 0; 3194 double WRBandwidth = 0; 3195 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3196 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3197 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3198 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3199 } else if (v->WritebackEnable[k] == true) { 3200 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3201 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3202 } 3203 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3204 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3205 } 3206 3207 v->TotalDataReadBandwidth = 0; 3208 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3209 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3210 } 3211 } 3212 // Stutter Efficiency 3213 CalculateStutterEfficiency( 3214 mode_lib, 3215 v->CompressedBufferSizeInkByte, 3216 v->UnboundedRequestEnabled, 3217 v->ConfigReturnBufferSizeInKByte, 3218 v->MetaFIFOSizeInKEntries, 3219 v->ZeroSizeBufferEntries, 3220 v->NumberOfActivePlanes, 3221 v->ROBBufferSizeInKByte, 3222 v->TotalDataReadBandwidth, 3223 v->DCFCLK, 3224 v->ReturnBW, 3225 v->COMPBUF_RESERVED_SPACE_64B, 3226 v->COMPBUF_RESERVED_SPACE_ZS, 3227 v->SRExitTime, 3228 v->SRExitZ8Time, 3229 v->SynchronizedVBlank, 3230 v->StutterEnterPlusExitWatermark, 3231 v->Z8StutterEnterPlusExitWatermark, 3232 v->ProgressiveToInterlaceUnitInOPP, 3233 v->Interlace, 3234 v->MinTTUVBlank, 3235 v->DPPPerPlane, 3236 v->DETBufferSizeY, 3237 v->BytePerPixelY, 3238 v->BytePerPixelDETY, 3239 v->SwathWidthY, 3240 v->SwathHeightY, 3241 v->SwathHeightC, 3242 v->DCCRateLuma, 3243 v->DCCRateChroma, 3244 v->DCCFractionOfZeroSizeRequestsLuma, 3245 v->DCCFractionOfZeroSizeRequestsChroma, 3246 v->HTotal, 3247 v->VTotal, 3248 v->PixelClock, 3249 v->VRatio, 3250 v->SourceScan, 3251 v->BlockHeight256BytesY, 3252 v->BlockWidth256BytesY, 3253 v->BlockHeight256BytesC, 3254 v->BlockWidth256BytesC, 3255 v->DCCYMaxUncompressedBlock, 3256 v->DCCCMaxUncompressedBlock, 3257 v->VActive, 3258 v->DCCEnable, 3259 v->WritebackEnable, 3260 v->ReadBandwidthPlaneLuma, 3261 v->ReadBandwidthPlaneChroma, 3262 v->meta_row_bw, 3263 v->dpte_row_bw, 3264 &v->StutterEfficiencyNotIncludingVBlank, 3265 &v->StutterEfficiency, 3266 &v->NumberOfStutterBurstsPerFrame, 3267 &v->Z8StutterEfficiencyNotIncludingVBlank, 3268 &v->Z8StutterEfficiency, 3269 
&v->Z8NumberOfStutterBurstsPerFrame, 3270 &v->StutterPeriod); 3271 } 3272 3273 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3274 { 3275 struct vba_vars_st *v = &mode_lib->vba; 3276 // Display Pipe Configuration 3277 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3278 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3279 int BytePerPixY[DC__NUM_DPP__MAX]; 3280 int BytePerPixC[DC__NUM_DPP__MAX]; 3281 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3282 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3283 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3284 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3285 double dummy1[DC__NUM_DPP__MAX]; 3286 double dummy2[DC__NUM_DPP__MAX]; 3287 double dummy3[DC__NUM_DPP__MAX]; 3288 double dummy4[DC__NUM_DPP__MAX]; 3289 int dummy5[DC__NUM_DPP__MAX]; 3290 int dummy6[DC__NUM_DPP__MAX]; 3291 bool dummy7[DC__NUM_DPP__MAX]; 3292 bool dummysinglestring; 3293 3294 unsigned int k; 3295 3296 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3297 3298 dml30_CalculateBytePerPixelAnd256BBlockSizes( 3299 v->SourcePixelFormat[k], 3300 v->SurfaceTiling[k], 3301 &BytePerPixY[k], 3302 &BytePerPixC[k], 3303 &BytePerPixDETY[k], 3304 &BytePerPixDETC[k], 3305 &Read256BytesBlockHeightY[k], 3306 &Read256BytesBlockHeightC[k], 3307 &Read256BytesBlockWidthY[k], 3308 &Read256BytesBlockWidthC[k]); 3309 } 3310 3311 CalculateSwathAndDETConfiguration( 3312 false, 3313 v->NumberOfActivePlanes, 3314 v->DETBufferSizeInKByte[0], 3315 dummy1, 3316 dummy2, 3317 v->SourceScan, 3318 v->SourcePixelFormat, 3319 v->SurfaceTiling, 3320 v->ViewportWidth, 3321 v->ViewportHeight, 3322 v->SurfaceWidthY, 3323 v->SurfaceWidthC, 3324 v->SurfaceHeightY, 3325 v->SurfaceHeightC, 3326 Read256BytesBlockHeightY, 3327 Read256BytesBlockHeightC, 3328 Read256BytesBlockWidthY, 3329 Read256BytesBlockWidthC, 3330 v->ODMCombineEnabled, 3331 v->BlendingAndTiming, 3332 BytePerPixY, 3333 BytePerPixC, 3334 BytePerPixDETY, 3335 BytePerPixDETC, 3336 v->HActive, 3337 v->HRatio, 3338 
v->HRatioChroma, 3339 v->DPPPerPlane, 3340 dummy5, 3341 dummy6, 3342 dummy3, 3343 dummy4, 3344 v->SwathHeightY, 3345 v->SwathHeightC, 3346 v->DETBufferSizeY, 3347 v->DETBufferSizeC, 3348 dummy7, 3349 &dummysinglestring); 3350 } 3351 3352 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3353 { 3354 if (PrefetchMode == 0) { 3355 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3356 } else if (PrefetchMode == 1) { 3357 return dml_max(SREnterPlusExitTime, UrgentLatency); 3358 } else { 3359 return UrgentLatency; 3360 } 3361 } 3362 3363 double dml31_CalculateWriteBackDISPCLK( 3364 enum source_format_class WritebackPixelFormat, 3365 double PixelClock, 3366 double WritebackHRatio, 3367 double WritebackVRatio, 3368 unsigned int WritebackHTaps, 3369 unsigned int WritebackVTaps, 3370 long WritebackSourceWidth, 3371 long WritebackDestinationWidth, 3372 unsigned int HTotal, 3373 unsigned int WritebackLineBufferSize) 3374 { 3375 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3376 3377 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3378 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3379 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3380 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3381 } 3382 3383 static double CalculateWriteBackDelay( 3384 enum source_format_class WritebackPixelFormat, 3385 double WritebackHRatio, 3386 double WritebackVRatio, 3387 unsigned int WritebackVTaps, 3388 int WritebackDestinationWidth, 3389 int WritebackDestinationHeight, 3390 int WritebackSourceHeight, 3391 unsigned int HTotal) 3392 { 3393 double CalculateWriteBackDelay; 3394 double Line_length; 3395 double Output_lines_last_notclamped; 3396 double WritebackVInit; 3397 3398 
WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3399 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3400 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3401 if (Output_lines_last_notclamped < 0) { 3402 CalculateWriteBackDelay = 0; 3403 } else { 3404 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3405 } 3406 return CalculateWriteBackDelay; 3407 } 3408 3409 static void CalculateVupdateAndDynamicMetadataParameters( 3410 int MaxInterDCNTileRepeaters, 3411 double DPPCLK, 3412 double DISPCLK, 3413 double DCFClkDeepSleep, 3414 double PixelClock, 3415 int HTotal, 3416 int VBlank, 3417 int DynamicMetadataTransmittedBytes, 3418 int DynamicMetadataLinesBeforeActiveRequired, 3419 int InterlaceEnable, 3420 bool ProgressiveToInterlaceUnitInOPP, 3421 double *TSetup, 3422 double *Tdmbf, 3423 double *Tdmec, 3424 double *Tdmsks, 3425 int *VUpdateOffsetPix, 3426 double *VUpdateWidthPix, 3427 double *VReadyOffsetPix) 3428 { 3429 double TotalRepeaterDelayTime; 3430 3431 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3432 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3433 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3434 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3435 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3436 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3437 *Tdmec = HTotal / PixelClock; 3438 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3439 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3440 } else { 3441 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3442 } 
3443 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3444 *Tdmsks = *Tdmsks / 2; 3445 } 3446 #ifdef __DML_VBA_DEBUG__ 3447 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3448 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3449 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3450 #endif 3451 } 3452 3453 static void CalculateRowBandwidth( 3454 bool GPUVMEnable, 3455 enum source_format_class SourcePixelFormat, 3456 double VRatio, 3457 double VRatioChroma, 3458 bool DCCEnable, 3459 double LineTime, 3460 unsigned int MetaRowByteLuma, 3461 unsigned int MetaRowByteChroma, 3462 unsigned int meta_row_height_luma, 3463 unsigned int meta_row_height_chroma, 3464 unsigned int PixelPTEBytesPerRowLuma, 3465 unsigned int PixelPTEBytesPerRowChroma, 3466 unsigned int dpte_row_height_luma, 3467 unsigned int dpte_row_height_chroma, 3468 double *meta_row_bw, 3469 double *dpte_row_bw) 3470 { 3471 if (DCCEnable != true) { 3472 *meta_row_bw = 0; 3473 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3474 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3475 } else { 3476 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3477 } 3478 3479 if (GPUVMEnable != true) { 3480 *dpte_row_bw = 0; 3481 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3482 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3483 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3484 } else { 3485 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3486 } 3487 } 3488 3489 static void 
CalculateFlipSchedule( 3490 struct display_mode_lib *mode_lib, 3491 unsigned int k, 3492 double HostVMInefficiencyFactor, 3493 double UrgentExtraLatency, 3494 double UrgentLatency, 3495 double PDEAndMetaPTEBytesPerFrame, 3496 double MetaRowBytes, 3497 double DPTEBytesPerRow) 3498 { 3499 struct vba_vars_st *v = &mode_lib->vba; 3500 double min_row_time = 0.0; 3501 unsigned int HostVMDynamicLevelsTrips; 3502 double TimeForFetchingMetaPTEImmediateFlip; 3503 double TimeForFetchingRowInVBlankImmediateFlip; 3504 double ImmediateFlipBW; 3505 double LineTime = v->HTotal[k] / v->PixelClock[k]; 3506 3507 if (v->GPUVMEnable == true && v->HostVMEnable == true) { 3508 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3509 } else { 3510 HostVMDynamicLevelsTrips = 0; 3511 } 3512 3513 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { 3514 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; 3515 } 3516 3517 if (v->GPUVMEnable == true) { 3518 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3519 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3520 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3521 LineTime / 4.0); 3522 } else { 3523 TimeForFetchingMetaPTEImmediateFlip = 0; 3524 } 3525 3526 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3527 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3528 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3529 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3530 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3531 LineTime / 4); 3532 } else { 3533 TimeForFetchingRowInVBlankImmediateFlip = 0; 3534 } 3535 3536 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / 
LineTime), 1) / 4.0; 3537 3538 if (v->GPUVMEnable == true) { 3539 v->final_flip_bw[k] = dml_max( 3540 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), 3541 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); 3542 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3543 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); 3544 } else { 3545 v->final_flip_bw[k] = 0; 3546 } 3547 3548 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 3549 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3550 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3551 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3552 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3553 } else { 3554 min_row_time = dml_min4( 3555 v->dpte_row_height[k] * LineTime / v->VRatio[k], 3556 v->meta_row_height[k] * LineTime / v->VRatio[k], 3557 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], 3558 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3559 } 3560 } else { 3561 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3562 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; 3563 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3564 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; 3565 } else { 3566 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); 3567 } 3568 } 3569 3570 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || 
v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 3571 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3572 v->ImmediateFlipSupportedForPipe[k] = false; 3573 } else { 3574 v->ImmediateFlipSupportedForPipe[k] = true; 3575 } 3576 3577 #ifdef __DML_VBA_DEBUG__ 3578 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); 3579 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); 3580 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3581 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3582 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3583 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); 3584 #endif 3585 3586 } 3587 3588 static double TruncToValidBPP( 3589 double LinkBitRate, 3590 int Lanes, 3591 int HTotal, 3592 int HActive, 3593 double PixelClock, 3594 double DesiredBPP, 3595 bool DSCEnable, 3596 enum output_encoder_class Output, 3597 enum output_format_class Format, 3598 unsigned int DSCInputBitPerComponent, 3599 int DSCSlices, 3600 int AudioRate, 3601 int AudioLayout, 3602 enum odm_combine_mode ODMCombine) 3603 { 3604 double MaxLinkBPP; 3605 int MinDSCBPP; 3606 double MaxDSCBPP; 3607 int NonDSCBPP0; 3608 int NonDSCBPP1; 3609 int NonDSCBPP2; 3610 3611 if (Format == dm_420) { 3612 NonDSCBPP0 = 12; 3613 NonDSCBPP1 = 15; 3614 NonDSCBPP2 = 18; 3615 MinDSCBPP = 6; 3616 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3617 } else if (Format == dm_444) { 3618 NonDSCBPP0 = 24; 3619 NonDSCBPP1 = 30; 3620 NonDSCBPP2 = 36; 3621 MinDSCBPP = 8; 3622 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3623 } else { 3624 3625 NonDSCBPP0 = 16; 3626 
NonDSCBPP1 = 20; 3627 NonDSCBPP2 = 24; 3628 3629 if (Format == dm_n422) { 3630 MinDSCBPP = 7; 3631 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3632 } else { 3633 MinDSCBPP = 8; 3634 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3635 } 3636 } 3637 3638 if (DSCEnable && Output == dm_dp) { 3639 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3640 } else { 3641 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3642 } 3643 3644 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3645 MaxLinkBPP = 16; 3646 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3647 MaxLinkBPP = 32; 3648 } 3649 3650 if (DesiredBPP == 0) { 3651 if (DSCEnable) { 3652 if (MaxLinkBPP < MinDSCBPP) { 3653 return BPP_INVALID; 3654 } else if (MaxLinkBPP >= MaxDSCBPP) { 3655 return MaxDSCBPP; 3656 } else { 3657 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3658 } 3659 } else { 3660 if (MaxLinkBPP >= NonDSCBPP2) { 3661 return NonDSCBPP2; 3662 } else if (MaxLinkBPP >= NonDSCBPP1) { 3663 return NonDSCBPP1; 3664 } else if (MaxLinkBPP >= NonDSCBPP0) { 3665 return 16.0; 3666 } else { 3667 return BPP_INVALID; 3668 } 3669 } 3670 } else { 3671 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3672 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3673 return BPP_INVALID; 3674 } else { 3675 return DesiredBPP; 3676 } 3677 } 3678 return BPP_INVALID; 3679 } 3680 3681 static noinline void CalculatePrefetchSchedulePerPlane( 3682 struct display_mode_lib *mode_lib, 3683 double HostVMInefficiencyFactor, 3684 int i, 3685 unsigned j, 3686 unsigned k) 3687 { 3688 struct vba_vars_st *v = &mode_lib->vba; 3689 Pipe myPipe; 3690 3691 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3692 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3693 myPipe.PixelClock = v->PixelClock[k]; 3694 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3695 myPipe.DPPPerPlane = 
v->NoOfDPP[i][j][k]; 3696 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3697 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3698 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3699 3700 myPipe.SourceScan = v->SourceScan[k]; 3701 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3702 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3703 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3704 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3705 myPipe.InterlaceEnable = v->Interlace[k]; 3706 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3707 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3708 myPipe.HTotal = v->HTotal[k]; 3709 myPipe.DCCEnable = v->DCCEnable[k]; 3710 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3711 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3712 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3713 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3714 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3715 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3716 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3717 mode_lib, 3718 HostVMInefficiencyFactor, 3719 &myPipe, 3720 v->DSCDelayPerState[i][k], 3721 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3722 v->DPPCLKDelaySCL, 3723 v->DPPCLKDelaySCLLBOnly, 3724 v->DPPCLKDelayCNVCCursor, 3725 v->DISPCLKDelaySubtotal, 3726 v->SwathWidthYThisState[k] / v->HRatio[k], 3727 v->OutputFormat[k], 3728 v->MaxInterDCNTileRepeaters, 3729 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3730 v->MaximumVStartup[i][j][k], 3731 v->GPUVMMaxPageTableLevels, 3732 v->GPUVMEnable, 3733 v->HostVMEnable, 3734 v->HostVMMaxNonCachedPageTableLevels, 3735 v->HostVMMinPageSize, 3736 v->DynamicMetadataEnable[k], 3737 v->DynamicMetadataVMEnabled, 3738 v->DynamicMetadataLinesBeforeActiveRequired[k], 3739 v->DynamicMetadataTransmittedBytes[k], 3740 v->UrgLatency[i], 3741 v->ExtraLatency, 3742 v->TimeCalc, 
3743 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3744 v->MetaRowBytes[i][j][k], 3745 v->DPTEBytesPerRow[i][j][k], 3746 v->PrefetchLinesY[i][j][k], 3747 v->SwathWidthYThisState[k], 3748 v->PrefillY[k], 3749 v->MaxNumSwY[k], 3750 v->PrefetchLinesC[i][j][k], 3751 v->SwathWidthCThisState[k], 3752 v->PrefillC[k], 3753 v->MaxNumSwC[k], 3754 v->swath_width_luma_ub_this_state[k], 3755 v->swath_width_chroma_ub_this_state[k], 3756 v->SwathHeightYThisState[k], 3757 v->SwathHeightCThisState[k], 3758 v->TWait, 3759 &v->DSTXAfterScaler[k], 3760 &v->DSTYAfterScaler[k], 3761 &v->LineTimesForPrefetch[k], 3762 &v->PrefetchBW[k], 3763 &v->LinesForMetaPTE[k], 3764 &v->LinesForMetaAndDPTERow[k], 3765 &v->VRatioPreY[i][j][k], 3766 &v->VRatioPreC[i][j][k], 3767 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3768 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3769 &v->NoTimeForDynamicMetadata[i][j][k], 3770 &v->Tno_bw[k], 3771 &v->prefetch_vmrow_bw[k], 3772 &v->dummy7[k], 3773 &v->dummy8[k], 3774 &v->dummy13[k], 3775 &v->VUpdateOffsetPix[k], 3776 &v->VUpdateWidthPix[k], 3777 &v->VReadyOffsetPix[k]); 3778 } 3779 3780 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte) 3781 { 3782 int i, total_pipes = 0; 3783 for (i = 0; i < NumberOfActivePlanes; i++) 3784 total_pipes += NoOfDPPThisState[i]; 3785 *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64; 3786 if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE) 3787 *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE; 3788 } 3789 3790 3791 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3792 { 3793 struct vba_vars_st *v = &mode_lib->vba; 3794 3795 int i, j; 3796 unsigned int k, m; 3797 int ReorderingBytes; 3798 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3799 bool NoChroma = true; 3800 bool EnoughWritebackUnits = true; 3801 bool 
P2IWith420 = false; 3802 bool DSCOnlyIfNecessaryWithBPP = false; 3803 bool DSC422NativeNotSupported = false; 3804 double MaxTotalVActiveRDBandwidth; 3805 bool ViewportExceedsSurface = false; 3806 bool FMTBufferExceeded = false; 3807 3808 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3809 3810 CalculateMinAndMaxPrefetchMode( 3811 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3812 &MinPrefetchMode, &MaxPrefetchMode); 3813 3814 /*Scale Ratio, taps Support Check*/ 3815 3816 v->ScaleRatioAndTapsSupport = true; 3817 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3818 if (v->ScalerEnabled[k] == false 3819 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3820 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3821 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3822 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3823 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3824 v->ScaleRatioAndTapsSupport = false; 3825 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3826 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3827 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3828 || v->VRatio[k] > v->vtaps[k] 3829 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3830 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3831 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3832 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3833 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3834 || v->HRatioChroma[k] > v->MaxHSCLRatio 3835 || v->VRatioChroma[k] > v->MaxVSCLRatio 3836 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3837 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3838 
v->ScaleRatioAndTapsSupport = false; 3839 } 3840 } 3841 /*Source Format, Pixel Format and Scan Support Check*/ 3842 3843 v->SourceFormatPixelAndScanSupport = true; 3844 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3845 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 3846 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 3847 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 3848 v->SourceFormatPixelAndScanSupport = false; 3849 } 3850 } 3851 /*Bandwidth Support Check*/ 3852 3853 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3854 dml30_CalculateBytePerPixelAnd256BBlockSizes( 3855 v->SourcePixelFormat[k], 3856 v->SurfaceTiling[k], 3857 &v->BytePerPixelY[k], 3858 &v->BytePerPixelC[k], 3859 &v->BytePerPixelInDETY[k], 3860 &v->BytePerPixelInDETC[k], 3861 &v->Read256BlockHeightY[k], 3862 &v->Read256BlockHeightC[k], 3863 &v->Read256BlockWidthY[k], 3864 &v->Read256BlockWidthC[k]); 3865 } 3866 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3867 if (v->SourceScan[k] != dm_vert) { 3868 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 3869 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 3870 } else { 3871 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 3872 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 3873 } 3874 } 3875 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3876 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 3877 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 3878 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 3879 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 3880 } 3881 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3882 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 3883 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 
3884 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 3885 } else if (v->WritebackEnable[k] == true) { 3886 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3887 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 3888 } else { 3889 v->WriteBandwidth[k] = 0.0; 3890 } 3891 } 3892 3893 /*Writeback Latency support check*/ 3894 3895 v->WritebackLatencySupport = true; 3896 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3897 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 3898 v->WritebackLatencySupport = false; 3899 } 3900 } 3901 3902 /*Writeback Mode Support Check*/ 3903 3904 v->TotalNumberOfActiveWriteback = 0; 3905 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3906 if (v->WritebackEnable[k] == true) { 3907 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 3908 } 3909 } 3910 3911 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 3912 EnoughWritebackUnits = false; 3913 } 3914 3915 /*Writeback Scale Ratio and Taps Support Check*/ 3916 3917 v->WritebackScaleRatioAndTapsSupport = true; 3918 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3919 if (v->WritebackEnable[k] == true) { 3920 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 3921 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 3922 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 3923 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 3924 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 3925 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 3926 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 3927 v->WritebackScaleRatioAndTapsSupport = false; 3928 } 3929 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 3930 
v->WritebackScaleRatioAndTapsSupport = false; 3931 } 3932 } 3933 } 3934 /*Maximum DISPCLK/DPPCLK Support check*/ 3935 3936 v->WritebackRequiredDISPCLK = 0.0; 3937 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3938 if (v->WritebackEnable[k] == true) { 3939 v->WritebackRequiredDISPCLK = dml_max( 3940 v->WritebackRequiredDISPCLK, 3941 dml31_CalculateWriteBackDISPCLK( 3942 v->WritebackPixelFormat[k], 3943 v->PixelClock[k], 3944 v->WritebackHRatio[k], 3945 v->WritebackVRatio[k], 3946 v->WritebackHTaps[k], 3947 v->WritebackVTaps[k], 3948 v->WritebackSourceWidth[k], 3949 v->WritebackDestinationWidth[k], 3950 v->HTotal[k], 3951 v->WritebackLineBufferSize)); 3952 } 3953 } 3954 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3955 if (v->HRatio[k] > 1.0) { 3956 v->PSCL_FACTOR[k] = dml_min( 3957 v->MaxDCHUBToPSCLThroughput, 3958 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 3959 } else { 3960 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 3961 } 3962 if (v->BytePerPixelC[k] == 0.0) { 3963 v->PSCL_FACTOR_CHROMA[k] = 0.0; 3964 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 3965 * dml_max3( 3966 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 3967 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 3968 1.0); 3969 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 3970 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 3971 } 3972 } else { 3973 if (v->HRatioChroma[k] > 1.0) { 3974 v->PSCL_FACTOR_CHROMA[k] = dml_min( 3975 v->MaxDCHUBToPSCLThroughput, 3976 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 3977 } else { 3978 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 3979 } 3980 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 3981 * dml_max5( 3982 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 3983 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 3984 v->VTAPsChroma[k] 
/ 6.0 * dml_min(1.0, v->HRatioChroma[k]), 3985 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 3986 1.0); 3987 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 3988 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 3989 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 3990 } 3991 } 3992 } 3993 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3994 int MaximumSwathWidthSupportLuma; 3995 int MaximumSwathWidthSupportChroma; 3996 3997 if (v->SurfaceTiling[k] == dm_sw_linear) { 3998 MaximumSwathWidthSupportLuma = 8192.0; 3999 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4000 MaximumSwathWidthSupportLuma = 2880.0; 4001 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4002 MaximumSwathWidthSupportLuma = 3840.0; 4003 } else { 4004 MaximumSwathWidthSupportLuma = 5760.0; 4005 } 4006 4007 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4008 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4009 } else { 4010 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4011 } 4012 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4013 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4014 if (v->BytePerPixelC[k] == 0.0) { 4015 v->MaximumSwathWidthInLineBufferChroma = 0; 4016 } else { 4017 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4018 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4019 } 4020 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4021 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4022 } 4023 4024 CalculateSwathAndDETConfiguration( 4025 true, 4026 
v->NumberOfActivePlanes, 4027 v->DETBufferSizeInKByte[0], 4028 v->MaximumSwathWidthLuma, 4029 v->MaximumSwathWidthChroma, 4030 v->SourceScan, 4031 v->SourcePixelFormat, 4032 v->SurfaceTiling, 4033 v->ViewportWidth, 4034 v->ViewportHeight, 4035 v->SurfaceWidthY, 4036 v->SurfaceWidthC, 4037 v->SurfaceHeightY, 4038 v->SurfaceHeightC, 4039 v->Read256BlockHeightY, 4040 v->Read256BlockHeightC, 4041 v->Read256BlockWidthY, 4042 v->Read256BlockWidthC, 4043 v->odm_combine_dummy, 4044 v->BlendingAndTiming, 4045 v->BytePerPixelY, 4046 v->BytePerPixelC, 4047 v->BytePerPixelInDETY, 4048 v->BytePerPixelInDETC, 4049 v->HActive, 4050 v->HRatio, 4051 v->HRatioChroma, 4052 v->NoOfDPPThisState, 4053 v->swath_width_luma_ub_this_state, 4054 v->swath_width_chroma_ub_this_state, 4055 v->SwathWidthYThisState, 4056 v->SwathWidthCThisState, 4057 v->SwathHeightYThisState, 4058 v->SwathHeightCThisState, 4059 v->DETBufferSizeYThisState, 4060 v->DETBufferSizeCThisState, 4061 v->SingleDPPViewportSizeSupportPerPlane, 4062 &v->ViewportSizeSupport[0][0]); 4063 4064 for (i = 0; i < v->soc.num_states; i++) { 4065 for (j = 0; j < 2; j++) { 4066 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4067 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4068 v->RequiredDISPCLK[i][j] = 0.0; 4069 v->DISPCLK_DPPCLK_Support[i][j] = true; 4070 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4071 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4072 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4073 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4074 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4075 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4076 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4077 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4078 } 4079 
v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4080 * (1 + v->DISPCLKRampingMargin / 100.0); 4081 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4082 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4083 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4084 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4085 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4086 } 4087 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4088 * (1 + v->DISPCLKRampingMargin / 100.0); 4089 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4090 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4091 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4092 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4093 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4094 } 4095 4096 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4097 || !(v->Output[k] == dm_dp || 4098 v->Output[k] == dm_dp2p0 || 4099 v->Output[k] == dm_edp)) { 4100 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4101 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4102 4103 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4104 FMTBufferExceeded = true; 4105 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4106 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4107 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4108 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4109 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4110 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4111 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4112 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > 
v->MaxDispclkRoundedDownToDFSGranularity) { 4113 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4114 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4115 } else { 4116 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4117 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4118 } 4119 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH 4120 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4121 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { 4122 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4123 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4124 } else { 4125 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4126 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4127 } 4128 } 4129 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH 4130 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4131 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { 4132 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4133 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4134 4135 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4136 FMTBufferExceeded = true; 4137 } else { 4138 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4139 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4140 } 4141 } 4142 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4143 v->MPCCombine[i][j][k] = false; 4144 v->NoOfDPP[i][j][k] = 4; 4145 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4146 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4147 v->MPCCombine[i][j][k] = false; 4148 v->NoOfDPP[i][j][k] = 2; 4149 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + 
v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4150 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4151 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4152 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4153 v->MPCCombine[i][j][k] = false; 4154 v->NoOfDPP[i][j][k] = 1; 4155 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4156 } else { 4157 v->MPCCombine[i][j][k] = true; 4158 v->NoOfDPP[i][j][k] = 2; 4159 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4160 } 4161 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4162 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4163 > v->MaxDppclkRoundedDownToDFSGranularity) 4164 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4165 v->DISPCLK_DPPCLK_Support[i][j] = false; 4166 } 4167 } 4168 v->TotalNumberOfActiveDPP[i][j] = 0; 4169 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4170 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4171 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4172 if (v->NoOfDPP[i][j][k] == 1) 4173 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4174 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4175 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4176 NoChroma = false; 4177 } 4178 4179 // UPTO 4180 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4181 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4182 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4183 double 
BWOfNonSplitPlaneOfMaximumBandwidth; 4184 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4185 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4186 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4187 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4188 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4189 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4190 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4191 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4192 } 4193 } 4194 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4195 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4196 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4197 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4198 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4199 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4200 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4201 } 4202 } 4203 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4204 v->RequiredDISPCLK[i][j] = 0.0; 4205 v->DISPCLK_DPPCLK_Support[i][j] = true; 4206 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4207 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4208 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4209 v->MPCCombine[i][j][k] = true; 4210 v->NoOfDPP[i][j][k] = 2; 4211 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4212 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4213 } else { 4214 v->MPCCombine[i][j][k] = false; 4215 v->NoOfDPP[i][j][k] = 1; 4216 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4217 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4218 } 4219 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4220 && 
v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4221 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4222 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4223 } else { 4224 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4225 } 4226 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4227 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4228 > v->MaxDppclkRoundedDownToDFSGranularity) 4229 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4230 v->DISPCLK_DPPCLK_Support[i][j] = false; 4231 } 4232 } 4233 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4234 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4235 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4236 } 4237 } 4238 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4239 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4240 v->DISPCLK_DPPCLK_Support[i][j] = false; 4241 } 4242 } 4243 } 4244 4245 /*Total Available Pipes Support Check*/ 4246 4247 for (i = 0; i < v->soc.num_states; i++) { 4248 for (j = 0; j < 2; j++) { 4249 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4250 v->TotalAvailablePipesSupport[i][j] = true; 4251 } else { 4252 v->TotalAvailablePipesSupport[i][j] = false; 4253 } 4254 } 4255 } 4256 /*Display IO and DSC Support Check*/ 4257 4258 v->NonsupportedDSCInputBPC = false; 4259 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4260 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4261 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4262 v->NonsupportedDSCInputBPC = true; 4263 } 4264 } 4265 4266 /*Number Of DSC Slices*/ 4267 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4268 
if (v->BlendingAndTiming[k] == k) { 4269 if (v->PixelClockBackEnd[k] > 3200) { 4270 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4271 } else if (v->PixelClockBackEnd[k] > 1360) { 4272 v->NumberOfDSCSlices[k] = 8; 4273 } else if (v->PixelClockBackEnd[k] > 680) { 4274 v->NumberOfDSCSlices[k] = 4; 4275 } else if (v->PixelClockBackEnd[k] > 340) { 4276 v->NumberOfDSCSlices[k] = 2; 4277 } else { 4278 v->NumberOfDSCSlices[k] = 1; 4279 } 4280 } else { 4281 v->NumberOfDSCSlices[k] = 0; 4282 } 4283 } 4284 4285 for (i = 0; i < v->soc.num_states; i++) { 4286 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4287 v->RequiresDSC[i][k] = false; 4288 v->RequiresFEC[i][k] = false; 4289 if (v->BlendingAndTiming[k] == k) { 4290 if (v->Output[k] == dm_hdmi) { 4291 v->RequiresDSC[i][k] = false; 4292 v->RequiresFEC[i][k] = false; 4293 v->OutputBppPerState[i][k] = TruncToValidBPP( 4294 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4295 3, 4296 v->HTotal[k], 4297 v->HActive[k], 4298 v->PixelClockBackEnd[k], 4299 v->ForcedOutputLinkBPP[k], 4300 false, 4301 v->Output[k], 4302 v->OutputFormat[k], 4303 v->DSCInputBitPerComponent[k], 4304 v->NumberOfDSCSlices[k], 4305 v->AudioSampleRate[k], 4306 v->AudioSampleLayout[k], 4307 v->ODMCombineEnablePerState[i][k]); 4308 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4309 if (v->DSCEnable[k] == true) { 4310 v->RequiresDSC[i][k] = true; 4311 v->LinkDSCEnable = true; 4312 if (v->Output[k] == dm_dp) { 4313 v->RequiresFEC[i][k] = true; 4314 } else { 4315 v->RequiresFEC[i][k] = false; 4316 } 4317 } else { 4318 v->RequiresDSC[i][k] = false; 4319 v->LinkDSCEnable = false; 4320 v->RequiresFEC[i][k] = false; 4321 } 4322 4323 v->Outbpp = BPP_INVALID; 4324 if (v->PHYCLKPerState[i] >= 270.0) { 4325 v->Outbpp = TruncToValidBPP( 4326 (1.0 - v->Downspreading / 100.0) * 2700, 4327 v->OutputLinkDPLanes[k], 4328 v->HTotal[k], 4329 v->HActive[k], 4330 v->PixelClockBackEnd[k], 4331 v->ForcedOutputLinkBPP[k], 4332 v->LinkDSCEnable, 
4333 v->Output[k], 4334 v->OutputFormat[k], 4335 v->DSCInputBitPerComponent[k], 4336 v->NumberOfDSCSlices[k], 4337 v->AudioSampleRate[k], 4338 v->AudioSampleLayout[k], 4339 v->ODMCombineEnablePerState[i][k]); 4340 v->OutputBppPerState[i][k] = v->Outbpp; 4341 // TODO: Need some other way to handle this nonsense 4342 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4343 } 4344 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4345 v->Outbpp = TruncToValidBPP( 4346 (1.0 - v->Downspreading / 100.0) * 5400, 4347 v->OutputLinkDPLanes[k], 4348 v->HTotal[k], 4349 v->HActive[k], 4350 v->PixelClockBackEnd[k], 4351 v->ForcedOutputLinkBPP[k], 4352 v->LinkDSCEnable, 4353 v->Output[k], 4354 v->OutputFormat[k], 4355 v->DSCInputBitPerComponent[k], 4356 v->NumberOfDSCSlices[k], 4357 v->AudioSampleRate[k], 4358 v->AudioSampleLayout[k], 4359 v->ODMCombineEnablePerState[i][k]); 4360 v->OutputBppPerState[i][k] = v->Outbpp; 4361 // TODO: Need some other way to handle this nonsense 4362 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4363 } 4364 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4365 v->Outbpp = TruncToValidBPP( 4366 (1.0 - v->Downspreading / 100.0) * 8100, 4367 v->OutputLinkDPLanes[k], 4368 v->HTotal[k], 4369 v->HActive[k], 4370 v->PixelClockBackEnd[k], 4371 v->ForcedOutputLinkBPP[k], 4372 v->LinkDSCEnable, 4373 v->Output[k], 4374 v->OutputFormat[k], 4375 v->DSCInputBitPerComponent[k], 4376 v->NumberOfDSCSlices[k], 4377 v->AudioSampleRate[k], 4378 v->AudioSampleLayout[k], 4379 v->ODMCombineEnablePerState[i][k]); 4380 v->OutputBppPerState[i][k] = v->Outbpp; 4381 // TODO: Need some other way to handle this nonsense 4382 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4383 } 4384 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 4385 v->Outbpp = TruncToValidBPP( 4386 (1.0 - v->Downspreading / 100.0) * 10000, 4387 4, 4388 v->HTotal[k], 4389 v->HActive[k], 4390 
v->PixelClockBackEnd[k], 4391 v->ForcedOutputLinkBPP[k], 4392 v->LinkDSCEnable, 4393 v->Output[k], 4394 v->OutputFormat[k], 4395 v->DSCInputBitPerComponent[k], 4396 v->NumberOfDSCSlices[k], 4397 v->AudioSampleRate[k], 4398 v->AudioSampleLayout[k], 4399 v->ODMCombineEnablePerState[i][k]); 4400 v->OutputBppPerState[i][k] = v->Outbpp; 4401 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4402 } 4403 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4404 v->Outbpp = TruncToValidBPP( 4405 12000, 4406 4, 4407 v->HTotal[k], 4408 v->HActive[k], 4409 v->PixelClockBackEnd[k], 4410 v->ForcedOutputLinkBPP[k], 4411 v->LinkDSCEnable, 4412 v->Output[k], 4413 v->OutputFormat[k], 4414 v->DSCInputBitPerComponent[k], 4415 v->NumberOfDSCSlices[k], 4416 v->AudioSampleRate[k], 4417 v->AudioSampleLayout[k], 4418 v->ODMCombineEnablePerState[i][k]); 4419 v->OutputBppPerState[i][k] = v->Outbpp; 4420 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4421 } 4422 } 4423 } else { 4424 v->OutputBppPerState[i][k] = 0; 4425 } 4426 } 4427 } 4428 4429 for (i = 0; i < v->soc.num_states; i++) { 4430 v->LinkCapacitySupport[i] = true; 4431 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4432 if (v->BlendingAndTiming[k] == k 4433 && (v->Output[k] == dm_dp || 4434 v->Output[k] == dm_edp || 4435 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4436 v->LinkCapacitySupport[i] = false; 4437 } 4438 } 4439 } 4440 4441 // UPTO 2172 4442 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4443 if (v->BlendingAndTiming[k] == k 4444 && (v->Output[k] == dm_dp || 4445 v->Output[k] == dm_edp || 4446 v->Output[k] == dm_hdmi)) { 4447 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4448 P2IWith420 = true; 4449 } 4450 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4451 && !v->DSC422NativeSupport) { 4452 DSC422NativeNotSupported = true; 4453 } 4454 } 4455 } 4456 4457 for (i = 0; i < 
v->soc.num_states; ++i) { 4458 v->ODMCombine4To1SupportCheckOK[i] = true; 4459 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4460 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4461 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4462 || v->Output[k] == dm_hdmi)) { 4463 v->ODMCombine4To1SupportCheckOK[i] = false; 4464 } 4465 } 4466 } 4467 4468 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4469 4470 for (i = 0; i < v->soc.num_states; i++) { 4471 v->NotEnoughDSCUnits[i] = false; 4472 v->TotalDSCUnitsRequired = 0.0; 4473 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4474 if (v->RequiresDSC[i][k] == true) { 4475 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4476 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4477 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4478 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4479 } else { 4480 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4481 } 4482 } 4483 } 4484 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4485 v->NotEnoughDSCUnits[i] = true; 4486 } 4487 } 4488 /*DSC Delay per state*/ 4489 4490 for (i = 0; i < v->soc.num_states; i++) { 4491 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4492 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4493 v->BPP = 0.0; 4494 } else { 4495 v->BPP = v->OutputBppPerState[i][k]; 4496 } 4497 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4498 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4499 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4500 v->DSCInputBitPerComponent[k], 4501 v->BPP, 4502 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4503 v->NumberOfDSCSlices[k], 4504 v->OutputFormat[k], 4505 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4506 } else if (v->ODMCombineEnablePerState[i][k] 
== dm_odm_combine_mode_2to1) { 4507 v->DSCDelayPerState[i][k] = 2.0 4508 * (dscceComputeDelay( 4509 v->DSCInputBitPerComponent[k], 4510 v->BPP, 4511 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4512 v->NumberOfDSCSlices[k] / 2, 4513 v->OutputFormat[k], 4514 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4515 } else { 4516 v->DSCDelayPerState[i][k] = 4.0 4517 * (dscceComputeDelay( 4518 v->DSCInputBitPerComponent[k], 4519 v->BPP, 4520 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4521 v->NumberOfDSCSlices[k] / 4, 4522 v->OutputFormat[k], 4523 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4524 } 4525 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4526 } else { 4527 v->DSCDelayPerState[i][k] = 0.0; 4528 } 4529 } 4530 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4531 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4532 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4533 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4534 } 4535 } 4536 } 4537 } 4538 4539 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4540 // 4541 for (i = 0; i < v->soc.num_states; ++i) { 4542 for (j = 0; j <= 1; ++j) { 4543 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4544 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4545 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4546 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4547 } 4548 4549 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315) 4550 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]); 4551 CalculateSwathAndDETConfiguration( 4552 false, 4553 v->NumberOfActivePlanes, 4554 v->DETBufferSizeInKByte[0], 4555 v->MaximumSwathWidthLuma, 4556 v->MaximumSwathWidthChroma, 4557 v->SourceScan, 4558 v->SourcePixelFormat, 4559 v->SurfaceTiling, 
4560 v->ViewportWidth, 4561 v->ViewportHeight, 4562 v->SurfaceWidthY, 4563 v->SurfaceWidthC, 4564 v->SurfaceHeightY, 4565 v->SurfaceHeightC, 4566 v->Read256BlockHeightY, 4567 v->Read256BlockHeightC, 4568 v->Read256BlockWidthY, 4569 v->Read256BlockWidthC, 4570 v->ODMCombineEnableThisState, 4571 v->BlendingAndTiming, 4572 v->BytePerPixelY, 4573 v->BytePerPixelC, 4574 v->BytePerPixelInDETY, 4575 v->BytePerPixelInDETC, 4576 v->HActive, 4577 v->HRatio, 4578 v->HRatioChroma, 4579 v->NoOfDPPThisState, 4580 v->swath_width_luma_ub_this_state, 4581 v->swath_width_chroma_ub_this_state, 4582 v->SwathWidthYThisState, 4583 v->SwathWidthCThisState, 4584 v->SwathHeightYThisState, 4585 v->SwathHeightCThisState, 4586 v->DETBufferSizeYThisState, 4587 v->DETBufferSizeCThisState, 4588 v->dummystring, 4589 &v->ViewportSizeSupport[i][j]); 4590 4591 CalculateDCFCLKDeepSleep( 4592 mode_lib, 4593 v->NumberOfActivePlanes, 4594 v->BytePerPixelY, 4595 v->BytePerPixelC, 4596 v->VRatio, 4597 v->VRatioChroma, 4598 v->SwathWidthYThisState, 4599 v->SwathWidthCThisState, 4600 v->NoOfDPPThisState, 4601 v->HRatio, 4602 v->HRatioChroma, 4603 v->PixelClock, 4604 v->PSCL_FACTOR, 4605 v->PSCL_FACTOR_CHROMA, 4606 v->RequiredDPPCLKThisState, 4607 v->ReadBandwidthLuma, 4608 v->ReadBandwidthChroma, 4609 v->ReturnBusWidth, 4610 &v->ProjectedDCFCLKDeepSleep[i][j]); 4611 4612 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4613 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4614 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4615 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4616 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4617 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4618 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4619 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4620 v->DETBufferSizeCAllStates[i][j][k] = 
v->DETBufferSizeCThisState[k]; 4621 } 4622 } 4623 } 4624 4625 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4626 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4627 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4628 } 4629 4630 for (i = 0; i < v->soc.num_states; i++) { 4631 for (j = 0; j < 2; j++) { 4632 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4633 4634 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4635 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4636 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4637 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4638 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4639 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4640 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4641 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4642 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4643 } 4644 4645 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4646 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4647 if (v->DCCEnable[k] == true) { 4648 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4649 } 4650 } 4651 4652 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4653 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4654 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4655 4656 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4657 && v->SourceScan[k] != dm_vert) { 4658 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4659 / 2; 4660 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4661 } else { 4662 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4663 
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4664 } 4665 4666 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4667 mode_lib, 4668 v->DCCEnable[k], 4669 v->Read256BlockHeightC[k], 4670 v->Read256BlockWidthC[k], 4671 v->SourcePixelFormat[k], 4672 v->SurfaceTiling[k], 4673 v->BytePerPixelC[k], 4674 v->SourceScan[k], 4675 v->SwathWidthCThisState[k], 4676 v->ViewportHeightChroma[k], 4677 v->GPUVMEnable, 4678 v->HostVMEnable, 4679 v->HostVMMaxNonCachedPageTableLevels, 4680 v->GPUVMMinPageSize, 4681 v->HostVMMinPageSize, 4682 v->PTEBufferSizeInRequestsForChroma, 4683 v->PitchC[k], 4684 0.0, 4685 &v->MacroTileWidthC[k], 4686 &v->MetaRowBytesC, 4687 &v->DPTEBytesPerRowC, 4688 &v->PTEBufferSizeNotExceededC[i][j][k], 4689 &v->dummyinteger7, 4690 &v->dpte_row_height_chroma[k], 4691 &v->dummyinteger28, 4692 &v->dummyinteger26, 4693 &v->dummyinteger23, 4694 &v->meta_row_height_chroma[k], 4695 &v->dummyinteger8, 4696 &v->dummyinteger9, 4697 &v->dummyinteger19, 4698 &v->dummyinteger20, 4699 &v->dummyinteger17, 4700 &v->dummyinteger10, 4701 &v->dummyinteger11); 4702 4703 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4704 mode_lib, 4705 v->VRatioChroma[k], 4706 v->VTAPsChroma[k], 4707 v->Interlace[k], 4708 v->ProgressiveToInterlaceUnitInOPP, 4709 v->SwathHeightCThisState[k], 4710 v->ViewportYStartC[k], 4711 &v->PrefillC[k], 4712 &v->MaxNumSwC[k]); 4713 } else { 4714 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4715 v->PTEBufferSizeInRequestsForChroma = 0; 4716 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4717 v->MetaRowBytesC = 0.0; 4718 v->DPTEBytesPerRowC = 0.0; 4719 v->PrefetchLinesC[i][j][k] = 0.0; 4720 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4721 } 4722 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4723 mode_lib, 4724 v->DCCEnable[k], 4725 v->Read256BlockHeightY[k], 4726 v->Read256BlockWidthY[k], 4727 v->SourcePixelFormat[k], 4728 v->SurfaceTiling[k], 4729 
v->BytePerPixelY[k], 4730 v->SourceScan[k], 4731 v->SwathWidthYThisState[k], 4732 v->ViewportHeight[k], 4733 v->GPUVMEnable, 4734 v->HostVMEnable, 4735 v->HostVMMaxNonCachedPageTableLevels, 4736 v->GPUVMMinPageSize, 4737 v->HostVMMinPageSize, 4738 v->PTEBufferSizeInRequestsForLuma, 4739 v->PitchY[k], 4740 v->DCCMetaPitchY[k], 4741 &v->MacroTileWidthY[k], 4742 &v->MetaRowBytesY, 4743 &v->DPTEBytesPerRowY, 4744 &v->PTEBufferSizeNotExceededY[i][j][k], 4745 &v->dummyinteger7, 4746 &v->dpte_row_height[k], 4747 &v->dummyinteger29, 4748 &v->dummyinteger27, 4749 &v->dummyinteger24, 4750 &v->meta_row_height[k], 4751 &v->dummyinteger25, 4752 &v->dpte_group_bytes[k], 4753 &v->dummyinteger21, 4754 &v->dummyinteger22, 4755 &v->dummyinteger18, 4756 &v->dummyinteger5, 4757 &v->dummyinteger6); 4758 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4759 mode_lib, 4760 v->VRatio[k], 4761 v->vtaps[k], 4762 v->Interlace[k], 4763 v->ProgressiveToInterlaceUnitInOPP, 4764 v->SwathHeightYThisState[k], 4765 v->ViewportYStartY[k], 4766 &v->PrefillY[k], 4767 &v->MaxNumSwY[k]); 4768 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4769 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4770 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4771 4772 CalculateRowBandwidth( 4773 v->GPUVMEnable, 4774 v->SourcePixelFormat[k], 4775 v->VRatio[k], 4776 v->VRatioChroma[k], 4777 v->DCCEnable[k], 4778 v->HTotal[k] / v->PixelClock[k], 4779 v->MetaRowBytesY, 4780 v->MetaRowBytesC, 4781 v->meta_row_height[k], 4782 v->meta_row_height_chroma[k], 4783 v->DPTEBytesPerRowY, 4784 v->DPTEBytesPerRowC, 4785 v->dpte_row_height[k], 4786 v->dpte_row_height_chroma[k], 4787 &v->meta_row_bandwidth[i][j][k], 4788 &v->dpte_row_bandwidth[i][j][k]); 4789 } 4790 /*DCCMetaBufferSizeSupport(i, j) = True 4791 For k = 0 To NumberOfActivePlanes - 1 4792 If MetaRowBytes(i, j, k) > 24064 Then 4793 DCCMetaBufferSizeSupport(i, j) = 
False 4794 End If 4795 Next k*/ 4796 v->DCCMetaBufferSizeSupport[i][j] = true; 4797 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4798 if (v->MetaRowBytes[i][j][k] > 24064) 4799 v->DCCMetaBufferSizeSupport[i][j] = false; 4800 } 4801 v->UrgLatency[i] = CalculateUrgentLatency( 4802 v->UrgentLatencyPixelDataOnly, 4803 v->UrgentLatencyPixelMixedWithVMData, 4804 v->UrgentLatencyVMDataOnly, 4805 v->DoUrgentLatencyAdjustment, 4806 v->UrgentLatencyAdjustmentFabricClockComponent, 4807 v->UrgentLatencyAdjustmentFabricClockReference, 4808 v->FabricClockPerState[i]); 4809 4810 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4811 CalculateUrgentBurstFactor( 4812 v->swath_width_luma_ub_this_state[k], 4813 v->swath_width_chroma_ub_this_state[k], 4814 v->SwathHeightYThisState[k], 4815 v->SwathHeightCThisState[k], 4816 v->HTotal[k] / v->PixelClock[k], 4817 v->UrgLatency[i], 4818 v->CursorBufferSize, 4819 v->CursorWidth[k][0], 4820 v->CursorBPP[k][0], 4821 v->VRatio[k], 4822 v->VRatioChroma[k], 4823 v->BytePerPixelInDETY[k], 4824 v->BytePerPixelInDETC[k], 4825 v->DETBufferSizeYThisState[k], 4826 v->DETBufferSizeCThisState[k], 4827 &v->UrgentBurstFactorCursor[k], 4828 &v->UrgentBurstFactorLuma[k], 4829 &v->UrgentBurstFactorChroma[k], 4830 &NotUrgentLatencyHiding[k]); 4831 } 4832 4833 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 4834 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4835 if (NotUrgentLatencyHiding[k]) { 4836 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 4837 } 4838 } 4839 4840 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4841 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 4842 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 4843 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 4844 } 4845 4846 v->TotalVActivePixelBandwidth[i][j] = 0; 4847 v->TotalVActiveCursorBandwidth[i][j] = 0; 4848 v->TotalMetaRowBandwidth[i][j] = 0; 4849 v->TotalDPTERowBandwidth[i][j] = 0; 4850 for 
(k = 0; k < v->NumberOfActivePlanes; ++k) { 4851 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 4852 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 4853 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 4854 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 4855 } 4856 } 4857 } 4858 4859 //Calculate Return BW 4860 for (i = 0; i < v->soc.num_states; ++i) { 4861 for (j = 0; j <= 1; ++j) { 4862 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4863 if (v->BlendingAndTiming[k] == k) { 4864 if (v->WritebackEnable[k] == true) { 4865 v->WritebackDelayTime[k] = v->WritebackLatency 4866 + CalculateWriteBackDelay( 4867 v->WritebackPixelFormat[k], 4868 v->WritebackHRatio[k], 4869 v->WritebackVRatio[k], 4870 v->WritebackVTaps[k], 4871 v->WritebackDestinationWidth[k], 4872 v->WritebackDestinationHeight[k], 4873 v->WritebackSourceHeight[k], 4874 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 4875 } else { 4876 v->WritebackDelayTime[k] = 0.0; 4877 } 4878 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4879 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 4880 v->WritebackDelayTime[k] = dml_max( 4881 v->WritebackDelayTime[k], 4882 v->WritebackLatency 4883 + CalculateWriteBackDelay( 4884 v->WritebackPixelFormat[m], 4885 v->WritebackHRatio[m], 4886 v->WritebackVRatio[m], 4887 v->WritebackVTaps[m], 4888 v->WritebackDestinationWidth[m], 4889 v->WritebackDestinationHeight[m], 4890 v->WritebackSourceHeight[m], 4891 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 4892 } 4893 } 4894 } 4895 } 4896 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4897 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4898 if (v->BlendingAndTiming[k] == m) { 4899 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 4900 } 4901 } 4902 } 
4903 v->MaxMaxVStartup[i][j] = 0; 4904 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4905 v->MaximumVStartup[i][j][k] = 4906 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 4907 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 4908 v->VTotal[k] - v->VActive[k] 4909 - dml_max( 4910 1.0, 4911 dml_ceil( 4912 1.0 * v->WritebackDelayTime[k] 4913 / (v->HTotal[k] 4914 / v->PixelClock[k]), 4915 1.0)); 4916 if (v->MaximumVStartup[i][j][k] > 1023) 4917 v->MaximumVStartup[i][j][k] = 1023; 4918 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 4919 } 4920 } 4921 } 4922 4923 ReorderingBytes = v->NumberOfChannels 4924 * dml_max3( 4925 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 4926 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 4927 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 4928 4929 for (i = 0; i < v->soc.num_states; ++i) { 4930 for (j = 0; j <= 1; ++j) { 4931 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 4932 } 4933 } 4934 4935 if (v->UseMinimumRequiredDCFCLK == true) 4936 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 4937 4938 for (i = 0; i < v->soc.num_states; ++i) { 4939 for (j = 0; j <= 1; ++j) { 4940 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 4941 v->ReturnBusWidth * v->DCFCLKState[i][j], 4942 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 4943 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 4944 double PixelDataOnlyReturnBWPerState = dml_min( 4945 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 4946 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 4947 double PixelMixedWithVMDataReturnBWPerState = dml_min( 4948 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 4949 IdealDRAMBandwidthPerState * 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 4950 4951 if (v->HostVMEnable != true) { 4952 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 4953 } else { 4954 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 4955 } 4956 } 4957 } 4958 4959 //Re-ordering Buffer Support Check 4960 for (i = 0; i < v->soc.num_states; ++i) { 4961 for (j = 0; j <= 1; ++j) { 4962 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 4963 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 4964 v->ROBSupport[i][j] = true; 4965 } else { 4966 v->ROBSupport[i][j] = false; 4967 } 4968 } 4969 } 4970 4971 //Vertical Active BW support check 4972 4973 MaxTotalVActiveRDBandwidth = 0; 4974 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4975 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4976 } 4977 4978 for (i = 0; i < v->soc.num_states; ++i) { 4979 for (j = 0; j <= 1; ++j) { 4980 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 4981 dml_min( 4982 v->ReturnBusWidth * v->DCFCLKState[i][j], 4983 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 4984 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 4985 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 4986 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 4987 4988 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 4989 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 4990 } else { 4991 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 4992 } 4993 } 4994 } 4995 4996 v->UrgentLatency = CalculateUrgentLatency( 4997 v->UrgentLatencyPixelDataOnly, 4998 v->UrgentLatencyPixelMixedWithVMData, 4999 v->UrgentLatencyVMDataOnly, 5000 v->DoUrgentLatencyAdjustment, 5001 
v->UrgentLatencyAdjustmentFabricClockComponent, 5002 v->UrgentLatencyAdjustmentFabricClockReference, 5003 v->FabricClock); 5004 //Prefetch Check 5005 for (i = 0; i < v->soc.num_states; ++i) { 5006 for (j = 0; j <= 1; ++j) { 5007 double VMDataOnlyReturnBWPerState; 5008 double HostVMInefficiencyFactor = 1; 5009 int NextPrefetchModeState = MinPrefetchMode; 5010 bool UnboundedRequestEnabledThisState = false; 5011 int CompressedBufferSizeInkByteThisState = 0; 5012 double dummy; 5013 5014 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5015 5016 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5017 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5018 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5019 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5020 } 5021 5022 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5023 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5024 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5025 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5026 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5027 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5028 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5029 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5030 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5031 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5032 } 5033 5034 VMDataOnlyReturnBWPerState = dml_min( 5035 dml_min( 5036 v->ReturnBusWidth * v->DCFCLKState[i][j], 5037 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5038 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5039 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5040 * 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5041 if (v->GPUVMEnable && v->HostVMEnable) 5042 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5043 5044 v->ExtraLatency = CalculateExtraLatency( 5045 v->RoundTripPingLatencyCycles, 5046 ReorderingBytes, 5047 v->DCFCLKState[i][j], 5048 v->TotalNumberOfActiveDPP[i][j], 5049 v->PixelChunkSizeInKByte, 5050 v->TotalNumberOfDCCActiveDPP[i][j], 5051 v->MetaChunkSize, 5052 v->ReturnBWPerState[i][j], 5053 v->GPUVMEnable, 5054 v->HostVMEnable, 5055 v->NumberOfActivePlanes, 5056 v->NoOfDPPThisState, 5057 v->dpte_group_bytes, 5058 HostVMInefficiencyFactor, 5059 v->HostVMMinPageSize, 5060 v->HostVMMaxNonCachedPageTableLevels); 5061 5062 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5063 do { 5064 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5065 v->MaxVStartup = v->NextMaxVStartup; 5066 5067 v->TWait = CalculateTWait( 5068 v->PrefetchModePerState[i][j], 5069 v->DRAMClockChangeLatency, 5070 v->UrgLatency[i], 5071 v->SREnterPlusExitTime); 5072 5073 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5074 CalculatePrefetchSchedulePerPlane(mode_lib, 5075 HostVMInefficiencyFactor, 5076 i, j, k); 5077 } 5078 5079 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5080 CalculateUrgentBurstFactor( 5081 v->swath_width_luma_ub_this_state[k], 5082 v->swath_width_chroma_ub_this_state[k], 5083 v->SwathHeightYThisState[k], 5084 v->SwathHeightCThisState[k], 5085 v->HTotal[k] / v->PixelClock[k], 5086 v->UrgentLatency, 5087 v->CursorBufferSize, 5088 v->CursorWidth[k][0], 5089 v->CursorBPP[k][0], 5090 v->VRatioPreY[i][j][k], 5091 v->VRatioPreC[i][j][k], 5092 v->BytePerPixelInDETY[k], 5093 v->BytePerPixelInDETC[k], 5094 v->DETBufferSizeYThisState[k], 5095 v->DETBufferSizeCThisState[k], 5096 &v->UrgentBurstFactorCursorPre[k], 5097 &v->UrgentBurstFactorLumaPre[k], 5098 &v->UrgentBurstFactorChroma[k], 5099 &v->NotUrgentLatencyHidingPre[k]); 5100 } 5101 5102 v->MaximumReadBandwidthWithPrefetch 
= 0.0; 5103 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5104 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5105 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5106 5107 v->MaximumReadBandwidthWithPrefetch = 5108 v->MaximumReadBandwidthWithPrefetch 5109 + dml_max3( 5110 v->VActivePixelBandwidth[i][j][k] 5111 + v->VActiveCursorBandwidth[i][j][k] 5112 + v->NoOfDPP[i][j][k] 5113 * (v->meta_row_bandwidth[i][j][k] 5114 + v->dpte_row_bandwidth[i][j][k]), 5115 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5116 v->NoOfDPP[i][j][k] 5117 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5118 * v->UrgentBurstFactorLumaPre[k] 5119 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5120 * v->UrgentBurstFactorChromaPre[k]) 5121 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5122 } 5123 5124 v->NotEnoughUrgentLatencyHidingPre = false; 5125 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5126 if (v->NotUrgentLatencyHidingPre[k] == true) { 5127 v->NotEnoughUrgentLatencyHidingPre = true; 5128 } 5129 } 5130 5131 v->PrefetchSupported[i][j] = true; 5132 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5133 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5134 v->PrefetchSupported[i][j] = false; 5135 } 5136 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5137 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5138 || v->NoTimeForPrefetch[i][j][k] == true) { 5139 v->PrefetchSupported[i][j] = false; 5140 } 5141 } 5142 5143 v->DynamicMetadataSupported[i][j] = true; 5144 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5145 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5146 v->DynamicMetadataSupported[i][j] = false; 5147 } 5148 } 5149 5150 v->VRatioInPrefetchSupported[i][j] = true; 5151 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5152 if (v->VRatioPreY[i][j][k] > 4.0 || 
v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5153 v->VRatioInPrefetchSupported[i][j] = false; 5154 } 5155 } 5156 v->AnyLinesForVMOrRowTooLarge = false; 5157 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5158 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5159 v->AnyLinesForVMOrRowTooLarge = true; 5160 } 5161 } 5162 5163 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5164 5165 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5166 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5167 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5168 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5169 - dml_max( 5170 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5171 v->NoOfDPP[i][j][k] 5172 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5173 * v->UrgentBurstFactorLumaPre[k] 5174 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5175 * v->UrgentBurstFactorChromaPre[k]) 5176 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5177 } 5178 v->TotImmediateFlipBytes = 0.0; 5179 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5180 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5181 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5182 + v->DPTEBytesPerRow[i][j][k]; 5183 } 5184 5185 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5186 CalculateFlipSchedule( 5187 mode_lib, 5188 k, 5189 HostVMInefficiencyFactor, 5190 v->ExtraLatency, 5191 v->UrgLatency[i], 5192 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5193 v->MetaRowBytes[i][j][k], 5194 v->DPTEBytesPerRow[i][j][k]); 5195 } 5196 v->total_dcn_read_bw_with_flip = 0.0; 5197 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5198 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5199 + dml_max3( 5200 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5201 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + 
v->VActivePixelBandwidth[i][j][k] 5202 + v->VActiveCursorBandwidth[i][j][k], 5203 v->NoOfDPP[i][j][k] 5204 * (v->final_flip_bw[k] 5205 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5206 * v->UrgentBurstFactorLumaPre[k] 5207 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5208 * v->UrgentBurstFactorChromaPre[k]) 5209 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5210 } 5211 v->ImmediateFlipSupportedForState[i][j] = true; 5212 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5213 v->ImmediateFlipSupportedForState[i][j] = false; 5214 } 5215 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5216 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5217 v->ImmediateFlipSupportedForState[i][j] = false; 5218 } 5219 } 5220 } else { 5221 v->ImmediateFlipSupportedForState[i][j] = false; 5222 } 5223 5224 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5225 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5226 NextPrefetchModeState = NextPrefetchModeState + 1; 5227 } else { 5228 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5229 } 5230 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5231 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5232 && ((v->HostVMEnable == false && 5233 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5234 || v->ImmediateFlipSupportedForState[i][j] == true)) 5235 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5236 5237 CalculateUnboundedRequestAndCompressedBufferSize( 5238 v->DETBufferSizeInKByte[0], 5239 v->ConfigReturnBufferSizeInKByte, 5240 v->UseUnboundedRequesting, 5241 v->TotalNumberOfActiveDPP[i][j], 5242 NoChroma, 5243 v->MaxNumDPP, 5244 v->CompressedBufferSegmentSizeInkByte, 5245 v->Output, 5246 &UnboundedRequestEnabledThisState, 5247 &CompressedBufferSizeInkByteThisState); 5248 5249 CalculateWatermarksAndDRAMSpeedChangeSupport( 
5250 mode_lib, 5251 v->PrefetchModePerState[i][j], 5252 v->DCFCLKState[i][j], 5253 v->ReturnBWPerState[i][j], 5254 v->UrgLatency[i], 5255 v->ExtraLatency, 5256 v->SOCCLKPerState[i], 5257 v->ProjectedDCFCLKDeepSleep[i][j], 5258 v->DETBufferSizeYThisState, 5259 v->DETBufferSizeCThisState, 5260 v->SwathHeightYThisState, 5261 v->SwathHeightCThisState, 5262 v->SwathWidthYThisState, 5263 v->SwathWidthCThisState, 5264 v->NoOfDPPThisState, 5265 v->BytePerPixelInDETY, 5266 v->BytePerPixelInDETC, 5267 UnboundedRequestEnabledThisState, 5268 CompressedBufferSizeInkByteThisState, 5269 &v->DRAMClockChangeSupport[i][j], 5270 &dummy, 5271 &dummy, 5272 &dummy, 5273 &dummy); 5274 } 5275 } 5276 5277 /*PTE Buffer Size Check*/ 5278 for (i = 0; i < v->soc.num_states; i++) { 5279 for (j = 0; j < 2; j++) { 5280 v->PTEBufferSizeNotExceeded[i][j] = true; 5281 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5282 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5283 v->PTEBufferSizeNotExceeded[i][j] = false; 5284 } 5285 } 5286 } 5287 } 5288 5289 /*Cursor Support Check*/ 5290 v->CursorSupport = true; 5291 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5292 if (v->CursorWidth[k][0] > 0.0) { 5293 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5294 v->CursorSupport = false; 5295 } 5296 } 5297 } 5298 5299 /*Valid Pitch Check*/ 5300 v->PitchSupport = true; 5301 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5302 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5303 if (v->DCCEnable[k] == true) { 5304 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5305 } else { 5306 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5307 } 5308 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5309 && v->SourcePixelFormat[k] != dm_mono_16 && 
v->SourcePixelFormat[k] != dm_rgbe 5310 && v->SourcePixelFormat[k] != dm_mono_8) { 5311 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5312 if (v->DCCEnable[k] == true) { 5313 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5314 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5315 64.0 * v->Read256BlockWidthC[k]); 5316 } else { 5317 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5318 } 5319 } else { 5320 v->AlignedCPitch[k] = v->PitchC[k]; 5321 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5322 } 5323 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5324 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5325 v->PitchSupport = false; 5326 } 5327 } 5328 5329 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5330 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5331 ViewportExceedsSurface = true; 5332 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5333 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5334 && v->SourcePixelFormat[k] != dm_rgbe) { 5335 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5336 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5337 ViewportExceedsSurface = true; 5338 } 5339 } 5340 } 5341 } 5342 5343 /*Mode Support, Voltage State and SOC Configuration*/ 5344 for (i = v->soc.num_states - 1; i >= 0; i--) { 5345 for (j = 0; j < 2; j++) { 5346 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5347 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5348 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5349 && v->DTBCLKRequiredMoreThanSupported[i] == false 5350 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == 
true 5351 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5352 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5353 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5354 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5355 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5356 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5357 && ((v->HostVMEnable == false 5358 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5359 || v->ImmediateFlipSupportedForState[i][j] == true) 5360 && FMTBufferExceeded == false) { 5361 v->ModeSupport[i][j] = true; 5362 } else { 5363 v->ModeSupport[i][j] = false; 5364 } 5365 } 5366 } 5367 5368 { 5369 unsigned int MaximumMPCCombine = 0; 5370 for (i = v->soc.num_states; i >= 0; i--) { 5371 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5372 v->VoltageLevel = i; 5373 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5374 if (v->ModeSupport[i][0] == true) { 5375 MaximumMPCCombine = 0; 5376 } else { 5377 MaximumMPCCombine = 1; 5378 } 5379 } 5380 } 5381 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5382 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5383 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5384 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5385 } 5386 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5387 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5388 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5389 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5390 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5391 v->maxMpcComb = MaximumMPCCombine; 
5392 } 5393 } 5394 5395 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5396 struct display_mode_lib *mode_lib, 5397 unsigned int PrefetchMode, 5398 double DCFCLK, 5399 double ReturnBW, 5400 double UrgentLatency, 5401 double ExtraLatency, 5402 double SOCCLK, 5403 double DCFCLKDeepSleep, 5404 unsigned int DETBufferSizeY[], 5405 unsigned int DETBufferSizeC[], 5406 unsigned int SwathHeightY[], 5407 unsigned int SwathHeightC[], 5408 double SwathWidthY[], 5409 double SwathWidthC[], 5410 unsigned int DPPPerPlane[], 5411 double BytePerPixelDETY[], 5412 double BytePerPixelDETC[], 5413 bool UnboundedRequestEnabled, 5414 int unsigned CompressedBufferSizeInkByte, 5415 enum clock_change_support *DRAMClockChangeSupport, 5416 double *StutterExitWatermark, 5417 double *StutterEnterPlusExitWatermark, 5418 double *Z8StutterExitWatermark, 5419 double *Z8StutterEnterPlusExitWatermark) 5420 { 5421 struct vba_vars_st *v = &mode_lib->vba; 5422 double EffectiveLBLatencyHidingY; 5423 double EffectiveLBLatencyHidingC; 5424 double LinesInDETY[DC__NUM_DPP__MAX]; 5425 double LinesInDETC; 5426 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5427 unsigned int LinesInDETCRoundedDownToSwath; 5428 double FullDETBufferingTimeY; 5429 double FullDETBufferingTimeC; 5430 double ActiveDRAMClockChangeLatencyMarginY; 5431 double ActiveDRAMClockChangeLatencyMarginC; 5432 double WritebackDRAMClockChangeLatencyMargin; 5433 double PlaneWithMinActiveDRAMClockChangeMargin; 5434 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5435 double WritebackDRAMClockChangeLatencyHiding; 5436 double TotalPixelBW = 0.0; 5437 int k, j; 5438 5439 v->UrgentWatermark = UrgentLatency + ExtraLatency; 5440 5441 #ifdef __DML_VBA_DEBUG__ 5442 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5443 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5444 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); 5445 #endif 5446 5447 
v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; 5448 5449 #ifdef __DML_VBA_DEBUG__ 5450 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); 5451 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); 5452 #endif 5453 5454 v->TotalActiveWriteback = 0; 5455 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5456 if (v->WritebackEnable[k] == true) { 5457 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5458 } 5459 } 5460 5461 if (v->TotalActiveWriteback <= 1) { 5462 v->WritebackUrgentWatermark = v->WritebackLatency; 5463 } else { 5464 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5465 } 5466 5467 if (v->TotalActiveWriteback <= 1) { 5468 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; 5469 } else { 5470 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5471 } 5472 5473 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5474 TotalPixelBW = TotalPixelBW 5475 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) 5476 / (v->HTotal[k] / v->PixelClock[k]); 5477 } 5478 5479 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5480 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5481 5482 v->LBLatencyHidingSourceLinesY = dml_min( 5483 (double) v->MaxLineBufferLines, 5484 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 5485 5486 v->LBLatencyHidingSourceLinesC = dml_min( 5487 (double) v->MaxLineBufferLines, 5488 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 5489 5490 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * 
(v->HTotal[k] / v->PixelClock[k]); 5491 5492 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 5493 5494 if (UnboundedRequestEnabled) { 5495 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5496 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 5497 } 5498 5499 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5500 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5501 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 5502 if (BytePerPixelDETC[k] > 0) { 5503 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5504 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5505 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; 5506 } else { 5507 LinesInDETC = 0; 5508 FullDETBufferingTimeC = 999999; 5509 } 5510 5511 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5512 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5513 5514 if (v->NumberOfActivePlanes > 1) { 5515 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5516 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; 5517 } 5518 5519 if (BytePerPixelDETC[k] > 0) { 5520 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5521 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5522 5523 if (v->NumberOfActivePlanes > 1) { 5524 ActiveDRAMClockChangeLatencyMarginC = 
ActiveDRAMClockChangeLatencyMarginC 5525 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; 5526 } 5527 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5528 } else { 5529 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5530 } 5531 5532 if (v->WritebackEnable[k] == true) { 5533 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 5534 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 5535 if (v->WritebackPixelFormat[k] == dm_444_64) { 5536 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5537 } 5538 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5539 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5540 } 5541 } 5542 5543 v->MinActiveDRAMClockChangeMargin = 999999; 5544 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5545 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5546 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5547 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5548 if (v->BlendingAndTiming[k] == k) { 5549 PlaneWithMinActiveDRAMClockChangeMargin = k; 5550 } else { 5551 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 5552 if (v->BlendingAndTiming[k] == j) { 5553 PlaneWithMinActiveDRAMClockChangeMargin = j; 5554 } 5555 } 5556 } 5557 } 5558 } 5559 5560 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; 5561 5562 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5563 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5564 if (!((k == 
PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5565 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5566 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5567 } 5568 } 5569 5570 v->TotalNumberOfActiveOTG = 0; 5571 5572 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5573 if (v->BlendingAndTiming[k] == k) { 5574 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5575 } 5576 } 5577 5578 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5579 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5580 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5581 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5582 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5583 } else { 5584 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5585 } 5586 5587 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5588 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5589 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5590 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5591 5592 #ifdef __DML_VBA_DEBUG__ 5593 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5594 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5595 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5596 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5597 #endif 5598 } 5599 5600 static void CalculateDCFCLKDeepSleep( 5601 struct display_mode_lib *mode_lib, 5602 unsigned int 
NumberOfActivePlanes, 5603 int BytePerPixelY[], 5604 int BytePerPixelC[], 5605 double VRatio[], 5606 double VRatioChroma[], 5607 double SwathWidthY[], 5608 double SwathWidthC[], 5609 unsigned int DPPPerPlane[], 5610 double HRatio[], 5611 double HRatioChroma[], 5612 double PixelClock[], 5613 double PSCL_THROUGHPUT[], 5614 double PSCL_THROUGHPUT_CHROMA[], 5615 double DPPCLK[], 5616 double ReadBandwidthLuma[], 5617 double ReadBandwidthChroma[], 5618 int ReturnBusWidth, 5619 double *DCFCLKDeepSleep) 5620 { 5621 struct vba_vars_st *v = &mode_lib->vba; 5622 double DisplayPipeLineDeliveryTimeLuma; 5623 double DisplayPipeLineDeliveryTimeChroma; 5624 double ReadBandwidth = 0.0; 5625 int k; 5626 5627 for (k = 0; k < NumberOfActivePlanes; ++k) { 5628 5629 if (VRatio[k] <= 1) { 5630 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5631 } else { 5632 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5633 } 5634 if (BytePerPixelC[k] == 0) { 5635 DisplayPipeLineDeliveryTimeChroma = 0; 5636 } else { 5637 if (VRatioChroma[k] <= 1) { 5638 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5639 } else { 5640 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5641 } 5642 } 5643 5644 if (BytePerPixelC[k] > 0) { 5645 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 5646 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 5647 } else { 5648 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 5649 } 5650 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 5651 5652 } 5653 5654 for (k = 0; k < NumberOfActivePlanes; ++k) { 5655 ReadBandwidth = 
ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 5656 } 5657 5658 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 5659 5660 for (k = 0; k < NumberOfActivePlanes; ++k) { 5661 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 5662 } 5663 } 5664 5665 static void CalculateUrgentBurstFactor( 5666 int swath_width_luma_ub, 5667 int swath_width_chroma_ub, 5668 unsigned int SwathHeightY, 5669 unsigned int SwathHeightC, 5670 double LineTime, 5671 double UrgentLatency, 5672 double CursorBufferSize, 5673 unsigned int CursorWidth, 5674 unsigned int CursorBPP, 5675 double VRatio, 5676 double VRatioC, 5677 double BytePerPixelInDETY, 5678 double BytePerPixelInDETC, 5679 double DETBufferSizeY, 5680 double DETBufferSizeC, 5681 double *UrgentBurstFactorCursor, 5682 double *UrgentBurstFactorLuma, 5683 double *UrgentBurstFactorChroma, 5684 bool *NotEnoughUrgentLatencyHiding) 5685 { 5686 double LinesInDETLuma; 5687 double LinesInDETChroma; 5688 unsigned int LinesInCursorBuffer; 5689 double CursorBufferSizeInTime; 5690 double DETBufferSizeInTimeLuma; 5691 double DETBufferSizeInTimeChroma; 5692 5693 *NotEnoughUrgentLatencyHiding = 0; 5694 5695 if (CursorWidth > 0) { 5696 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 5697 if (VRatio > 0) { 5698 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 5699 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 5700 *NotEnoughUrgentLatencyHiding = 1; 5701 *UrgentBurstFactorCursor = 0; 5702 } else { 5703 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 5704 } 5705 } else { 5706 *UrgentBurstFactorCursor = 1; 5707 } 5708 } 5709 5710 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 5711 if (VRatio > 0) { 5712 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 5713 if 
(DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 5714 *NotEnoughUrgentLatencyHiding = 1; 5715 *UrgentBurstFactorLuma = 0; 5716 } else { 5717 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 5718 } 5719 } else { 5720 *UrgentBurstFactorLuma = 1; 5721 } 5722 5723 if (BytePerPixelInDETC > 0) { 5724 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 5725 if (VRatio > 0) { 5726 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 5727 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 5728 *NotEnoughUrgentLatencyHiding = 1; 5729 *UrgentBurstFactorChroma = 0; 5730 } else { 5731 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 5732 } 5733 } else { 5734 *UrgentBurstFactorChroma = 1; 5735 } 5736 } 5737 } 5738 5739 static void CalculatePixelDeliveryTimes( 5740 unsigned int NumberOfActivePlanes, 5741 double VRatio[], 5742 double VRatioChroma[], 5743 double VRatioPrefetchY[], 5744 double VRatioPrefetchC[], 5745 unsigned int swath_width_luma_ub[], 5746 unsigned int swath_width_chroma_ub[], 5747 unsigned int DPPPerPlane[], 5748 double HRatio[], 5749 double HRatioChroma[], 5750 double PixelClock[], 5751 double PSCL_THROUGHPUT[], 5752 double PSCL_THROUGHPUT_CHROMA[], 5753 double DPPCLK[], 5754 int BytePerPixelC[], 5755 enum scan_direction_class SourceScan[], 5756 unsigned int NumberOfCursors[], 5757 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 5758 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 5759 unsigned int BlockWidth256BytesY[], 5760 unsigned int BlockHeight256BytesY[], 5761 unsigned int BlockWidth256BytesC[], 5762 unsigned int BlockHeight256BytesC[], 5763 double DisplayPipeLineDeliveryTimeLuma[], 5764 double DisplayPipeLineDeliveryTimeChroma[], 5765 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 5766 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 5767 double DisplayPipeRequestDeliveryTimeLuma[], 5768 
double DisplayPipeRequestDeliveryTimeChroma[], 5769 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 5770 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 5771 double CursorRequestDeliveryTime[], 5772 double CursorRequestDeliveryTimePrefetch[]) 5773 { 5774 double req_per_swath_ub; 5775 int k; 5776 5777 for (k = 0; k < NumberOfActivePlanes; ++k) { 5778 if (VRatio[k] <= 1) { 5779 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5780 } else { 5781 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5782 } 5783 5784 if (BytePerPixelC[k] == 0) { 5785 DisplayPipeLineDeliveryTimeChroma[k] = 0; 5786 } else { 5787 if (VRatioChroma[k] <= 1) { 5788 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5789 } else { 5790 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5791 } 5792 } 5793 5794 if (VRatioPrefetchY[k] <= 1) { 5795 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5796 } else { 5797 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5798 } 5799 5800 if (BytePerPixelC[k] == 0) { 5801 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 5802 } else { 5803 if (VRatioPrefetchC[k] <= 1) { 5804 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5805 } else { 5806 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5807 } 5808 } 5809 } 5810 5811 for (k = 0; k < NumberOfActivePlanes; ++k) { 5812 if (SourceScan[k] != dm_vert) { 5813 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 5814 } else { 5815 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 5816 } 5817 
DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 5818 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 5819 if (BytePerPixelC[k] == 0) { 5820 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 5821 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 5822 } else { 5823 if (SourceScan[k] != dm_vert) { 5824 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 5825 } else { 5826 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 5827 } 5828 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 5829 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 5830 } 5831 #ifdef __DML_VBA_DEBUG__ 5832 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 5833 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 5834 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 5835 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 5836 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 5837 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 5838 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 5839 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 5840 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 5841 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 5842 dml_print("DML::%s: k=%d : 
DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 5843 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 5844 #endif 5845 } 5846 5847 for (k = 0; k < NumberOfActivePlanes; ++k) { 5848 int cursor_req_per_width; 5849 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 5850 if (NumberOfCursors[k] > 0) { 5851 if (VRatio[k] <= 1) { 5852 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 5853 } else { 5854 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 5855 } 5856 if (VRatioPrefetchY[k] <= 1) { 5857 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 5858 } else { 5859 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 5860 } 5861 } else { 5862 CursorRequestDeliveryTime[k] = 0; 5863 CursorRequestDeliveryTimePrefetch[k] = 0; 5864 } 5865 #ifdef __DML_VBA_DEBUG__ 5866 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 5867 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 5868 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 5869 #endif 5870 } 5871 } 5872 5873 static void CalculateMetaAndPTETimes( 5874 int NumberOfActivePlanes, 5875 bool GPUVMEnable, 5876 int MetaChunkSize, 5877 int MinMetaChunkSizeBytes, 5878 int HTotal[], 5879 double VRatio[], 5880 double VRatioChroma[], 5881 double DestinationLinesToRequestRowInVBlank[], 5882 double DestinationLinesToRequestRowInImmediateFlip[], 5883 bool DCCEnable[], 5884 double PixelClock[], 5885 int BytePerPixelY[], 5886 int BytePerPixelC[], 5887 enum scan_direction_class 
SourceScan[], 5888 int dpte_row_height[], 5889 int dpte_row_height_chroma[], 5890 int meta_row_width[], 5891 int meta_row_width_chroma[], 5892 int meta_row_height[], 5893 int meta_row_height_chroma[], 5894 int meta_req_width[], 5895 int meta_req_width_chroma[], 5896 int meta_req_height[], 5897 int meta_req_height_chroma[], 5898 int dpte_group_bytes[], 5899 int PTERequestSizeY[], 5900 int PTERequestSizeC[], 5901 int PixelPTEReqWidthY[], 5902 int PixelPTEReqHeightY[], 5903 int PixelPTEReqWidthC[], 5904 int PixelPTEReqHeightC[], 5905 int dpte_row_width_luma_ub[], 5906 int dpte_row_width_chroma_ub[], 5907 double DST_Y_PER_PTE_ROW_NOM_L[], 5908 double DST_Y_PER_PTE_ROW_NOM_C[], 5909 double DST_Y_PER_META_ROW_NOM_L[], 5910 double DST_Y_PER_META_ROW_NOM_C[], 5911 double TimePerMetaChunkNominal[], 5912 double TimePerChromaMetaChunkNominal[], 5913 double TimePerMetaChunkVBlank[], 5914 double TimePerChromaMetaChunkVBlank[], 5915 double TimePerMetaChunkFlip[], 5916 double TimePerChromaMetaChunkFlip[], 5917 double time_per_pte_group_nom_luma[], 5918 double time_per_pte_group_vblank_luma[], 5919 double time_per_pte_group_flip_luma[], 5920 double time_per_pte_group_nom_chroma[], 5921 double time_per_pte_group_vblank_chroma[], 5922 double time_per_pte_group_flip_chroma[]) 5923 { 5924 unsigned int meta_chunk_width; 5925 unsigned int min_meta_chunk_width; 5926 unsigned int meta_chunk_per_row_int; 5927 unsigned int meta_row_remainder; 5928 unsigned int meta_chunk_threshold; 5929 unsigned int meta_chunks_per_row_ub; 5930 unsigned int meta_chunk_width_chroma; 5931 unsigned int min_meta_chunk_width_chroma; 5932 unsigned int meta_chunk_per_row_int_chroma; 5933 unsigned int meta_row_remainder_chroma; 5934 unsigned int meta_chunk_threshold_chroma; 5935 unsigned int meta_chunks_per_row_ub_chroma; 5936 unsigned int dpte_group_width_luma; 5937 unsigned int dpte_groups_per_row_luma_ub; 5938 unsigned int dpte_group_width_chroma; 5939 unsigned int dpte_groups_per_row_chroma_ub; 5940 int k; 5941 
5942 for (k = 0; k < NumberOfActivePlanes; ++k) { 5943 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 5944 if (BytePerPixelC[k] == 0) { 5945 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 5946 } else { 5947 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 5948 } 5949 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 5950 if (BytePerPixelC[k] == 0) { 5951 DST_Y_PER_META_ROW_NOM_C[k] = 0; 5952 } else { 5953 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 5954 } 5955 } 5956 5957 for (k = 0; k < NumberOfActivePlanes; ++k) { 5958 if (DCCEnable[k] == true) { 5959 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 5960 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 5961 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 5962 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 5963 if (SourceScan[k] != dm_vert) { 5964 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 5965 } else { 5966 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 5967 } 5968 if (meta_row_remainder <= meta_chunk_threshold) { 5969 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 5970 } else { 5971 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 5972 } 5973 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 5974 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 5975 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 5976 if (BytePerPixelC[k] == 0) { 5977 TimePerChromaMetaChunkNominal[k] = 0; 5978 TimePerChromaMetaChunkVBlank[k] = 0; 5979 TimePerChromaMetaChunkFlip[k] = 0; 5980 } else { 5981 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 
5982 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 5983 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 5984 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 5985 if (SourceScan[k] != dm_vert) { 5986 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 5987 } else { 5988 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 5989 } 5990 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 5991 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 5992 } else { 5993 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 5994 } 5995 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 5996 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 5997 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 5998 } 5999 } else { 6000 TimePerMetaChunkNominal[k] = 0; 6001 TimePerMetaChunkVBlank[k] = 0; 6002 TimePerMetaChunkFlip[k] = 0; 6003 TimePerChromaMetaChunkNominal[k] = 0; 6004 TimePerChromaMetaChunkVBlank[k] = 0; 6005 TimePerChromaMetaChunkFlip[k] = 0; 6006 } 6007 } 6008 6009 for (k = 0; k < NumberOfActivePlanes; ++k) { 6010 if (GPUVMEnable == true) { 6011 if (SourceScan[k] != dm_vert) { 6012 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6013 } else { 6014 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6015 } 6016 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6017 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / 
/*
 * CalculateVMGroupAndRequestTimes() - time budget per VM group and per VM request.
 *
 * For each plane k in [0, NumberOfActivePlanes) the four outputs
 * (TimePerVMGroup{VBlank,Flip}[k], TimePerVMRequest{VBlank,Flip}[k]) are 0
 * unless GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels > 1.
 *
 * Otherwise the VBlank/flip request window
 * (DestinationLinesToRequestVMIn* * HTotal / PixelClock) is divided by a
 * group count and by a request count built from:
 *   - dpde0 bytes only                 when DCC is off,
 *   - meta PTE bytes only              when DCC is on and there is 1 page table level,
 *   - both, plus 1 (or 2 with chroma)  when DCC is on with any other level count.
 * Groups are bytes / vm_group_bytes rounded via dml_ceil(); requests are
 * bytes / 64 (integer division).  Chroma terms are included only when
 * BytePerPixelC[k] > 0.  All four results are halved when
 * GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int plane;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		bool has_chroma;
		double group_sum;	/* running sum of dml_ceil() results */
		unsigned int req_sum;	/* running sum of bytes / 64 terms */
		int groups_per_stage;
		int reqs_per_stage;
		double vblank_window;
		double flip_window;
		double group_vblank, group_flip, req_vblank, req_flip;

		if (!(GPUVMEnable == true && (DCCEnable[plane] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[plane] = 0;
			TimePerVMGroupFlip[plane] = 0;
			TimePerVMRequestVBlank[plane] = 0;
			TimePerVMRequestFlip[plane] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[plane] > 0;

		if (DCCEnable[plane] == false) {
			// Page directory entries only.
			group_sum = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			req_sum = dpde0_bytes_per_frame_ub_l[plane] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				req_sum += dpde0_bytes_per_frame_ub_c[plane] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			// Meta PTEs only.
			group_sum = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			req_sum = meta_pte_bytes_per_frame_ub_l[plane] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				req_sum += meta_pte_bytes_per_frame_ub_c[plane] / 64;
			}
		} else {
			// Page directory entries and meta PTEs, plus a fixed 1 (luma only) or 2 (with chroma).
			group_sum = has_chroma ? 2 : 1;
			group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			req_sum = dpde0_bytes_per_frame_ub_l[plane] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				req_sum += dpde0_bytes_per_frame_ub_c[plane] / 64;
			}
			group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			req_sum += meta_pte_bytes_per_frame_ub_l[plane] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				req_sum += meta_pte_bytes_per_frame_ub_c[plane] / 64;
			}
		}

		// The divisors are plain ints, exactly as the sums convert on assignment.
		groups_per_stage = group_sum;
		reqs_per_stage = req_sum;

		vblank_window = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane];
		flip_window = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane];

		group_vblank = vblank_window / groups_per_stage;
		group_flip = flip_window / groups_per_stage;
		req_vblank = vblank_window / reqs_per_stage;
		req_flip = flip_window / reqs_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			group_vblank = group_vblank / 2;
			group_flip = group_flip / 2;
			req_vblank = req_vblank / 2;
			req_flip = req_flip / 2;
		}

		TimePerVMGroupVBlank[plane] = group_vblank;
		TimePerVMGroupFlip[plane] = group_flip;
		TimePerVMRequestVBlank[plane] = req_vblank;
		TimePerVMRequestFlip[plane] = req_flip;
	}
}
TimePerVMRequestFlip[k] / 2; 6135 } 6136 6137 } else { 6138 TimePerVMGroupVBlank[k] = 0; 6139 TimePerVMGroupFlip[k] = 0; 6140 TimePerVMRequestVBlank[k] = 0; 6141 TimePerVMRequestFlip[k] = 0; 6142 } 6143 } 6144 } 6145 6146 static void CalculateStutterEfficiency( 6147 struct display_mode_lib *mode_lib, 6148 int CompressedBufferSizeInkByte, 6149 bool UnboundedRequestEnabled, 6150 int ConfigReturnBufferSizeInKByte, 6151 int MetaFIFOSizeInKEntries, 6152 int ZeroSizeBufferEntries, 6153 int NumberOfActivePlanes, 6154 int ROBBufferSizeInKByte, 6155 double TotalDataReadBandwidth, 6156 double DCFCLK, 6157 double ReturnBW, 6158 double COMPBUF_RESERVED_SPACE_64B, 6159 double COMPBUF_RESERVED_SPACE_ZS, 6160 double SRExitTime, 6161 double SRExitZ8Time, 6162 bool SynchronizedVBlank, 6163 double Z8StutterEnterPlusExitWatermark, 6164 double StutterEnterPlusExitWatermark, 6165 bool ProgressiveToInterlaceUnitInOPP, 6166 bool Interlace[], 6167 double MinTTUVBlank[], 6168 int DPPPerPlane[], 6169 unsigned int DETBufferSizeY[], 6170 int BytePerPixelY[], 6171 double BytePerPixelDETY[], 6172 double SwathWidthY[], 6173 int SwathHeightY[], 6174 int SwathHeightC[], 6175 double NetDCCRateLuma[], 6176 double NetDCCRateChroma[], 6177 double DCCFractionOfZeroSizeRequestsLuma[], 6178 double DCCFractionOfZeroSizeRequestsChroma[], 6179 int HTotal[], 6180 int VTotal[], 6181 double PixelClock[], 6182 double VRatio[], 6183 enum scan_direction_class SourceScan[], 6184 int BlockHeight256BytesY[], 6185 int BlockWidth256BytesY[], 6186 int BlockHeight256BytesC[], 6187 int BlockWidth256BytesC[], 6188 int DCCYMaxUncompressedBlock[], 6189 int DCCCMaxUncompressedBlock[], 6190 int VActive[], 6191 bool DCCEnable[], 6192 bool WritebackEnable[], 6193 double ReadBandwidthPlaneLuma[], 6194 double ReadBandwidthPlaneChroma[], 6195 double meta_row_bw[], 6196 double dpte_row_bw[], 6197 double *StutterEfficiencyNotIncludingVBlank, 6198 double *StutterEfficiency, 6199 int *NumberOfStutterBurstsPerFrame, 6200 double 
*Z8StutterEfficiencyNotIncludingVBlank, 6201 double *Z8StutterEfficiency, 6202 int *Z8NumberOfStutterBurstsPerFrame, 6203 double *StutterPeriod) 6204 { 6205 struct vba_vars_st *v = &mode_lib->vba; 6206 6207 double DETBufferingTimeY; 6208 double SwathWidthYCriticalPlane = 0; 6209 double VActiveTimeCriticalPlane = 0; 6210 double FrameTimeCriticalPlane = 0; 6211 int BytePerPixelYCriticalPlane = 0; 6212 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6213 double MinTTUVBlankCriticalPlane = 0; 6214 double TotalCompressedReadBandwidth; 6215 double TotalRowReadBandwidth; 6216 double AverageDCCCompressionRate; 6217 double EffectiveCompressedBufferSize; 6218 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6219 double StutterBurstTime; 6220 int TotalActiveWriteback; 6221 double LinesInDETY; 6222 double LinesInDETYRoundedDownToSwath; 6223 double MaximumEffectiveCompressionLuma; 6224 double MaximumEffectiveCompressionChroma; 6225 double TotalZeroSizeRequestReadBandwidth; 6226 double TotalZeroSizeCompressedReadBandwidth; 6227 double AverageDCCZeroSizeFraction; 6228 double AverageZeroSizeCompressionRate; 6229 int TotalNumberOfActiveOTG = 0; 6230 double LastStutterPeriod = 0.0; 6231 double LastZ8StutterPeriod = 0.0; 6232 int k; 6233 6234 TotalZeroSizeRequestReadBandwidth = 0; 6235 TotalZeroSizeCompressedReadBandwidth = 0; 6236 TotalRowReadBandwidth = 0; 6237 TotalCompressedReadBandwidth = 0; 6238 6239 for (k = 0; k < NumberOfActivePlanes; ++k) { 6240 if (DCCEnable[k] == true) { 6241 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6242 || DCCYMaxUncompressedBlock[k] < 256) { 6243 MaximumEffectiveCompressionLuma = 2; 6244 } else { 6245 MaximumEffectiveCompressionLuma = 4; 6246 } 6247 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6248 
TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6249 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6250 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6251 if (ReadBandwidthPlaneChroma[k] > 0) { 6252 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6253 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6254 MaximumEffectiveCompressionChroma = 2; 6255 } else { 6256 MaximumEffectiveCompressionChroma = 4; 6257 } 6258 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6259 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6260 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6261 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6262 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6263 } 6264 } else { 6265 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6266 } 6267 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6268 } 6269 6270 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6271 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6272 6273 #ifdef __DML_VBA_DEBUG__ 6274 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6275 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6276 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, 
TotalZeroSizeCompressedReadBandwidth); 6277 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6278 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6279 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6280 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6281 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6282 #endif 6283 6284 if (AverageDCCZeroSizeFraction == 1) { 6285 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6286 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6287 } else if (AverageDCCZeroSizeFraction > 0) { 6288 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6289 EffectiveCompressedBufferSize = dml_min( 6290 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6291 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6292 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6293 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6294 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6295 dml_print( 6296 "DML::%s: min 2 = %f\n", 6297 __func__, 6298 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6299 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * 
AverageDCCCompressionRate); 6300 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6301 } else { 6302 EffectiveCompressedBufferSize = dml_min( 6303 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6304 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6305 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6306 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6307 } 6308 6309 #ifdef __DML_VBA_DEBUG__ 6310 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6311 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6312 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6313 #endif 6314 6315 *StutterPeriod = 0; 6316 for (k = 0; k < NumberOfActivePlanes; ++k) { 6317 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6318 / BytePerPixelDETY[k] / SwathWidthY[k]; 6319 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6320 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6321 #ifdef __DML_VBA_DEBUG__ 6322 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6323 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6324 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6325 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6326 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6327 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6328 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6329 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6330 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6331 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6332 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6333 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6334 #endif 6335 6336 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6337 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6338 6339 *StutterPeriod = DETBufferingTimeY; 6340 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6341 VActiveTimeCriticalPlane = (isInterlaceTiming ? 
dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6342 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6343 SwathWidthYCriticalPlane = SwathWidthY[k]; 6344 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6345 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6346 6347 #ifdef __DML_VBA_DEBUG__ 6348 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6349 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6350 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6351 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6352 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6353 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6354 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6355 #endif 6356 } 6357 } 6358 6359 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6360 #ifdef __DML_VBA_DEBUG__ 6361 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6362 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6363 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6364 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6365 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6366 dml_print("DML::%s: 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6367 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6368 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6369 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6370 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6371 #endif 6372 6373 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6374 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6375 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6376 #ifdef __DML_VBA_DEBUG__ 6377 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6378 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6379 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6380 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6381 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6382 #endif 6383 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6384 6385 dml_print( 6386 "DML::%s: Time to finish residue swath=%f\n", 6387 __func__, 6388 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6389 6390 TotalActiveWriteback = 0; 6391 for (k = 0; k < NumberOfActivePlanes; ++k) { 6392 if (WritebackEnable[k]) { 6393 TotalActiveWriteback = TotalActiveWriteback + 1; 
6394 } 6395 } 6396 6397 if (TotalActiveWriteback == 0) { 6398 #ifdef __DML_VBA_DEBUG__ 6399 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6400 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6401 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6402 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6403 #endif 6404 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6405 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6406 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6407 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6408 } else { 6409 *StutterEfficiencyNotIncludingVBlank = 0.; 6410 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6411 *NumberOfStutterBurstsPerFrame = 0; 6412 *Z8NumberOfStutterBurstsPerFrame = 0; 6413 } 6414 #ifdef __DML_VBA_DEBUG__ 6415 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6416 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6417 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6418 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6419 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6420 #endif 6421 6422 for (k = 0; k < NumberOfActivePlanes; ++k) { 6423 if (v->BlendingAndTiming[k] == k) { 6424 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6425 } 6426 } 6427 6428 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6429 LastStutterPeriod = 
VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6430 6431 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6432 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6433 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6434 } else { 6435 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6436 } 6437 } else { 6438 *StutterEfficiency = 0; 6439 } 6440 6441 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6442 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6443 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6444 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6445 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6446 } else { 6447 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6448 } 6449 } else { 6450 *Z8StutterEfficiency = 0.; 6451 } 6452 6453 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6454 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6455 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6456 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6457 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6458 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6459 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6460 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6461 } 6462 6463 static void 
CalculateSwathAndDETConfiguration( 6464 bool ForceSingleDPP, 6465 int NumberOfActivePlanes, 6466 unsigned int DETBufferSizeInKByte, 6467 double MaximumSwathWidthLuma[], 6468 double MaximumSwathWidthChroma[], 6469 enum scan_direction_class SourceScan[], 6470 enum source_format_class SourcePixelFormat[], 6471 enum dm_swizzle_mode SurfaceTiling[], 6472 int ViewportWidth[], 6473 int ViewportHeight[], 6474 int SurfaceWidthY[], 6475 int SurfaceWidthC[], 6476 int SurfaceHeightY[], 6477 int SurfaceHeightC[], 6478 int Read256BytesBlockHeightY[], 6479 int Read256BytesBlockHeightC[], 6480 int Read256BytesBlockWidthY[], 6481 int Read256BytesBlockWidthC[], 6482 enum odm_combine_mode ODMCombineEnabled[], 6483 int BlendingAndTiming[], 6484 int BytePerPixY[], 6485 int BytePerPixC[], 6486 double BytePerPixDETY[], 6487 double BytePerPixDETC[], 6488 int HActive[], 6489 double HRatio[], 6490 double HRatioChroma[], 6491 int DPPPerPlane[], 6492 int swath_width_luma_ub[], 6493 int swath_width_chroma_ub[], 6494 double SwathWidth[], 6495 double SwathWidthChroma[], 6496 int SwathHeightY[], 6497 int SwathHeightC[], 6498 unsigned int DETBufferSizeY[], 6499 unsigned int DETBufferSizeC[], 6500 bool ViewportSizeSupportPerPlane[], 6501 bool *ViewportSizeSupport) 6502 { 6503 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6504 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6505 int MinimumSwathHeightY; 6506 int MinimumSwathHeightC; 6507 int RoundedUpMaxSwathSizeBytesY; 6508 int RoundedUpMaxSwathSizeBytesC; 6509 int RoundedUpMinSwathSizeBytesY; 6510 int RoundedUpMinSwathSizeBytesC; 6511 int RoundedUpSwathSizeBytesY; 6512 int RoundedUpSwathSizeBytesC; 6513 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6514 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6515 int k; 6516 6517 CalculateSwathWidth( 6518 ForceSingleDPP, 6519 NumberOfActivePlanes, 6520 SourcePixelFormat, 6521 SourceScan, 6522 ViewportWidth, 6523 ViewportHeight, 6524 SurfaceWidthY, 6525 SurfaceWidthC, 6526 SurfaceHeightY, 6527 SurfaceHeightC, 
6528 ODMCombineEnabled, 6529 BytePerPixY, 6530 BytePerPixC, 6531 Read256BytesBlockHeightY, 6532 Read256BytesBlockHeightC, 6533 Read256BytesBlockWidthY, 6534 Read256BytesBlockWidthC, 6535 BlendingAndTiming, 6536 HActive, 6537 HRatio, 6538 DPPPerPlane, 6539 SwathWidthSingleDPP, 6540 SwathWidthSingleDPPChroma, 6541 SwathWidth, 6542 SwathWidthChroma, 6543 MaximumSwathHeightY, 6544 MaximumSwathHeightC, 6545 swath_width_luma_ub, 6546 swath_width_chroma_ub); 6547 6548 *ViewportSizeSupport = true; 6549 for (k = 0; k < NumberOfActivePlanes; ++k) { 6550 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6551 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6552 if (SurfaceTiling[k] == dm_sw_linear 6553 || (SourcePixelFormat[k] == dm_444_64 6554 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6555 && SourceScan[k] != dm_vert)) { 6556 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6557 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6558 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6559 } else { 6560 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6561 } 6562 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6563 } else { 6564 if (SurfaceTiling[k] == dm_sw_linear) { 6565 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6566 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6567 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6568 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6569 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6570 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6571 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6572 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6573 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6574 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6575 
MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6576 } else { 6577 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6578 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6579 } 6580 } 6581 6582 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6583 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6584 if (SourcePixelFormat[k] == dm_420_10) { 6585 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6586 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6587 } 6588 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6589 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6590 if (SourcePixelFormat[k] == dm_420_10) { 6591 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6592 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6593 } 6594 6595 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6596 SwathHeightY[k] = MaximumSwathHeightY[k]; 6597 SwathHeightC[k] = MaximumSwathHeightC[k]; 6598 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6599 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6600 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6601 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6602 SwathHeightY[k] = MinimumSwathHeightY; 6603 SwathHeightC[k] = MaximumSwathHeightC[k]; 6604 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6605 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6606 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6607 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6608 SwathHeightY[k] = 
MaximumSwathHeightY[k]; 6609 SwathHeightC[k] = MinimumSwathHeightC; 6610 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6611 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6612 } else { 6613 SwathHeightY[k] = MinimumSwathHeightY; 6614 SwathHeightC[k] = MinimumSwathHeightC; 6615 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6616 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6617 } 6618 { 6619 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6620 if (SwathHeightC[k] == 0) { 6621 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6622 DETBufferSizeC[k] = 0; 6623 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6624 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6625 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6626 } else { 6627 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6628 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6629 } 6630 6631 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6632 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6633 *ViewportSizeSupport = false; 6634 ViewportSizeSupportPerPlane[k] = false; 6635 } else { 6636 ViewportSizeSupportPerPlane[k] = true; 6637 } 6638 } 6639 } 6640 } 6641 6642 static void CalculateSwathWidth( 6643 bool ForceSingleDPP, 6644 int NumberOfActivePlanes, 6645 enum source_format_class SourcePixelFormat[], 6646 enum scan_direction_class SourceScan[], 6647 int ViewportWidth[], 6648 int ViewportHeight[], 6649 int SurfaceWidthY[], 6650 int SurfaceWidthC[], 6651 int SurfaceHeightY[], 6652 int SurfaceHeightC[], 6653 enum odm_combine_mode ODMCombineEnabled[], 6654 int BytePerPixY[], 6655 int BytePerPixC[], 6656 int Read256BytesBlockHeightY[], 6657 int Read256BytesBlockHeightC[], 6658 int Read256BytesBlockWidthY[], 6659 int Read256BytesBlockWidthC[], 6660 int 
BlendingAndTiming[], 6661 int HActive[], 6662 double HRatio[], 6663 int DPPPerPlane[], 6664 double SwathWidthSingleDPPY[], 6665 double SwathWidthSingleDPPC[], 6666 double SwathWidthY[], 6667 double SwathWidthC[], 6668 int MaximumSwathHeightY[], 6669 int MaximumSwathHeightC[], 6670 int swath_width_luma_ub[], 6671 int swath_width_chroma_ub[]) 6672 { 6673 enum odm_combine_mode MainPlaneODMCombine; 6674 int j, k; 6675 6676 #ifdef __DML_VBA_DEBUG__ 6677 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 6678 #endif 6679 6680 for (k = 0; k < NumberOfActivePlanes; ++k) { 6681 if (SourceScan[k] != dm_vert) { 6682 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 6683 } else { 6684 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 6685 } 6686 6687 #ifdef __DML_VBA_DEBUG__ 6688 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 6689 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 6690 #endif 6691 6692 MainPlaneODMCombine = ODMCombineEnabled[k]; 6693 for (j = 0; j < NumberOfActivePlanes; ++j) { 6694 if (BlendingAndTiming[k] == j) { 6695 MainPlaneODMCombine = ODMCombineEnabled[j]; 6696 } 6697 } 6698 6699 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) { 6700 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 6701 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) { 6702 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 6703 } else if (DPPPerPlane[k] == 2) { 6704 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 6705 } else { 6706 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6707 } 6708 6709 #ifdef __DML_VBA_DEBUG__ 6710 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 6711 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 6712 #endif 6713 6714 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == 
dm_420_12) { 6715 SwathWidthC[k] = SwathWidthY[k] / 2; 6716 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 6717 } else { 6718 SwathWidthC[k] = SwathWidthY[k]; 6719 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 6720 } 6721 6722 if (ForceSingleDPP == true) { 6723 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6724 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 6725 } 6726 { 6727 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 6728 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 6729 6730 #ifdef __DML_VBA_DEBUG__ 6731 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 6732 #endif 6733 6734 if (SourceScan[k] != dm_vert) { 6735 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 6736 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 6737 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 6738 if (BytePerPixC[k] > 0) { 6739 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 6740 6741 swath_width_chroma_ub[k] = dml_min( 6742 surface_width_ub_c, 6743 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 6744 } else { 6745 swath_width_chroma_ub[k] = 0; 6746 } 6747 } else { 6748 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 6749 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 6750 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 6751 if (BytePerPixC[k] > 0) { 6752 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 6753 6754 swath_width_chroma_ub[k] = dml_min( 6755 surface_height_ub_c, 6756 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 6757 } else { 6758 swath_width_chroma_ub[k] = 0; 6759 } 6760 } 6761 } 6762 
} 6763 } 6764 6765 static double CalculateExtraLatency( 6766 int RoundTripPingLatencyCycles, 6767 int ReorderingBytes, 6768 double DCFCLK, 6769 int TotalNumberOfActiveDPP, 6770 int PixelChunkSizeInKByte, 6771 int TotalNumberOfDCCActiveDPP, 6772 int MetaChunkSize, 6773 double ReturnBW, 6774 bool GPUVMEnable, 6775 bool HostVMEnable, 6776 int NumberOfActivePlanes, 6777 int NumberOfDPP[], 6778 int dpte_group_bytes[], 6779 double HostVMInefficiencyFactor, 6780 double HostVMMinPageSize, 6781 int HostVMMaxNonCachedPageTableLevels) 6782 { 6783 double ExtraLatencyBytes; 6784 double ExtraLatency; 6785 6786 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6787 ReorderingBytes, 6788 TotalNumberOfActiveDPP, 6789 PixelChunkSizeInKByte, 6790 TotalNumberOfDCCActiveDPP, 6791 MetaChunkSize, 6792 GPUVMEnable, 6793 HostVMEnable, 6794 NumberOfActivePlanes, 6795 NumberOfDPP, 6796 dpte_group_bytes, 6797 HostVMInefficiencyFactor, 6798 HostVMMinPageSize, 6799 HostVMMaxNonCachedPageTableLevels); 6800 6801 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 6802 6803 #ifdef __DML_VBA_DEBUG__ 6804 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 6805 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 6806 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 6807 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 6808 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 6809 #endif 6810 6811 return ExtraLatency; 6812 } 6813 6814 static double CalculateExtraLatencyBytes( 6815 int ReorderingBytes, 6816 int TotalNumberOfActiveDPP, 6817 int PixelChunkSizeInKByte, 6818 int TotalNumberOfDCCActiveDPP, 6819 int MetaChunkSize, 6820 bool GPUVMEnable, 6821 bool HostVMEnable, 6822 int NumberOfActivePlanes, 6823 int NumberOfDPP[], 6824 int dpte_group_bytes[], 6825 double HostVMInefficiencyFactor, 6826 double HostVMMinPageSize, 6827 int 
HostVMMaxNonCachedPageTableLevels) 6828 { 6829 double ret; 6830 int HostVMDynamicLevels = 0, k; 6831 6832 if (GPUVMEnable == true && HostVMEnable == true) { 6833 if (HostVMMinPageSize < 2048) { 6834 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 6835 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 6836 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 6837 } else { 6838 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 6839 } 6840 } else { 6841 HostVMDynamicLevels = 0; 6842 } 6843 6844 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 6845 6846 if (GPUVMEnable == true) { 6847 for (k = 0; k < NumberOfActivePlanes; ++k) { 6848 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 6849 } 6850 } 6851 return ret; 6852 } 6853 6854 static double CalculateUrgentLatency( 6855 double UrgentLatencyPixelDataOnly, 6856 double UrgentLatencyPixelMixedWithVMData, 6857 double UrgentLatencyVMDataOnly, 6858 bool DoUrgentLatencyAdjustment, 6859 double UrgentLatencyAdjustmentFabricClockComponent, 6860 double UrgentLatencyAdjustmentFabricClockReference, 6861 double FabricClock) 6862 { 6863 double ret; 6864 6865 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 6866 if (DoUrgentLatencyAdjustment == true) { 6867 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 6868 } 6869 return ret; 6870 } 6871 6872 static void UseMinimumDCFCLK( 6873 struct display_mode_lib *mode_lib, 6874 int MaxPrefetchMode, 6875 int ReorderingBytes) 6876 { 6877 struct vba_vars_st *v = &mode_lib->vba; 6878 int dummy1, i, j, k; 6879 double NormalEfficiency, dummy2, dummy3; 6880 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 6881 6882 NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 6883 for (i = 0; i < v->soc.num_states; ++i) { 6884 for (j = 0; j <= 1; ++j) { 6885 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 6886 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 6887 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 6888 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 6889 double MinimumTWait; 6890 double NonDPTEBandwidth; 6891 double DPTEBandwidth; 6892 double DCFCLKRequiredForAverageBandwidth; 6893 double ExtraLatencyBytes; 6894 double ExtraLatencyCycles; 6895 double DCFCLKRequiredForPeakBandwidth; 6896 int NoOfDPPState[DC__NUM_DPP__MAX]; 6897 double MinimumTvmPlus2Tr0; 6898 6899 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 6900 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 6901 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 6902 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 6903 } 6904 6905 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 6906 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 6907 } 6908 6909 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 6910 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 6911 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 
6912 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 6913 DCFCLKRequiredForAverageBandwidth = dml_max3( 6914 v->ProjectedDCFCLKDeepSleep[i][j], 6915 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 6916 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 6917 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 6918 6919 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6920 ReorderingBytes, 6921 v->TotalNumberOfActiveDPP[i][j], 6922 v->PixelChunkSizeInKByte, 6923 v->TotalNumberOfDCCActiveDPP[i][j], 6924 v->MetaChunkSize, 6925 v->GPUVMEnable, 6926 v->HostVMEnable, 6927 v->NumberOfActivePlanes, 6928 NoOfDPPState, 6929 v->dpte_group_bytes, 6930 1, 6931 v->HostVMMinPageSize, 6932 v->HostVMMaxNonCachedPageTableLevels); 6933 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 6934 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 6935 double DCFCLKCyclesRequiredInPrefetch; 6936 double ExpectedPrefetchBWAcceleration; 6937 double PrefetchTime; 6938 6939 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 6940 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; 6941 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 6942 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 6943 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 6944 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 6945 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 6946 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 6947 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 6948 DynamicMetadataVMExtraLatency[k] = 6949 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 6950 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 6951 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 6952 - v->UrgLatency[i] 6953 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 6954 * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 6955 - DynamicMetadataVMExtraLatency[k]; 6956 6957 if (PrefetchTime > 0) { 6958 double ExpectedVRatioPrefetch; 6959 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 6960 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 6961 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 6962 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 6963 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 6964 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 6965 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 6966 } 6967 } else { 6968 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 6969 } 6970 if (v->DynamicMetadataEnable[k] == true) { 6971 double TSetupPipe; 6972 double TdmbfPipe; 6973 double TdmsksPipe; 6974 double TdmecPipe; 6975 double AllowedTimeForUrgentExtraLatency; 6976 6977 CalculateVupdateAndDynamicMetadataParameters( 6978 v->MaxInterDCNTileRepeaters, 6979 v->RequiredDPPCLK[i][j][k], 6980 v->RequiredDISPCLK[i][j], 6981 v->ProjectedDCFCLKDeepSleep[i][j], 6982 v->PixelClock[k], 6983 v->HTotal[k], 6984 v->VTotal[k] - v->VActive[k], 6985 v->DynamicMetadataTransmittedBytes[k], 6986 v->DynamicMetadataLinesBeforeActiveRequired[k], 6987 v->Interlace[k], 6988 v->ProgressiveToInterlaceUnitInOPP, 6989 &TSetupPipe, 6990 &TdmbfPipe, 6991 &TdmecPipe, 6992 &TdmsksPipe, 6993 &dummy1, 6994 &dummy2, 6995 &dummy3); 6996 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 6997 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 6998 if (AllowedTimeForUrgentExtraLatency > 0) { 6999 DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max( 7000 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7001 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7002 } else { 7003 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7004 } 7005 } 7006 } 7007 DCFCLKRequiredForPeakBandwidth = 0; 7008 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 7009 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7010 } 7011 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7012 * (v->GPUVMEnable == true ? 7013 (v->HostVMEnable == true ? 7014 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7015 0); 7016 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7017 double MaximumTvmPlus2Tr0PlusTsw; 7018 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7019 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7020 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7021 } else { 7022 DCFCLKRequiredForPeakBandwidth = dml_max3( 7023 DCFCLKRequiredForPeakBandwidth, 7024 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7025 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7026 } 7027 } 7028 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7029 } 7030 } 7031 } 7032 7033 static void CalculateUnboundedRequestAndCompressedBufferSize( 7034 unsigned int DETBufferSizeInKByte, 7035 int ConfigReturnBufferSizeInKByte, 7036 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7037 int TotalActiveDPP, 7038 bool NoChromaPlanes, 7039 int MaxNumDPP, 7040 int CompressedBufferSegmentSizeInkByteFinal, 7041 enum output_encoder_class *Output, 7042 bool 
*UnboundedRequestEnabled, 7043 int *CompressedBufferSizeInkByte) 7044 { 7045 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7046 7047 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7048 *CompressedBufferSizeInkByte = ( 7049 *UnboundedRequestEnabled == true ? 7050 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7051 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7052 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7053 7054 #ifdef __DML_VBA_DEBUG__ 7055 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7056 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7057 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7058 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7059 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7060 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7061 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7062 #endif 7063 } 7064 7065 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7066 { 7067 bool ret_val = false; 7068 7069 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7070 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { 7071 ret_val = false; 7072 } 7073 return (ret_val); 7074 } 7075 7076