1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dc.h" 27 #include "dc_link.h" 28 #include "../display_mode_lib.h" 29 #include "../dcn30/display_mode_vba_30.h" 30 #include "display_mode_vba_31.h" 31 #include "../dml_inline_defs.h" 32 33 /* 34 * NOTE: 35 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 36 * 37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 38 * ways. Unless there is something clearly wrong with it the code should 39 * remain as-is as it provides us with a guarantee from HW that it is correct. 
40 */ 41 42 #define BPP_INVALID 0 43 #define BPP_BLENDED_PIPE 0xffffffff 44 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184 45 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 46 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 47 #define DCN3_15_MAX_DET_SIZE 384 48 49 // For DML-C changes that hasn't been propagated to VBA yet 50 //#define __DML_VBA_ALLOW_DELTA__ 51 52 // Move these to ip paramaters/constant 53 54 // At which vstartup the DML start to try if the mode can be supported 55 #define __DML_VBA_MIN_VSTARTUP__ 9 56 57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 59 60 // fudge factor for min dcfclk calclation 61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 62 63 typedef struct { 64 double DPPCLK; 65 double DISPCLK; 66 double PixelClock; 67 double DCFCLKDeepSleep; 68 unsigned int DPPPerPlane; 69 bool ScalerEnabled; 70 double VRatio; 71 double VRatioChroma; 72 enum scan_direction_class SourceScan; 73 unsigned int BlockWidth256BytesY; 74 unsigned int BlockHeight256BytesY; 75 unsigned int BlockWidth256BytesC; 76 unsigned int BlockHeight256BytesC; 77 unsigned int InterlaceEnable; 78 unsigned int NumberOfCursors; 79 unsigned int VBlank; 80 unsigned int HTotal; 81 unsigned int DCCEnable; 82 bool ODMCombineIsEnabled; 83 enum source_format_class SourcePixelFormat; 84 int BytePerPixelY; 85 int BytePerPixelC; 86 bool ProgressiveToInterlaceUnitInOPP; 87 } Pipe; 88 89 #define BPP_INVALID 0 90 #define BPP_BLENDED_PIPE 0xffffffff 91 92 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 93 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 94 static unsigned int dscceComputeDelay( 95 unsigned int bpc, 96 double BPP, 97 unsigned int sliceWidth, 98 unsigned int numSlices, 99 enum output_format_class pixelFormat, 100 enum output_encoder_class Output); 101 static unsigned int dscComputeDelay(enum output_format_class 
pixelFormat, enum output_encoder_class Output); 102 static bool CalculatePrefetchSchedule( 103 struct display_mode_lib *mode_lib, 104 double HostVMInefficiencyFactor, 105 Pipe *myPipe, 106 unsigned int DSCDelay, 107 double DPPCLKDelaySubtotalPlusCNVCFormater, 108 double DPPCLKDelaySCL, 109 double DPPCLKDelaySCLLBOnly, 110 double DPPCLKDelayCNVCCursor, 111 double DISPCLKDelaySubtotal, 112 unsigned int DPP_RECOUT_WIDTH, 113 enum output_format_class OutputFormat, 114 unsigned int MaxInterDCNTileRepeaters, 115 unsigned int VStartup, 116 unsigned int MaxVStartup, 117 unsigned int GPUVMPageTableLevels, 118 bool GPUVMEnable, 119 bool HostVMEnable, 120 unsigned int HostVMMaxNonCachedPageTableLevels, 121 double HostVMMinPageSize, 122 bool DynamicMetadataEnable, 123 bool DynamicMetadataVMEnabled, 124 int DynamicMetadataLinesBeforeActiveRequired, 125 unsigned int DynamicMetadataTransmittedBytes, 126 double UrgentLatency, 127 double UrgentExtraLatency, 128 double TCalc, 129 unsigned int PDEAndMetaPTEBytesFrame, 130 unsigned int MetaRowByte, 131 unsigned int PixelPTEBytesPerRow, 132 double PrefetchSourceLinesY, 133 unsigned int SwathWidthY, 134 double VInitPreFillY, 135 unsigned int MaxNumSwathY, 136 double PrefetchSourceLinesC, 137 unsigned int SwathWidthC, 138 double VInitPreFillC, 139 unsigned int MaxNumSwathC, 140 int swath_width_luma_ub, 141 int swath_width_chroma_ub, 142 unsigned int SwathHeightY, 143 unsigned int SwathHeightC, 144 double TWait, 145 double *DSTXAfterScaler, 146 double *DSTYAfterScaler, 147 double *DestinationLinesForPrefetch, 148 double *PrefetchBandwidth, 149 double *DestinationLinesToRequestVMInVBlank, 150 double *DestinationLinesToRequestRowInVBlank, 151 double *VRatioPrefetchY, 152 double *VRatioPrefetchC, 153 double *RequiredPrefetchPixDataBWLuma, 154 double *RequiredPrefetchPixDataBWChroma, 155 bool *NotEnoughTimeForDynamicMetadata, 156 double *Tno_bw, 157 double *prefetch_vmrow_bw, 158 double *Tdmdl_vm, 159 double *Tdmdl, 160 double *TSetup, 161 
int *VUpdateOffsetPix, 162 double *VUpdateWidthPix, 163 double *VReadyOffsetPix); 164 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 165 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 166 static void CalculateDCCConfiguration( 167 bool DCCEnabled, 168 bool DCCProgrammingAssumesScanDirectionUnknown, 169 enum source_format_class SourcePixelFormat, 170 unsigned int SurfaceWidthLuma, 171 unsigned int SurfaceWidthChroma, 172 unsigned int SurfaceHeightLuma, 173 unsigned int SurfaceHeightChroma, 174 double DETBufferSize, 175 unsigned int RequestHeight256ByteLuma, 176 unsigned int RequestHeight256ByteChroma, 177 enum dm_swizzle_mode TilingFormat, 178 unsigned int BytePerPixelY, 179 unsigned int BytePerPixelC, 180 double BytePerPixelDETY, 181 double BytePerPixelDETC, 182 enum scan_direction_class ScanOrientation, 183 unsigned int *MaxUncompressedBlockLuma, 184 unsigned int *MaxUncompressedBlockChroma, 185 unsigned int *MaxCompressedBlockLuma, 186 unsigned int *MaxCompressedBlockChroma, 187 unsigned int *IndependentBlockLuma, 188 unsigned int *IndependentBlockChroma); 189 static double CalculatePrefetchSourceLines( 190 struct display_mode_lib *mode_lib, 191 double VRatio, 192 double vtaps, 193 bool Interlace, 194 bool ProgressiveToInterlaceUnitInOPP, 195 unsigned int SwathHeight, 196 unsigned int ViewportYStart, 197 double *VInitPreFill, 198 unsigned int *MaxNumSwath); 199 static unsigned int CalculateVMAndRowBytes( 200 struct display_mode_lib *mode_lib, 201 bool DCCEnable, 202 unsigned int BlockHeight256Bytes, 203 unsigned int BlockWidth256Bytes, 204 enum source_format_class SourcePixelFormat, 205 unsigned int SurfaceTiling, 206 unsigned int BytePerPixel, 207 enum scan_direction_class ScanDirection, 208 unsigned int SwathWidth, 209 unsigned int ViewportHeight, 210 bool GPUVMEnable, 211 bool HostVMEnable, 212 unsigned int HostVMMaxNonCachedPageTableLevels, 213 unsigned int GPUVMMinPageSize, 214 unsigned int HostVMMinPageSize, 
215 unsigned int PTEBufferSizeInRequests, 216 unsigned int Pitch, 217 unsigned int DCCMetaPitch, 218 unsigned int *MacroTileWidth, 219 unsigned int *MetaRowByte, 220 unsigned int *PixelPTEBytesPerRow, 221 bool *PTEBufferSizeNotExceeded, 222 int *dpte_row_width_ub, 223 unsigned int *dpte_row_height, 224 unsigned int *MetaRequestWidth, 225 unsigned int *MetaRequestHeight, 226 unsigned int *meta_row_width, 227 unsigned int *meta_row_height, 228 int *vm_group_bytes, 229 unsigned int *dpte_group_bytes, 230 unsigned int *PixelPTEReqWidth, 231 unsigned int *PixelPTEReqHeight, 232 unsigned int *PTERequestSize, 233 int *DPDE0BytesFrame, 234 int *MetaPTEBytesFrame); 235 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 236 static void CalculateRowBandwidth( 237 bool GPUVMEnable, 238 enum source_format_class SourcePixelFormat, 239 double VRatio, 240 double VRatioChroma, 241 bool DCCEnable, 242 double LineTime, 243 unsigned int MetaRowByteLuma, 244 unsigned int MetaRowByteChroma, 245 unsigned int meta_row_height_luma, 246 unsigned int meta_row_height_chroma, 247 unsigned int PixelPTEBytesPerRowLuma, 248 unsigned int PixelPTEBytesPerRowChroma, 249 unsigned int dpte_row_height_luma, 250 unsigned int dpte_row_height_chroma, 251 double *meta_row_bw, 252 double *dpte_row_bw); 253 254 static void CalculateFlipSchedule( 255 struct display_mode_lib *mode_lib, 256 unsigned int k, 257 double HostVMInefficiencyFactor, 258 double UrgentExtraLatency, 259 double UrgentLatency, 260 double PDEAndMetaPTEBytesPerFrame, 261 double MetaRowBytes, 262 double DPTEBytesPerRow); 263 static double CalculateWriteBackDelay( 264 enum source_format_class WritebackPixelFormat, 265 double WritebackHRatio, 266 double WritebackVRatio, 267 unsigned int WritebackVTaps, 268 int WritebackDestinationWidth, 269 int WritebackDestinationHeight, 270 int WritebackSourceHeight, 271 unsigned int HTotal); 272 273 static void 
CalculateVupdateAndDynamicMetadataParameters( 274 int MaxInterDCNTileRepeaters, 275 double DPPCLK, 276 double DISPCLK, 277 double DCFClkDeepSleep, 278 double PixelClock, 279 int HTotal, 280 int VBlank, 281 int DynamicMetadataTransmittedBytes, 282 int DynamicMetadataLinesBeforeActiveRequired, 283 int InterlaceEnable, 284 bool ProgressiveToInterlaceUnitInOPP, 285 double *TSetup, 286 double *Tdmbf, 287 double *Tdmec, 288 double *Tdmsks, 289 int *VUpdateOffsetPix, 290 double *VUpdateWidthPix, 291 double *VReadyOffsetPix); 292 293 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 294 struct display_mode_lib *mode_lib, 295 unsigned int PrefetchMode, 296 double DCFCLK, 297 double ReturnBW, 298 double UrgentLatency, 299 double ExtraLatency, 300 double SOCCLK, 301 double DCFCLKDeepSleep, 302 unsigned int DETBufferSizeY[], 303 unsigned int DETBufferSizeC[], 304 unsigned int SwathHeightY[], 305 unsigned int SwathHeightC[], 306 double SwathWidthY[], 307 double SwathWidthC[], 308 unsigned int DPPPerPlane[], 309 double BytePerPixelDETY[], 310 double BytePerPixelDETC[], 311 bool UnboundedRequestEnabled, 312 int unsigned CompressedBufferSizeInkByte, 313 enum clock_change_support *DRAMClockChangeSupport, 314 double *StutterExitWatermark, 315 double *StutterEnterPlusExitWatermark, 316 double *Z8StutterExitWatermark, 317 double *Z8StutterEnterPlusExitWatermark); 318 319 static void CalculateDCFCLKDeepSleep( 320 struct display_mode_lib *mode_lib, 321 unsigned int NumberOfActivePlanes, 322 int BytePerPixelY[], 323 int BytePerPixelC[], 324 double VRatio[], 325 double VRatioChroma[], 326 double SwathWidthY[], 327 double SwathWidthC[], 328 unsigned int DPPPerPlane[], 329 double HRatio[], 330 double HRatioChroma[], 331 double PixelClock[], 332 double PSCL_THROUGHPUT[], 333 double PSCL_THROUGHPUT_CHROMA[], 334 double DPPCLK[], 335 double ReadBandwidthLuma[], 336 double ReadBandwidthChroma[], 337 int ReturnBusWidth, 338 double *DCFCLKDeepSleep); 339 340 static void 
CalculateUrgentBurstFactor( 341 int swath_width_luma_ub, 342 int swath_width_chroma_ub, 343 unsigned int SwathHeightY, 344 unsigned int SwathHeightC, 345 double LineTime, 346 double UrgentLatency, 347 double CursorBufferSize, 348 unsigned int CursorWidth, 349 unsigned int CursorBPP, 350 double VRatio, 351 double VRatioC, 352 double BytePerPixelInDETY, 353 double BytePerPixelInDETC, 354 double DETBufferSizeY, 355 double DETBufferSizeC, 356 double *UrgentBurstFactorCursor, 357 double *UrgentBurstFactorLuma, 358 double *UrgentBurstFactorChroma, 359 bool *NotEnoughUrgentLatencyHiding); 360 361 static void UseMinimumDCFCLK( 362 struct display_mode_lib *mode_lib, 363 int MaxPrefetchMode, 364 int ReorderingBytes); 365 366 static void CalculatePixelDeliveryTimes( 367 unsigned int NumberOfActivePlanes, 368 double VRatio[], 369 double VRatioChroma[], 370 double VRatioPrefetchY[], 371 double VRatioPrefetchC[], 372 unsigned int swath_width_luma_ub[], 373 unsigned int swath_width_chroma_ub[], 374 unsigned int DPPPerPlane[], 375 double HRatio[], 376 double HRatioChroma[], 377 double PixelClock[], 378 double PSCL_THROUGHPUT[], 379 double PSCL_THROUGHPUT_CHROMA[], 380 double DPPCLK[], 381 int BytePerPixelC[], 382 enum scan_direction_class SourceScan[], 383 unsigned int NumberOfCursors[], 384 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 385 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 386 unsigned int BlockWidth256BytesY[], 387 unsigned int BlockHeight256BytesY[], 388 unsigned int BlockWidth256BytesC[], 389 unsigned int BlockHeight256BytesC[], 390 double DisplayPipeLineDeliveryTimeLuma[], 391 double DisplayPipeLineDeliveryTimeChroma[], 392 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 393 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 394 double DisplayPipeRequestDeliveryTimeLuma[], 395 double DisplayPipeRequestDeliveryTimeChroma[], 396 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 397 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 398 double 
CursorRequestDeliveryTime[], 399 double CursorRequestDeliveryTimePrefetch[]); 400 401 static void CalculateMetaAndPTETimes( 402 int NumberOfActivePlanes, 403 bool GPUVMEnable, 404 int MetaChunkSize, 405 int MinMetaChunkSizeBytes, 406 int HTotal[], 407 double VRatio[], 408 double VRatioChroma[], 409 double DestinationLinesToRequestRowInVBlank[], 410 double DestinationLinesToRequestRowInImmediateFlip[], 411 bool DCCEnable[], 412 double PixelClock[], 413 int BytePerPixelY[], 414 int BytePerPixelC[], 415 enum scan_direction_class SourceScan[], 416 int dpte_row_height[], 417 int dpte_row_height_chroma[], 418 int meta_row_width[], 419 int meta_row_width_chroma[], 420 int meta_row_height[], 421 int meta_row_height_chroma[], 422 int meta_req_width[], 423 int meta_req_width_chroma[], 424 int meta_req_height[], 425 int meta_req_height_chroma[], 426 int dpte_group_bytes[], 427 int PTERequestSizeY[], 428 int PTERequestSizeC[], 429 int PixelPTEReqWidthY[], 430 int PixelPTEReqHeightY[], 431 int PixelPTEReqWidthC[], 432 int PixelPTEReqHeightC[], 433 int dpte_row_width_luma_ub[], 434 int dpte_row_width_chroma_ub[], 435 double DST_Y_PER_PTE_ROW_NOM_L[], 436 double DST_Y_PER_PTE_ROW_NOM_C[], 437 double DST_Y_PER_META_ROW_NOM_L[], 438 double DST_Y_PER_META_ROW_NOM_C[], 439 double TimePerMetaChunkNominal[], 440 double TimePerChromaMetaChunkNominal[], 441 double TimePerMetaChunkVBlank[], 442 double TimePerChromaMetaChunkVBlank[], 443 double TimePerMetaChunkFlip[], 444 double TimePerChromaMetaChunkFlip[], 445 double time_per_pte_group_nom_luma[], 446 double time_per_pte_group_vblank_luma[], 447 double time_per_pte_group_flip_luma[], 448 double time_per_pte_group_nom_chroma[], 449 double time_per_pte_group_vblank_chroma[], 450 double time_per_pte_group_flip_chroma[]); 451 452 static void CalculateVMGroupAndRequestTimes( 453 unsigned int NumberOfActivePlanes, 454 bool GPUVMEnable, 455 unsigned int GPUVMMaxPageTableLevels, 456 unsigned int HTotal[], 457 int BytePerPixelC[], 458 double 
DestinationLinesToRequestVMInVBlank[], 459 double DestinationLinesToRequestVMInImmediateFlip[], 460 bool DCCEnable[], 461 double PixelClock[], 462 int dpte_row_width_luma_ub[], 463 int dpte_row_width_chroma_ub[], 464 int vm_group_bytes[], 465 unsigned int dpde0_bytes_per_frame_ub_l[], 466 unsigned int dpde0_bytes_per_frame_ub_c[], 467 int meta_pte_bytes_per_frame_ub_l[], 468 int meta_pte_bytes_per_frame_ub_c[], 469 double TimePerVMGroupVBlank[], 470 double TimePerVMGroupFlip[], 471 double TimePerVMRequestVBlank[], 472 double TimePerVMRequestFlip[]); 473 474 static void CalculateStutterEfficiency( 475 struct display_mode_lib *mode_lib, 476 int CompressedBufferSizeInkByte, 477 bool UnboundedRequestEnabled, 478 int ConfigReturnBufferSizeInKByte, 479 int MetaFIFOSizeInKEntries, 480 int ZeroSizeBufferEntries, 481 int NumberOfActivePlanes, 482 int ROBBufferSizeInKByte, 483 double TotalDataReadBandwidth, 484 double DCFCLK, 485 double ReturnBW, 486 double COMPBUF_RESERVED_SPACE_64B, 487 double COMPBUF_RESERVED_SPACE_ZS, 488 double SRExitTime, 489 double SRExitZ8Time, 490 bool SynchronizedVBlank, 491 double Z8StutterEnterPlusExitWatermark, 492 double StutterEnterPlusExitWatermark, 493 bool ProgressiveToInterlaceUnitInOPP, 494 bool Interlace[], 495 double MinTTUVBlank[], 496 int DPPPerPlane[], 497 unsigned int DETBufferSizeY[], 498 int BytePerPixelY[], 499 double BytePerPixelDETY[], 500 double SwathWidthY[], 501 int SwathHeightY[], 502 int SwathHeightC[], 503 double NetDCCRateLuma[], 504 double NetDCCRateChroma[], 505 double DCCFractionOfZeroSizeRequestsLuma[], 506 double DCCFractionOfZeroSizeRequestsChroma[], 507 int HTotal[], 508 int VTotal[], 509 double PixelClock[], 510 double VRatio[], 511 enum scan_direction_class SourceScan[], 512 int BlockHeight256BytesY[], 513 int BlockWidth256BytesY[], 514 int BlockHeight256BytesC[], 515 int BlockWidth256BytesC[], 516 int DCCYMaxUncompressedBlock[], 517 int DCCCMaxUncompressedBlock[], 518 int VActive[], 519 bool DCCEnable[], 520 
bool WritebackEnable[], 521 double ReadBandwidthPlaneLuma[], 522 double ReadBandwidthPlaneChroma[], 523 double meta_row_bw[], 524 double dpte_row_bw[], 525 double *StutterEfficiencyNotIncludingVBlank, 526 double *StutterEfficiency, 527 int *NumberOfStutterBurstsPerFrame, 528 double *Z8StutterEfficiencyNotIncludingVBlank, 529 double *Z8StutterEfficiency, 530 int *Z8NumberOfStutterBurstsPerFrame, 531 double *StutterPeriod); 532 533 static void CalculateSwathAndDETConfiguration( 534 bool ForceSingleDPP, 535 int NumberOfActivePlanes, 536 unsigned int DETBufferSizeInKByte, 537 double MaximumSwathWidthLuma[], 538 double MaximumSwathWidthChroma[], 539 enum scan_direction_class SourceScan[], 540 enum source_format_class SourcePixelFormat[], 541 enum dm_swizzle_mode SurfaceTiling[], 542 int ViewportWidth[], 543 int ViewportHeight[], 544 int SurfaceWidthY[], 545 int SurfaceWidthC[], 546 int SurfaceHeightY[], 547 int SurfaceHeightC[], 548 int Read256BytesBlockHeightY[], 549 int Read256BytesBlockHeightC[], 550 int Read256BytesBlockWidthY[], 551 int Read256BytesBlockWidthC[], 552 enum odm_combine_mode ODMCombineEnabled[], 553 int BlendingAndTiming[], 554 int BytePerPixY[], 555 int BytePerPixC[], 556 double BytePerPixDETY[], 557 double BytePerPixDETC[], 558 int HActive[], 559 double HRatio[], 560 double HRatioChroma[], 561 int DPPPerPlane[], 562 int swath_width_luma_ub[], 563 int swath_width_chroma_ub[], 564 double SwathWidth[], 565 double SwathWidthChroma[], 566 int SwathHeightY[], 567 int SwathHeightC[], 568 unsigned int DETBufferSizeY[], 569 unsigned int DETBufferSizeC[], 570 bool ViewportSizeSupportPerPlane[], 571 bool *ViewportSizeSupport); 572 static void CalculateSwathWidth( 573 bool ForceSingleDPP, 574 int NumberOfActivePlanes, 575 enum source_format_class SourcePixelFormat[], 576 enum scan_direction_class SourceScan[], 577 int ViewportWidth[], 578 int ViewportHeight[], 579 int SurfaceWidthY[], 580 int SurfaceWidthC[], 581 int SurfaceHeightY[], 582 int SurfaceHeightC[], 
583 enum odm_combine_mode ODMCombineEnabled[], 584 int BytePerPixY[], 585 int BytePerPixC[], 586 int Read256BytesBlockHeightY[], 587 int Read256BytesBlockHeightC[], 588 int Read256BytesBlockWidthY[], 589 int Read256BytesBlockWidthC[], 590 int BlendingAndTiming[], 591 int HActive[], 592 double HRatio[], 593 int DPPPerPlane[], 594 double SwathWidthSingleDPPY[], 595 double SwathWidthSingleDPPC[], 596 double SwathWidthY[], 597 double SwathWidthC[], 598 int MaximumSwathHeightY[], 599 int MaximumSwathHeightC[], 600 int swath_width_luma_ub[], 601 int swath_width_chroma_ub[]); 602 603 static double CalculateExtraLatency( 604 int RoundTripPingLatencyCycles, 605 int ReorderingBytes, 606 double DCFCLK, 607 int TotalNumberOfActiveDPP, 608 int PixelChunkSizeInKByte, 609 int TotalNumberOfDCCActiveDPP, 610 int MetaChunkSize, 611 double ReturnBW, 612 bool GPUVMEnable, 613 bool HostVMEnable, 614 int NumberOfActivePlanes, 615 int NumberOfDPP[], 616 int dpte_group_bytes[], 617 double HostVMInefficiencyFactor, 618 double HostVMMinPageSize, 619 int HostVMMaxNonCachedPageTableLevels); 620 621 static double CalculateExtraLatencyBytes( 622 int ReorderingBytes, 623 int TotalNumberOfActiveDPP, 624 int PixelChunkSizeInKByte, 625 int TotalNumberOfDCCActiveDPP, 626 int MetaChunkSize, 627 bool GPUVMEnable, 628 bool HostVMEnable, 629 int NumberOfActivePlanes, 630 int NumberOfDPP[], 631 int dpte_group_bytes[], 632 double HostVMInefficiencyFactor, 633 double HostVMMinPageSize, 634 int HostVMMaxNonCachedPageTableLevels); 635 636 static double CalculateUrgentLatency( 637 double UrgentLatencyPixelDataOnly, 638 double UrgentLatencyPixelMixedWithVMData, 639 double UrgentLatencyVMDataOnly, 640 bool DoUrgentLatencyAdjustment, 641 double UrgentLatencyAdjustmentFabricClockComponent, 642 double UrgentLatencyAdjustmentFabricClockReference, 643 double FabricClockSingle); 644 645 static void CalculateUnboundedRequestAndCompressedBufferSize( 646 unsigned int DETBufferSizeInKByte, 647 int 
ConfigReturnBufferSizeInKByte, 648 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 649 int TotalActiveDPP, 650 bool NoChromaPlanes, 651 int MaxNumDPP, 652 int CompressedBufferSegmentSizeInkByteFinal, 653 enum output_encoder_class *Output, 654 bool *UnboundedRequestEnabled, 655 int *CompressedBufferSizeInkByte); 656 657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 658 659 void dml31_recalculate(struct display_mode_lib *mode_lib) 660 { 661 ModeSupportAndSystemConfiguration(mode_lib); 662 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 663 DisplayPipeConfiguration(mode_lib); 664 #ifdef __DML_VBA_DEBUG__ 665 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 666 #endif 667 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 668 } 669 670 static unsigned int dscceComputeDelay( 671 unsigned int bpc, 672 double BPP, 673 unsigned int sliceWidth, 674 unsigned int numSlices, 675 enum output_format_class pixelFormat, 676 enum output_encoder_class Output) 677 { 678 // valid bpc = source bits per component in the set of {8, 10, 12} 679 // valid bpp = increments of 1/16 of a bit 680 // min = 6/7/8 in N420/N422/444, respectively 681 // max = such that compression is 1:1 682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 685 686 // fixed value 687 unsigned int rcModelSize = 8192; 688 689 // N422/N420 operate at 2 pixels per clock 690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, 
pixels; 691 692 if (pixelFormat == dm_420) 693 pixelsPerClock = 2; 694 else if (pixelFormat == dm_444) 695 pixelsPerClock = 1; 696 else if (pixelFormat == dm_n422) 697 pixelsPerClock = 2; 698 // #all other modes operate at 1 pixel per clock 699 else 700 pixelsPerClock = 1; 701 702 //initial transmit delay as per PPS 703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 704 705 //compute ssm delay 706 if (bpc == 8) 707 D = 81; 708 else if (bpc == 10) 709 D = 89; 710 else 711 D = 113; 712 713 //divide by pixel per cycle to compute slice width as seen by DSC 714 w = sliceWidth / pixelsPerClock; 715 716 //422 mode has an additional cycle of delay 717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 718 s = 0; 719 else 720 s = 1; 721 722 //main calculation for the dscce 723 ix = initalXmitDelay + 45; 724 wx = (w + 2) / 3; 725 P = 3 * wx - w; 726 l0 = ix / w; 727 a = ix + P * l0; 728 ax = (a + 2) / 3 + D + 6 + 1; 729 L = (ax + wx - 1) / wx; 730 if ((ix % w) == 0 && P != 0) 731 lstall = 1; 732 else 733 lstall = 0; 734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 735 736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 737 pixels = Delay * 3 * pixelsPerClock; 738 return pixels; 739 } 740 741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 742 { 743 unsigned int Delay = 0; 744 745 if (pixelFormat == dm_420) { 746 // sfr 747 Delay = Delay + 2; 748 // dsccif 749 Delay = Delay + 0; 750 // dscc - input deserializer 751 Delay = Delay + 3; 752 // dscc gets pixels every other cycle 753 Delay = Delay + 2; 754 // dscc - input cdc fifo 755 Delay = Delay + 12; 756 // dscc gets pixels every other cycle 757 Delay = Delay + 13; 758 // dscc - cdc uncertainty 759 Delay = Delay + 2; 760 // dscc - output cdc fifo 761 Delay = Delay + 7; 762 // dscc gets pixels every other cycle 763 Delay = Delay + 3; 764 // dscc - cdc uncertainty 765 
Delay = Delay + 2; 766 // dscc - output serializer 767 Delay = Delay + 1; 768 // sft 769 Delay = Delay + 1; 770 } else if (pixelFormat == dm_n422) { 771 // sfr 772 Delay = Delay + 2; 773 // dsccif 774 Delay = Delay + 1; 775 // dscc - input deserializer 776 Delay = Delay + 5; 777 // dscc - input cdc fifo 778 Delay = Delay + 25; 779 // dscc - cdc uncertainty 780 Delay = Delay + 2; 781 // dscc - output cdc fifo 782 Delay = Delay + 10; 783 // dscc - cdc uncertainty 784 Delay = Delay + 2; 785 // dscc - output serializer 786 Delay = Delay + 1; 787 // sft 788 Delay = Delay + 1; 789 } else { 790 // sfr 791 Delay = Delay + 2; 792 // dsccif 793 Delay = Delay + 0; 794 // dscc - input deserializer 795 Delay = Delay + 3; 796 // dscc - input cdc fifo 797 Delay = Delay + 12; 798 // dscc - cdc uncertainty 799 Delay = Delay + 2; 800 // dscc - output cdc fifo 801 Delay = Delay + 7; 802 // dscc - output serializer 803 Delay = Delay + 1; 804 // dscc - cdc uncertainty 805 Delay = Delay + 2; 806 // sft 807 Delay = Delay + 1; 808 } 809 810 return Delay; 811 } 812 813 static bool CalculatePrefetchSchedule( 814 struct display_mode_lib *mode_lib, 815 double HostVMInefficiencyFactor, 816 Pipe *myPipe, 817 unsigned int DSCDelay, 818 double DPPCLKDelaySubtotalPlusCNVCFormater, 819 double DPPCLKDelaySCL, 820 double DPPCLKDelaySCLLBOnly, 821 double DPPCLKDelayCNVCCursor, 822 double DISPCLKDelaySubtotal, 823 unsigned int DPP_RECOUT_WIDTH, 824 enum output_format_class OutputFormat, 825 unsigned int MaxInterDCNTileRepeaters, 826 unsigned int VStartup, 827 unsigned int MaxVStartup, 828 unsigned int GPUVMPageTableLevels, 829 bool GPUVMEnable, 830 bool HostVMEnable, 831 unsigned int HostVMMaxNonCachedPageTableLevels, 832 double HostVMMinPageSize, 833 bool DynamicMetadataEnable, 834 bool DynamicMetadataVMEnabled, 835 int DynamicMetadataLinesBeforeActiveRequired, 836 unsigned int DynamicMetadataTransmittedBytes, 837 double UrgentLatency, 838 double UrgentExtraLatency, 839 double TCalc, 840 unsigned int 
PDEAndMetaPTEBytesFrame, 841 unsigned int MetaRowByte, 842 unsigned int PixelPTEBytesPerRow, 843 double PrefetchSourceLinesY, 844 unsigned int SwathWidthY, 845 double VInitPreFillY, 846 unsigned int MaxNumSwathY, 847 double PrefetchSourceLinesC, 848 unsigned int SwathWidthC, 849 double VInitPreFillC, 850 unsigned int MaxNumSwathC, 851 int swath_width_luma_ub, 852 int swath_width_chroma_ub, 853 unsigned int SwathHeightY, 854 unsigned int SwathHeightC, 855 double TWait, 856 double *DSTXAfterScaler, 857 double *DSTYAfterScaler, 858 double *DestinationLinesForPrefetch, 859 double *PrefetchBandwidth, 860 double *DestinationLinesToRequestVMInVBlank, 861 double *DestinationLinesToRequestRowInVBlank, 862 double *VRatioPrefetchY, 863 double *VRatioPrefetchC, 864 double *RequiredPrefetchPixDataBWLuma, 865 double *RequiredPrefetchPixDataBWChroma, 866 bool *NotEnoughTimeForDynamicMetadata, 867 double *Tno_bw, 868 double *prefetch_vmrow_bw, 869 double *Tdmdl_vm, 870 double *Tdmdl, 871 double *TSetup, 872 int *VUpdateOffsetPix, 873 double *VUpdateWidthPix, 874 double *VReadyOffsetPix) 875 { 876 bool MyError = false; 877 unsigned int DPPCycles, DISPCLKCycles; 878 double DSTTotalPixelsAfterScaler; 879 double LineTime; 880 double dst_y_prefetch_equ; 881 #ifdef __DML_VBA_DEBUG__ 882 double Tsw_oto; 883 #endif 884 double prefetch_bw_oto; 885 double prefetch_bw_pr; 886 double Tvm_oto; 887 double Tr0_oto; 888 double Tvm_oto_lines; 889 double Tr0_oto_lines; 890 double dst_y_prefetch_oto; 891 double TimeForFetchingMetaPTE = 0; 892 double TimeForFetchingRowInVBlank = 0; 893 double LinesToRequestPrefetchPixelData = 0; 894 unsigned int HostVMDynamicLevelsTrips; 895 double trip_to_mem; 896 double Tvm_trips; 897 double Tr0_trips; 898 double Tvm_trips_rounded; 899 double Tr0_trips_rounded; 900 double Lsw_oto; 901 double Tpre_rounded; 902 double prefetch_bw_equ; 903 double Tvm_equ; 904 double Tr0_equ; 905 double Tdmbf; 906 double Tdmec; 907 double Tdmsks; 908 double prefetch_sw_bytes; 909 
double bytes_pp; 910 double dep_bytes; 911 int max_vratio_pre = 4; 912 double min_Lsw; 913 double Tsw_est1 = 0; 914 double Tsw_est3 = 0; 915 double max_Tsw = 0; 916 917 if (GPUVMEnable == true && HostVMEnable == true) { 918 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 919 } else { 920 HostVMDynamicLevelsTrips = 0; 921 } 922 #ifdef __DML_VBA_DEBUG__ 923 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 924 #endif 925 CalculateVupdateAndDynamicMetadataParameters( 926 MaxInterDCNTileRepeaters, 927 myPipe->DPPCLK, 928 myPipe->DISPCLK, 929 myPipe->DCFCLKDeepSleep, 930 myPipe->PixelClock, 931 myPipe->HTotal, 932 myPipe->VBlank, 933 DynamicMetadataTransmittedBytes, 934 DynamicMetadataLinesBeforeActiveRequired, 935 myPipe->InterlaceEnable, 936 myPipe->ProgressiveToInterlaceUnitInOPP, 937 TSetup, 938 &Tdmbf, 939 &Tdmec, 940 &Tdmsks, 941 VUpdateOffsetPix, 942 VUpdateWidthPix, 943 VReadyOffsetPix); 944 945 LineTime = myPipe->HTotal / myPipe->PixelClock; 946 trip_to_mem = UrgentLatency; 947 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 948 949 #ifdef __DML_VBA_ALLOW_DELTA__ 950 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 951 #else 952 if (DynamicMetadataVMEnabled == true) { 953 #endif 954 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 955 } else { 956 *Tdmdl = TWait + UrgentExtraLatency; 957 } 958 959 #ifdef __DML_VBA_ALLOW_DELTA__ 960 if (DynamicMetadataEnable == false) { 961 *Tdmdl = 0.0; 962 } 963 #endif 964 965 if (DynamicMetadataEnable == true) { 966 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 967 *NotEnoughTimeForDynamicMetadata = true; 968 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 969 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 970 dml_print("DML::%s: Tdmec: %fus - 
time dio takes to transfer dmd\n", __func__, Tdmec); 971 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 972 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); 973 } else { 974 *NotEnoughTimeForDynamicMetadata = false; 975 } 976 } else { 977 *NotEnoughTimeForDynamicMetadata = false; 978 } 979 980 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 981 982 if (myPipe->ScalerEnabled) 983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 984 else 985 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 986 987 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 988 989 DISPCLKCycles = DISPCLKDelaySubtotal; 990 991 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 992 return true; 993 994 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 995 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 998 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 999 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1000 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1001 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1002 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1003 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1004 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1005 #endif 1006 1007 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1008 1009 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1010 *DSTYAfterScaler = 1; 1011 else 1012 *DSTYAfterScaler = 0; 1013 1014 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1015 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1016 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1017 1018 #ifdef __DML_VBA_DEBUG__ 1019 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1020 #endif 1021 1022 MyError = false; 1023 1024 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1025 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1026 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1027 1028 #ifdef __DML_VBA_ALLOW_DELTA__ 1029 if (!myPipe->DCCEnable) { 1030 Tr0_trips = 0.0; 1031 Tr0_trips_rounded = 0.0; 1032 } 1033 #endif 1034 1035 if (!GPUVMEnable) { 1036 Tvm_trips = 0.0; 1037 Tvm_trips_rounded = 0.0; 1038 } 1039 1040 if (GPUVMEnable) { 1041 if (GPUVMPageTableLevels >= 3) { 1042 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1043 } else { 1044 *Tno_bw = 0; 1045 } 1046 } else if (!myPipe->DCCEnable) { 1047 *Tno_bw = LineTime; 1048 } else { 1049 *Tno_bw = LineTime / 4; 1050 } 1051 1052 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1054 else 1055 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1056 /*rev 99*/ 1057 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; 1058 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; 1059 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1060 prefetch_sw_bytes = PrefetchSourceLinesY * 
swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1061 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1062 1063 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1064 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1065 #ifdef __DML_VBA_DEBUG__ 1066 Tsw_oto = Lsw_oto * LineTime; 1067 #endif 1068 1069 1070 #ifdef __DML_VBA_DEBUG__ 1071 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1072 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1073 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1074 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1075 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1076 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1077 #endif 1078 1079 if (GPUVMEnable == true) 1080 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1081 else 1082 Tvm_oto = LineTime / 4.0; 1083 1084 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1085 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1086 LineTime - Tvm_oto, 1087 LineTime / 4); 1088 } else { 1089 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1090 } 1091 1092 #ifdef __DML_VBA_DEBUG__ 1093 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1094 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1095 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1096 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1097 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1098 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1099 dml_print("DML::%s: prefetch_bw_oto = %f\n", 
__func__, prefetch_bw_oto); 1100 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1101 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1102 #endif 1103 1104 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1105 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1106 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1107 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1108 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1109 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1110 1111 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1112 1113 if (prefetch_sw_bytes < dep_bytes) 1114 prefetch_sw_bytes = 2 * dep_bytes; 1115 1116 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1117 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1118 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1119 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1120 dml_print("DML: LineTime: %f\n", LineTime); 1121 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1122 1123 dml_print("DML: LineTime: %f\n", LineTime); 1124 dml_print("DML: VStartup: %d\n", VStartup); 1125 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1126 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1127 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1128 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1129 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1130 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1131 dml_print("DML: Tdmsks: 
%fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1132 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 1133 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 1134 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 1135 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); 1136 1137 *PrefetchBandwidth = 0; 1138 *DestinationLinesToRequestVMInVBlank = 0; 1139 *DestinationLinesToRequestRowInVBlank = 0; 1140 *VRatioPrefetchY = 0; 1141 *VRatioPrefetchC = 0; 1142 *RequiredPrefetchPixDataBWLuma = 0; 1143 if (dst_y_prefetch_equ > 1) { 1144 double PrefetchBandwidth1; 1145 double PrefetchBandwidth2; 1146 double PrefetchBandwidth3; 1147 double PrefetchBandwidth4; 1148 1149 if (Tpre_rounded - *Tno_bw > 0) { 1150 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1151 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1152 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1153 } else { 1154 PrefetchBandwidth1 = 0; 1155 } 1156 1157 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1158 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1159 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1160 } 1161 1162 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1163 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1164 else 1165 PrefetchBandwidth2 = 0; 1166 1167 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1168 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * 
PixelPTEBytesPerRow * HostVMInefficiencyFactor 1169 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1170 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1171 } else { 1172 PrefetchBandwidth3 = 0; 1173 } 1174 1175 #ifdef __DML_VBA_DEBUG__ 1176 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1177 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1178 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1179 #endif 1180 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1181 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1182 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1183 } 1184 1185 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1186 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1187 else 1188 PrefetchBandwidth4 = 0; 1189 1190 { 1191 bool Case1OK; 1192 bool Case2OK; 1193 bool Case3OK; 1194 1195 if (PrefetchBandwidth1 > 0) { 1196 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1197 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1198 Case1OK = true; 1199 } else { 1200 Case1OK = false; 1201 } 1202 } else { 1203 Case1OK = false; 1204 } 1205 1206 if (PrefetchBandwidth2 > 0) { 1207 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1208 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1209 Case2OK = true; 1210 } else { 1211 Case2OK = false; 1212 } 1213 } else { 1214 Case2OK = false; 1215 } 1216 1217 if (PrefetchBandwidth3 > 0) { 1218 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 
PrefetchBandwidth3 < Tvm_trips_rounded 1219 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1220 Case3OK = true; 1221 } else { 1222 Case3OK = false; 1223 } 1224 } else { 1225 Case3OK = false; 1226 } 1227 1228 if (Case1OK) { 1229 prefetch_bw_equ = PrefetchBandwidth1; 1230 } else if (Case2OK) { 1231 prefetch_bw_equ = PrefetchBandwidth2; 1232 } else if (Case3OK) { 1233 prefetch_bw_equ = PrefetchBandwidth3; 1234 } else { 1235 prefetch_bw_equ = PrefetchBandwidth4; 1236 } 1237 1238 #ifdef __DML_VBA_DEBUG__ 1239 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1240 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1241 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1242 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1243 #endif 1244 1245 if (prefetch_bw_equ > 0) { 1246 if (GPUVMEnable == true) { 1247 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1248 } else { 1249 Tvm_equ = LineTime / 4; 1250 } 1251 1252 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1253 Tr0_equ = dml_max4( 1254 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1255 Tr0_trips, 1256 (LineTime - Tvm_equ) / 2, 1257 LineTime / 4); 1258 } else { 1259 Tr0_equ = (LineTime - Tvm_equ) / 2; 1260 } 1261 } else { 1262 Tvm_equ = 0; 1263 Tr0_equ = 0; 1264 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1265 } 1266 } 1267 1268 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1269 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1270 TimeForFetchingMetaPTE = Tvm_oto; 1271 TimeForFetchingRowInVBlank = Tr0_oto; 1272 *PrefetchBandwidth = prefetch_bw_oto; 1273 } else { 1274 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1275 TimeForFetchingMetaPTE = Tvm_equ; 1276 TimeForFetchingRowInVBlank = Tr0_equ; 1277 *PrefetchBandwidth = prefetch_bw_equ; 1278 } 1279 1280 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1281 1282 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1283 1284 #ifdef __DML_VBA_ALLOW_DELTA__ 1285 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1286 // See note above dated 5/30/2018 1287 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1288 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1289 #else 1290 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1291 #endif 1292 1293 #ifdef __DML_VBA_DEBUG__ 1294 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1295 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1296 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1297 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1298 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1299 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1300 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1301 #endif 1302 1303 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1304 1305 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1306 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1307 #ifdef __DML_VBA_DEBUG__ 1308 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1309 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1310 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1311 #endif 1312 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1313 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1314 *VRatioPrefetchY = dml_max( 1315 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1316 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1317 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1318 } else { 1319 MyError = true; 1320 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1321 *VRatioPrefetchY = 0; 1322 } 1323 #ifdef __DML_VBA_DEBUG__ 1324 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1325 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1326 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1327 #endif 1328 } 1329 1330 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1331 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1332 1333 #ifdef __DML_VBA_DEBUG__ 1334 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1335 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1336 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1337 #endif 1338 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1339 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1340 *VRatioPrefetchC = dml_max( 1341 *VRatioPrefetchC, 1342 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1343 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1344 } else { 1345 MyError = true; 1346 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1347 *VRatioPrefetchC = 0; 1348 } 1349 #ifdef __DML_VBA_DEBUG__ 1350 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1351 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1352 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1353 #endif 1354 } 1355 1356 #ifdef __DML_VBA_DEBUG__ 1357 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1358 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1359 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1360 #endif 1361 1362 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1363 1364 #ifdef __DML_VBA_DEBUG__ 1365 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1366 #endif 1367 1368 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1369 / LineTime; 1370 } else { 1371 MyError = true; 1372 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1373 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1374 *VRatioPrefetchY = 0; 1375 *VRatioPrefetchC = 0; 1376 *RequiredPrefetchPixDataBWLuma = 0; 1377 *RequiredPrefetchPixDataBWChroma = 0; 1378 } 1379 1380 dml_print( 1381 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1382 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1383 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1384 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1385 dml_print( 1386 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1387 (double) LinesToRequestPrefetchPixelData * LineTime); 1388 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 1389 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / 1390 (double) myPipe->HTotal)) * LineTime); 1391 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1392 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", 1393 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1394 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1395 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1396 1397 } else { 1398 MyError = true; 1399 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1400 } 1401 1402 { 1403 double prefetch_vm_bw; 1404 double prefetch_row_bw; 1405 1406 if (PDEAndMetaPTEBytesFrame == 0) { 1407 prefetch_vm_bw = 0; 1408 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1409 #ifdef __DML_VBA_DEBUG__ 1410 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1411 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1412 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1413 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1414 #endif 1415 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1416 #ifdef __DML_VBA_DEBUG__ 1417 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1418 #endif 1419 } else { 1420 prefetch_vm_bw = 0; 1421 MyError = true; 1422 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1423 } 1424 1425 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1426 prefetch_row_bw = 0; 1427 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1428 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1429 1430 #ifdef __DML_VBA_DEBUG__ 1431 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1432 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1433 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1434 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1435 #endif 1436 } else { 1437 prefetch_row_bw = 0; 1438 MyError = true; 1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1440 } 1441 1442 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1443 } 1444 1445 if (MyError) { 1446 *PrefetchBandwidth = 0; 1447 TimeForFetchingMetaPTE = 0; 1448 
TimeForFetchingRowInVBlank = 0; 1449 *DestinationLinesToRequestVMInVBlank = 0; 1450 *DestinationLinesToRequestRowInVBlank = 0; 1451 *DestinationLinesForPrefetch = 0; 1452 LinesToRequestPrefetchPixelData = 0; 1453 *VRatioPrefetchY = 0; 1454 *VRatioPrefetchC = 0; 1455 *RequiredPrefetchPixDataBWLuma = 0; 1456 *RequiredPrefetchPixDataBWChroma = 0; 1457 } 1458 1459 return MyError; 1460 } 1461 1462 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1463 { 1464 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1465 } 1466 1467 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1468 { 1469 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1470 } 1471 1472 static void CalculateDCCConfiguration( 1473 bool DCCEnabled, 1474 bool DCCProgrammingAssumesScanDirectionUnknown, 1475 enum source_format_class SourcePixelFormat, 1476 unsigned int SurfaceWidthLuma, 1477 unsigned int SurfaceWidthChroma, 1478 unsigned int SurfaceHeightLuma, 1479 unsigned int SurfaceHeightChroma, 1480 double DETBufferSize, 1481 unsigned int RequestHeight256ByteLuma, 1482 unsigned int RequestHeight256ByteChroma, 1483 enum dm_swizzle_mode TilingFormat, 1484 unsigned int BytePerPixelY, 1485 unsigned int BytePerPixelC, 1486 double BytePerPixelDETY, 1487 double BytePerPixelDETC, 1488 enum scan_direction_class ScanOrientation, 1489 unsigned int *MaxUncompressedBlockLuma, 1490 unsigned int *MaxUncompressedBlockChroma, 1491 unsigned int *MaxCompressedBlockLuma, 1492 unsigned int *MaxCompressedBlockChroma, 1493 unsigned int *IndependentBlockLuma, 1494 unsigned int *IndependentBlockChroma) 1495 { 1496 int yuv420; 1497 int horz_div_l; 1498 int horz_div_c; 1499 int vert_div_l; 1500 int vert_div_c; 1501 1502 int swath_buf_size; 1503 double detile_buf_vp_horz_limit; 1504 double detile_buf_vp_vert_limit; 1505 1506 int MAS_vp_horz_limit; 1507 int MAS_vp_vert_limit; 1508 int max_vp_horz_width; 1509 int max_vp_vert_height; 1510 int eff_surf_width_l; 1511 int 
eff_surf_width_c; 1512 int eff_surf_height_l; 1513 int eff_surf_height_c; 1514 1515 int full_swath_bytes_horz_wc_l; 1516 int full_swath_bytes_horz_wc_c; 1517 int full_swath_bytes_vert_wc_l; 1518 int full_swath_bytes_vert_wc_c; 1519 int req128_horz_wc_l; 1520 int req128_horz_wc_c; 1521 int req128_vert_wc_l; 1522 int req128_vert_wc_c; 1523 int segment_order_horz_contiguous_luma; 1524 int segment_order_horz_contiguous_chroma; 1525 int segment_order_vert_contiguous_luma; 1526 int segment_order_vert_contiguous_chroma; 1527 1528 typedef enum { 1529 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1530 } RequestType; 1531 RequestType RequestLuma; 1532 RequestType RequestChroma; 1533 1534 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1535 horz_div_l = 1; 1536 horz_div_c = 1; 1537 vert_div_l = 1; 1538 vert_div_c = 1; 1539 1540 if (BytePerPixelY == 1) 1541 vert_div_l = 0; 1542 if (BytePerPixelC == 1) 1543 vert_div_c = 0; 1544 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1545 horz_div_l = 0; 1546 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1547 horz_div_c = 0; 1548 1549 if (BytePerPixelC == 0) { 1550 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1551 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1552 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1553 } else { 1554 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1555 detile_buf_vp_horz_limit = (double) swath_buf_size 1556 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1557 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1558 detile_buf_vp_vert_limit = (double) 
swath_buf_size 1559 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1560 } 1561 1562 if (SourcePixelFormat == dm_420_10) { 1563 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1564 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1565 } 1566 1567 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1568 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1569 1570 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1571 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1572 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1573 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1574 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1575 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1576 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); 1577 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1578 1579 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1580 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1581 if (BytePerPixelC > 0) { 1582 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1583 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1584 } else { 1585 full_swath_bytes_horz_wc_c = 0; 1586 full_swath_bytes_vert_wc_c = 0; 1587 } 1588 1589 if (SourcePixelFormat == dm_420_10) { 1590 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1591 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1592 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1593 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1594 } 1595 1596 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1597 req128_horz_wc_l = 0; 1598 req128_horz_wc_c = 0; 1599 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1600 req128_horz_wc_l = 0; 1601 req128_horz_wc_c = 1; 1602 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1603 req128_horz_wc_l = 1; 1604 req128_horz_wc_c = 0; 1605 } else { 1606 req128_horz_wc_l = 1; 1607 req128_horz_wc_c = 1; 1608 } 1609 1610 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1611 req128_vert_wc_l = 0; 1612 req128_vert_wc_c = 0; 1613 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1614 req128_vert_wc_l = 0; 1615 
req128_vert_wc_c = 1; 1616 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1617 req128_vert_wc_l = 1; 1618 req128_vert_wc_c = 0; 1619 } else { 1620 req128_vert_wc_l = 1; 1621 req128_vert_wc_c = 1; 1622 } 1623 1624 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1625 segment_order_horz_contiguous_luma = 0; 1626 } else { 1627 segment_order_horz_contiguous_luma = 1; 1628 } 1629 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1630 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1631 segment_order_vert_contiguous_luma = 0; 1632 } else { 1633 segment_order_vert_contiguous_luma = 1; 1634 } 1635 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1636 segment_order_horz_contiguous_chroma = 0; 1637 } else { 1638 segment_order_horz_contiguous_chroma = 1; 1639 } 1640 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1641 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1642 segment_order_vert_contiguous_chroma = 0; 1643 } else { 1644 segment_order_vert_contiguous_chroma = 1; 1645 } 1646 1647 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1648 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1649 RequestLuma = REQ_256Bytes; 1650 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1651 RequestLuma = REQ_128BytesNonContiguous; 1652 } else { 1653 RequestLuma = REQ_128BytesContiguous; 1654 } 1655 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1656 RequestChroma = REQ_256Bytes; 1657 } else if ((req128_horz_wc_c == 1 && 
segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1658 RequestChroma = REQ_128BytesNonContiguous; 1659 } else { 1660 RequestChroma = REQ_128BytesContiguous; 1661 } 1662 } else if (ScanOrientation != dm_vert) { 1663 if (req128_horz_wc_l == 0) { 1664 RequestLuma = REQ_256Bytes; 1665 } else if (segment_order_horz_contiguous_luma == 0) { 1666 RequestLuma = REQ_128BytesNonContiguous; 1667 } else { 1668 RequestLuma = REQ_128BytesContiguous; 1669 } 1670 if (req128_horz_wc_c == 0) { 1671 RequestChroma = REQ_256Bytes; 1672 } else if (segment_order_horz_contiguous_chroma == 0) { 1673 RequestChroma = REQ_128BytesNonContiguous; 1674 } else { 1675 RequestChroma = REQ_128BytesContiguous; 1676 } 1677 } else { 1678 if (req128_vert_wc_l == 0) { 1679 RequestLuma = REQ_256Bytes; 1680 } else if (segment_order_vert_contiguous_luma == 0) { 1681 RequestLuma = REQ_128BytesNonContiguous; 1682 } else { 1683 RequestLuma = REQ_128BytesContiguous; 1684 } 1685 if (req128_vert_wc_c == 0) { 1686 RequestChroma = REQ_256Bytes; 1687 } else if (segment_order_vert_contiguous_chroma == 0) { 1688 RequestChroma = REQ_128BytesNonContiguous; 1689 } else { 1690 RequestChroma = REQ_128BytesContiguous; 1691 } 1692 } 1693 1694 if (RequestLuma == REQ_256Bytes) { 1695 *MaxUncompressedBlockLuma = 256; 1696 *MaxCompressedBlockLuma = 256; 1697 *IndependentBlockLuma = 0; 1698 } else if (RequestLuma == REQ_128BytesContiguous) { 1699 *MaxUncompressedBlockLuma = 256; 1700 *MaxCompressedBlockLuma = 128; 1701 *IndependentBlockLuma = 128; 1702 } else { 1703 *MaxUncompressedBlockLuma = 256; 1704 *MaxCompressedBlockLuma = 64; 1705 *IndependentBlockLuma = 64; 1706 } 1707 1708 if (RequestChroma == REQ_256Bytes) { 1709 *MaxUncompressedBlockChroma = 256; 1710 *MaxCompressedBlockChroma = 256; 1711 *IndependentBlockChroma = 0; 1712 } else if (RequestChroma == REQ_128BytesContiguous) { 1713 *MaxUncompressedBlockChroma = 256; 1714 *MaxCompressedBlockChroma = 128; 
1715 *IndependentBlockChroma = 128; 1716 } else { 1717 *MaxUncompressedBlockChroma = 256; 1718 *MaxCompressedBlockChroma = 64; 1719 *IndependentBlockChroma = 64; 1720 } 1721 1722 if (DCCEnabled != true || BytePerPixelC == 0) { 1723 *MaxUncompressedBlockChroma = 0; 1724 *MaxCompressedBlockChroma = 0; 1725 *IndependentBlockChroma = 0; 1726 } 1727 1728 if (DCCEnabled != true) { 1729 *MaxUncompressedBlockLuma = 0; 1730 *MaxCompressedBlockLuma = 0; 1731 *IndependentBlockLuma = 0; 1732 } 1733 } 1734 1735 static double CalculatePrefetchSourceLines( 1736 struct display_mode_lib *mode_lib, 1737 double VRatio, 1738 double vtaps, 1739 bool Interlace, 1740 bool ProgressiveToInterlaceUnitInOPP, 1741 unsigned int SwathHeight, 1742 unsigned int ViewportYStart, 1743 double *VInitPreFill, 1744 unsigned int *MaxNumSwath) 1745 { 1746 struct vba_vars_st *v = &mode_lib->vba; 1747 unsigned int MaxPartialSwath; 1748 1749 if (ProgressiveToInterlaceUnitInOPP) 1750 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1751 else 1752 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1753 1754 if (!v->IgnoreViewportPositioning) { 1755 1756 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1757 1758 if (*VInitPreFill > 1.0) 1759 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1760 else 1761 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1762 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1763 1764 } else { 1765 1766 if (ViewportYStart != 0) 1767 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1768 1769 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1770 1771 if (*VInitPreFill > 1.0) 1772 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1773 else 1774 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1775 } 1776 1777 
#ifdef __DML_VBA_DEBUG__ 1778 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1779 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1780 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1781 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1782 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1783 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1784 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1785 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1786 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1787 #endif 1788 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1789 } 1790 1791 static unsigned int CalculateVMAndRowBytes( 1792 struct display_mode_lib *mode_lib, 1793 bool DCCEnable, 1794 unsigned int BlockHeight256Bytes, 1795 unsigned int BlockWidth256Bytes, 1796 enum source_format_class SourcePixelFormat, 1797 unsigned int SurfaceTiling, 1798 unsigned int BytePerPixel, 1799 enum scan_direction_class ScanDirection, 1800 unsigned int SwathWidth, 1801 unsigned int ViewportHeight, 1802 bool GPUVMEnable, 1803 bool HostVMEnable, 1804 unsigned int HostVMMaxNonCachedPageTableLevels, 1805 unsigned int GPUVMMinPageSize, 1806 unsigned int HostVMMinPageSize, 1807 unsigned int PTEBufferSizeInRequests, 1808 unsigned int Pitch, 1809 unsigned int DCCMetaPitch, 1810 unsigned int *MacroTileWidth, 1811 unsigned int *MetaRowByte, 1812 unsigned int *PixelPTEBytesPerRow, 1813 bool *PTEBufferSizeNotExceeded, 1814 int *dpte_row_width_ub, 1815 unsigned int *dpte_row_height, 1816 unsigned int *MetaRequestWidth, 1817 unsigned int *MetaRequestHeight, 1818 unsigned int *meta_row_width, 1819 unsigned int *meta_row_height, 1820 int *vm_group_bytes, 1821 unsigned int *dpte_group_bytes, 1822 unsigned int *PixelPTEReqWidth, 1823 unsigned int 
*PixelPTEReqHeight, 1824 unsigned int *PTERequestSize, 1825 int *DPDE0BytesFrame, 1826 int *MetaPTEBytesFrame) 1827 { 1828 struct vba_vars_st *v = &mode_lib->vba; 1829 unsigned int MPDEBytesFrame; 1830 unsigned int DCCMetaSurfaceBytes; 1831 unsigned int MacroTileSizeBytes; 1832 unsigned int MacroTileHeight; 1833 unsigned int ExtraDPDEBytesFrame; 1834 unsigned int PDEAndMetaPTEBytesFrame; 1835 unsigned int PixelPTEReqHeightPTEs = 0; 1836 unsigned int HostVMDynamicLevels = 0; 1837 double FractionOfPTEReturnDrop; 1838 1839 if (GPUVMEnable == true && HostVMEnable == true) { 1840 if (HostVMMinPageSize < 2048) { 1841 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1842 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1843 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1844 } else { 1845 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1846 } 1847 } 1848 1849 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1850 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1851 if (ScanDirection != dm_vert) { 1852 *meta_row_height = *MetaRequestHeight; 1853 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1854 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1855 } else { 1856 *meta_row_height = *MetaRequestWidth; 1857 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1858 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1859 } 1860 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1861 if (GPUVMEnable == true) { 1862 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1863 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1864 } else { 1865 *MetaPTEBytesFrame = 0; 1866 MPDEBytesFrame = 0; 1867 } 1868 1869 
if (DCCEnable != true) { 1870 *MetaPTEBytesFrame = 0; 1871 MPDEBytesFrame = 0; 1872 *MetaRowByte = 0; 1873 } 1874 1875 if (SurfaceTiling == dm_sw_linear) { 1876 MacroTileSizeBytes = 256; 1877 MacroTileHeight = BlockHeight256Bytes; 1878 } else { 1879 MacroTileSizeBytes = 65536; 1880 MacroTileHeight = 16 * BlockHeight256Bytes; 1881 } 1882 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1883 1884 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1885 if (ScanDirection != dm_vert) { 1886 *DPDE0BytesFrame = 64 1887 * (dml_ceil( 1888 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1889 / (8 * 2097152), 1890 1) + 1); 1891 } else { 1892 *DPDE0BytesFrame = 64 1893 * (dml_ceil( 1894 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1895 / (8 * 2097152), 1896 1) + 1); 1897 } 1898 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1899 } else { 1900 *DPDE0BytesFrame = 0; 1901 ExtraDPDEBytesFrame = 0; 1902 } 1903 1904 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1905 1906 #ifdef __DML_VBA_DEBUG__ 1907 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1908 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1909 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1910 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1911 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1912 #endif 1913 1914 if (HostVMEnable == true) { 1915 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1916 } 1917 #ifdef __DML_VBA_DEBUG__ 1918 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1919 #endif 1920 1921 if (SurfaceTiling == dm_sw_linear) { 1922 
PixelPTEReqHeightPTEs = 1; 1923 *PixelPTEReqHeight = 1; 1924 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1925 *PTERequestSize = 64; 1926 FractionOfPTEReturnDrop = 0; 1927 } else if (MacroTileSizeBytes == 4096) { 1928 PixelPTEReqHeightPTEs = 1; 1929 *PixelPTEReqHeight = MacroTileHeight; 1930 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1931 *PTERequestSize = 64; 1932 if (ScanDirection != dm_vert) 1933 FractionOfPTEReturnDrop = 0; 1934 else 1935 FractionOfPTEReturnDrop = 7 / 8; 1936 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1937 PixelPTEReqHeightPTEs = 16; 1938 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 1939 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 1940 *PTERequestSize = 128; 1941 FractionOfPTEReturnDrop = 0; 1942 } else { 1943 PixelPTEReqHeightPTEs = 1; 1944 *PixelPTEReqHeight = MacroTileHeight; 1945 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1946 *PTERequestSize = 64; 1947 FractionOfPTEReturnDrop = 0; 1948 } 1949 1950 if (SurfaceTiling == dm_sw_linear) { 1951 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 1952 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1953 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1954 } else if (ScanDirection != dm_vert) { 1955 *dpte_row_height = *PixelPTEReqHeight; 1956 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1957 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1958 } else { 1959 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 1960 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 1961 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 1962 } 1963 1964 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 1965 *PTEBufferSizeNotExceeded = true; 1966 } else { 1967 *PTEBufferSizeNotExceeded = false; 1968 } 1969 1970 if (GPUVMEnable != true) { 1971 *PixelPTEBytesPerRow = 0; 1972 *PTEBufferSizeNotExceeded = true; 1973 } 1974 1975 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 1976 1977 if (HostVMEnable == true) { 1978 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 1979 } 1980 1981 if (HostVMEnable == true) { 1982 *vm_group_bytes = 512; 1983 *dpte_group_bytes = 512; 1984 } else if (GPUVMEnable == true) { 1985 *vm_group_bytes = 2048; 1986 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 1987 *dpte_group_bytes = 512; 1988 } else { 1989 *dpte_group_bytes = 2048; 1990 } 1991 } else { 1992 *vm_group_bytes = 0; 1993 *dpte_group_bytes = 0; 1994 } 1995 return PDEAndMetaPTEBytesFrame; 1996 } 1997 1998 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 1999 { 2000 struct vba_vars_st *v = &mode_lib->vba; 2001 unsigned int j, k; 2002 double HostVMInefficiencyFactor = 1.0; 2003 bool NoChromaPlanes = true; 2004 int ReorderBytes; 2005 double VMDataOnlyReturnBW; 2006 double MaxTotalRDBandwidth = 0; 2007 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2008 2009 v->WritebackDISPCLK = 0.0; 2010 v->DISPCLKWithRamping = 0; 2011 v->DISPCLKWithoutRamping = 0; 2012 v->GlobalDPPCLK = 0.0; 2013 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ 2014 { 2015 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2016 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2017 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2018 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2019 if 
(v->HostVMEnable != true) { 2020 v->ReturnBW = dml_min( 2021 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2022 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2023 } else { 2024 v->ReturnBW = dml_min( 2025 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2026 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2027 } 2028 } 2029 /* End DAL custom code */ 2030 2031 // DISPCLK and DPPCLK Calculation 2032 // 2033 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2034 if (v->WritebackEnable[k]) { 2035 v->WritebackDISPCLK = dml_max( 2036 v->WritebackDISPCLK, 2037 dml31_CalculateWriteBackDISPCLK( 2038 v->WritebackPixelFormat[k], 2039 v->PixelClock[k], 2040 v->WritebackHRatio[k], 2041 v->WritebackVRatio[k], 2042 v->WritebackHTaps[k], 2043 v->WritebackVTaps[k], 2044 v->WritebackSourceWidth[k], 2045 v->WritebackDestinationWidth[k], 2046 v->HTotal[k], 2047 v->WritebackLineBufferSize)); 2048 } 2049 } 2050 2051 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2052 if (v->HRatio[k] > 1) { 2053 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2054 v->MaxDCHUBToPSCLThroughput, 2055 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2056 } else { 2057 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2058 } 2059 2060 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2061 * dml_max( 2062 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2063 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2064 2065 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2066 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2067 } 2068 2069 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 
2070 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2071 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2072 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2073 } else { 2074 if (v->HRatioChroma[k] > 1) { 2075 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2076 v->MaxDCHUBToPSCLThroughput, 2077 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2078 } else { 2079 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2080 } 2081 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2082 * dml_max3( 2083 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2084 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2085 1.0); 2086 2087 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2088 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2089 } 2090 2091 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2092 } 2093 } 2094 2095 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2096 if (v->BlendingAndTiming[k] != k) 2097 continue; 2098 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2099 v->DISPCLKWithRamping = dml_max( 2100 v->DISPCLKWithRamping, 2101 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2102 * (1 + v->DISPCLKRampingMargin / 100)); 2103 v->DISPCLKWithoutRamping = dml_max( 2104 v->DISPCLKWithoutRamping, 2105 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2106 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2107 v->DISPCLKWithRamping = dml_max( 2108 v->DISPCLKWithRamping, 2109 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2110 * (1 + v->DISPCLKRampingMargin / 100)); 2111 v->DISPCLKWithoutRamping = dml_max( 2112 v->DISPCLKWithoutRamping, 2113 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2114 } else { 2115 v->DISPCLKWithRamping = 
dml_max( 2116 v->DISPCLKWithRamping, 2117 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2118 v->DISPCLKWithoutRamping = dml_max( 2119 v->DISPCLKWithoutRamping, 2120 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2121 } 2122 } 2123 2124 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2125 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2126 2127 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2128 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2129 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2130 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2131 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2132 v->DISPCLKDPPCLKVCOSpeed); 2133 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2134 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2135 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2136 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2137 } else { 2138 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2139 } 2140 v->DISPCLK = v->DISPCLK_calculated; 2141 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2142 2143 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2144 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2145 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2146 } 2147 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2148 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2149 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2150 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2151 } 2152 2153 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2154 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2155 } 2156 2157 // Urgent and B P-State/DRAM Clock Change Watermark 2158 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2159 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2160 2161 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2162 dml30_CalculateBytePerPixelAnd256BBlockSizes( 2163 v->SourcePixelFormat[k], 2164 v->SurfaceTiling[k], 2165 &v->BytePerPixelY[k], 2166 &v->BytePerPixelC[k], 2167 &v->BytePerPixelDETY[k], 2168 &v->BytePerPixelDETC[k], 2169 &v->BlockHeight256BytesY[k], 2170 &v->BlockHeight256BytesC[k], 2171 &v->BlockWidth256BytesY[k], 2172 &v->BlockWidth256BytesC[k]); 2173 } 2174 2175 CalculateSwathWidth( 2176 false, 2177 v->NumberOfActivePlanes, 2178 v->SourcePixelFormat, 2179 v->SourceScan, 2180 v->ViewportWidth, 2181 v->ViewportHeight, 2182 v->SurfaceWidthY, 2183 v->SurfaceWidthC, 2184 v->SurfaceHeightY, 2185 v->SurfaceHeightC, 2186 v->ODMCombineEnabled, 2187 v->BytePerPixelY, 2188 v->BytePerPixelC, 2189 v->BlockHeight256BytesY, 2190 v->BlockHeight256BytesC, 2191 v->BlockWidth256BytesY, 2192 v->BlockWidth256BytesC, 2193 v->BlendingAndTiming, 2194 v->HActive, 2195 v->HRatio, 2196 v->DPPPerPlane, 2197 v->SwathWidthSingleDPPY, 2198 v->SwathWidthSingleDPPC, 2199 v->SwathWidthY, 2200 v->SwathWidthC, 2201 v->dummyinteger3, 2202 v->dummyinteger4, 2203 v->swath_width_luma_ub, 2204 v->swath_width_chroma_ub); 2205 2206 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2207 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2208 * v->VRatio[k]; 2209 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2210 * v->VRatioChroma[k]; 2211 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2212 } 2213 2214 // DCFCLK Deep Sleep 2215 CalculateDCFCLKDeepSleep( 2216 mode_lib, 2217 v->NumberOfActivePlanes, 2218 v->BytePerPixelY, 2219 v->BytePerPixelC, 2220 v->VRatio, 2221 v->VRatioChroma, 2222 v->SwathWidthY, 2223 v->SwathWidthC, 2224 v->DPPPerPlane, 2225 v->HRatio, 2226 v->HRatioChroma, 2227 v->PixelClock, 2228 v->PSCL_THROUGHPUT_LUMA, 2229 v->PSCL_THROUGHPUT_CHROMA, 2230 v->DPPCLK, 2231 v->ReadBandwidthPlaneLuma, 2232 v->ReadBandwidthPlaneChroma, 2233 v->ReturnBusWidth, 2234 &v->DCFCLKDeepSleep); 2235 2236 // DSCCLK 2237 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2238 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2239 v->DSCCLK_calculated[k] = 0.0; 2240 } else { 2241 if (v->OutputFormat[k] == dm_420) 2242 v->DSCFormatFactor = 2; 2243 else if (v->OutputFormat[k] == dm_444) 2244 v->DSCFormatFactor = 1; 2245 else if (v->OutputFormat[k] == dm_n422) 2246 v->DSCFormatFactor = 2; 2247 else 2248 v->DSCFormatFactor = 1; 2249 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2250 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2251 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2252 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2253 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2254 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2255 else 2256 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2257 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2258 } 2259 } 2260 2261 // DSC Delay 2262 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2263 double BPP = v->OutputBpp[k]; 2264 2265 if (v->DSCEnabled[k] && BPP != 0) { 2266 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2267 v->DSCDelay[k] = dscceComputeDelay( 2268 v->DSCInputBitPerComponent[k], 2269 BPP, 2270 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2271 v->NumberOfDSCSlices[k], 2272 v->OutputFormat[k], 2273 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2274 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2275 v->DSCDelay[k] = 2 2276 * (dscceComputeDelay( 2277 v->DSCInputBitPerComponent[k], 2278 BPP, 2279 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2280 v->NumberOfDSCSlices[k] / 2.0, 2281 v->OutputFormat[k], 2282 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2283 } else { 2284 v->DSCDelay[k] = 4 2285 * (dscceComputeDelay( 2286 v->DSCInputBitPerComponent[k], 2287 BPP, 2288 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2289 v->NumberOfDSCSlices[k] / 4.0, 2290 v->OutputFormat[k], 2291 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2292 } 2293 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2294 } else { 2295 v->DSCDelay[k] = 0; 2296 } 2297 } 2298 2299 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2300 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2301 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2302 v->DSCDelay[k] = v->DSCDelay[j]; 2303 2304 // Prefetch 2305 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2306 unsigned int PDEAndMetaPTEBytesFrameY; 2307 unsigned int PixelPTEBytesPerRowY; 2308 unsigned int MetaRowByteY; 2309 unsigned int MetaRowByteC; 2310 unsigned int PDEAndMetaPTEBytesFrameC; 2311 unsigned int PixelPTEBytesPerRowC; 2312 bool PTEBufferSizeNotExceededY; 2313 bool PTEBufferSizeNotExceededC; 2314 2315 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2316 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2317 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2318 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2319 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2320 } else { 2321 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2322 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2323 } 2324 2325 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2326 mode_lib, 2327 v->DCCEnable[k], 2328 v->BlockHeight256BytesC[k], 2329 v->BlockWidth256BytesC[k], 2330 v->SourcePixelFormat[k], 2331 v->SurfaceTiling[k], 2332 v->BytePerPixelC[k], 2333 v->SourceScan[k], 2334 v->SwathWidthC[k], 2335 v->ViewportHeightChroma[k], 2336 v->GPUVMEnable, 2337 v->HostVMEnable, 2338 v->HostVMMaxNonCachedPageTableLevels, 2339 v->GPUVMMinPageSize, 2340 v->HostVMMinPageSize, 2341 v->PTEBufferSizeInRequestsForChroma, 2342 v->PitchC[k], 2343 v->DCCMetaPitchC[k], 2344 &v->MacroTileWidthC[k], 2345 &MetaRowByteC, 2346 &PixelPTEBytesPerRowC, 2347 &PTEBufferSizeNotExceededC, 2348 &v->dpte_row_width_chroma_ub[k], 2349 &v->dpte_row_height_chroma[k], 2350 &v->meta_req_width_chroma[k], 2351 &v->meta_req_height_chroma[k], 2352 &v->meta_row_width_chroma[k], 2353 &v->meta_row_height_chroma[k], 2354 &v->dummyinteger1, 2355 &v->dummyinteger2, 2356 &v->PixelPTEReqWidthC[k], 2357 &v->PixelPTEReqHeightC[k], 2358 &v->PTERequestSizeC[k], 2359 &v->dpde0_bytes_per_frame_ub_c[k], 2360 &v->meta_pte_bytes_per_frame_ub_c[k]); 2361 2362 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2363 mode_lib, 2364 v->VRatioChroma[k], 2365 v->VTAPsChroma[k], 2366 v->Interlace[k], 2367 v->ProgressiveToInterlaceUnitInOPP, 2368 v->SwathHeightC[k], 2369 v->ViewportYStartC[k], 2370 &v->VInitPreFillC[k], 2371 &v->MaxNumSwathC[k]); 2372 } else { 2373 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2374 v->PTEBufferSizeInRequestsForChroma = 0; 2375 PixelPTEBytesPerRowC = 0; 2376 PDEAndMetaPTEBytesFrameC = 0; 2377 MetaRowByteC = 0; 2378 v->MaxNumSwathC[k] = 0; 2379 v->PrefetchSourceLinesC[k] = 0; 2380 } 2381 2382 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2383 mode_lib, 2384 v->DCCEnable[k], 2385 v->BlockHeight256BytesY[k], 2386 v->BlockWidth256BytesY[k], 2387 v->SourcePixelFormat[k], 2388 v->SurfaceTiling[k], 2389 v->BytePerPixelY[k], 2390 v->SourceScan[k], 2391 v->SwathWidthY[k], 2392 v->ViewportHeight[k], 2393 v->GPUVMEnable, 2394 v->HostVMEnable, 2395 v->HostVMMaxNonCachedPageTableLevels, 2396 v->GPUVMMinPageSize, 2397 v->HostVMMinPageSize, 2398 v->PTEBufferSizeInRequestsForLuma, 2399 v->PitchY[k], 2400 v->DCCMetaPitchY[k], 2401 &v->MacroTileWidthY[k], 2402 &MetaRowByteY, 2403 &PixelPTEBytesPerRowY, 2404 &PTEBufferSizeNotExceededY, 2405 &v->dpte_row_width_luma_ub[k], 2406 &v->dpte_row_height[k], 2407 &v->meta_req_width[k], 2408 &v->meta_req_height[k], 2409 &v->meta_row_width[k], 2410 &v->meta_row_height[k], 2411 &v->vm_group_bytes[k], 2412 &v->dpte_group_bytes[k], 2413 &v->PixelPTEReqWidthY[k], 2414 &v->PixelPTEReqHeightY[k], 2415 &v->PTERequestSizeY[k], 2416 &v->dpde0_bytes_per_frame_ub_l[k], 2417 &v->meta_pte_bytes_per_frame_ub_l[k]); 2418 2419 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2420 mode_lib, 2421 v->VRatio[k], 2422 v->vtaps[k], 2423 v->Interlace[k], 2424 v->ProgressiveToInterlaceUnitInOPP, 2425 v->SwathHeightY[k], 2426 v->ViewportYStartY[k], 2427 &v->VInitPreFillY[k], 2428 &v->MaxNumSwathY[k]); 2429 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2430 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2431 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2432 2433 CalculateRowBandwidth( 2434 v->GPUVMEnable, 2435 v->SourcePixelFormat[k], 2436 v->VRatio[k], 2437 v->VRatioChroma[k], 2438 v->DCCEnable[k], 2439 v->HTotal[k] / v->PixelClock[k], 2440 MetaRowByteY, 2441 MetaRowByteC, 2442 v->meta_row_height[k], 2443 v->meta_row_height_chroma[k], 2444 PixelPTEBytesPerRowY, 2445 PixelPTEBytesPerRowC, 2446 v->dpte_row_height[k], 2447 v->dpte_row_height_chroma[k], 2448 &v->meta_row_bw[k], 2449 &v->dpte_row_bw[k]); 
2450 } 2451 2452 v->TotalDCCActiveDPP = 0; 2453 v->TotalActiveDPP = 0; 2454 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2455 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2456 if (v->DCCEnable[k]) 2457 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2458 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2459 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2460 NoChromaPlanes = false; 2461 } 2462 2463 ReorderBytes = v->NumberOfChannels 2464 * dml_max3( 2465 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2466 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2467 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2468 2469 VMDataOnlyReturnBW = dml_min( 2470 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2471 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2472 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2473 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2474 2475 #ifdef __DML_VBA_DEBUG__ 2476 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2477 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2478 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2479 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2480 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2481 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2482 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2483 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2484 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2485 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2486 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2487 #endif 2488 2489 if (v->GPUVMEnable && v->HostVMEnable) 2490 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2491 2492 v->UrgentExtraLatency = CalculateExtraLatency( 2493 v->RoundTripPingLatencyCycles, 2494 ReorderBytes, 2495 v->DCFCLK, 2496 v->TotalActiveDPP, 2497 v->PixelChunkSizeInKByte, 2498 v->TotalDCCActiveDPP, 2499 v->MetaChunkSize, 2500 v->ReturnBW, 2501 v->GPUVMEnable, 2502 v->HostVMEnable, 2503 v->NumberOfActivePlanes, 2504 v->DPPPerPlane, 2505 v->dpte_group_bytes, 2506 HostVMInefficiencyFactor, 2507 v->HostVMMinPageSize, 2508 v->HostVMMaxNonCachedPageTableLevels); 2509 2510 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2511 2512 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2513 if (v->BlendingAndTiming[k] == k) { 2514 if (v->WritebackEnable[k] == true) { 2515 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2516 + CalculateWriteBackDelay( 2517 v->WritebackPixelFormat[k], 2518 v->WritebackHRatio[k], 2519 v->WritebackVRatio[k], 2520 v->WritebackVTaps[k], 2521 v->WritebackDestinationWidth[k], 2522 v->WritebackDestinationHeight[k], 2523 v->WritebackSourceHeight[k], 2524 v->HTotal[k]) / v->DISPCLK; 2525 } else 2526 v->WritebackDelay[v->VoltageLevel][k] = 0; 2527 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2528 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2529 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2530 v->WritebackDelay[v->VoltageLevel][k], 2531 v->WritebackLatency 2532 + CalculateWriteBackDelay( 2533 v->WritebackPixelFormat[j], 2534 v->WritebackHRatio[j], 2535 v->WritebackVRatio[j], 2536 v->WritebackVTaps[j], 2537 v->WritebackDestinationWidth[j], 2538 v->WritebackDestinationHeight[j], 2539 v->WritebackSourceHeight[j], 2540 v->HTotal[k]) / v->DISPCLK); 2541 } 2542 } 2543 } 2544 } 2545 2546 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2547 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2548 if (v->BlendingAndTiming[k] == j) 2549 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2550 2551 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2552 v->MaxVStartupLines[k] = 2553 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 2554 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 2555 v->VTotal[k] - v->VActive[k] 2556 - dml_max( 2557 1.0, 2558 dml_ceil( 2559 (double) v->WritebackDelay[v->VoltageLevel][k] 2560 / (v->HTotal[k] / v->PixelClock[k]), 2561 1)); 2562 if (v->MaxVStartupLines[k] > 1023) 2563 v->MaxVStartupLines[k] = 1023; 2564 2565 #ifdef __DML_VBA_DEBUG__ 2566 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2567 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2568 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2569 #endif 2570 } 2571 2572 v->MaximumMaxVStartupLines = 0; 2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2574 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2575 2576 // VBA_DELTA 2577 // We don't really care to iterate between the various prefetch modes 2578 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2579 2580 v->UrgentLatency = CalculateUrgentLatency( 2581 v->UrgentLatencyPixelDataOnly, 2582 v->UrgentLatencyPixelMixedWithVMData, 2583 v->UrgentLatencyVMDataOnly, 2584 v->DoUrgentLatencyAdjustment, 2585 v->UrgentLatencyAdjustmentFabricClockComponent, 2586 v->UrgentLatencyAdjustmentFabricClockReference, 2587 v->FabricClock); 2588 2589 v->FractionOfUrgentBandwidth = 0.0; 2590 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2591 2592 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2593 2594 do { 2595 double 
MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2596 bool DestinationLineTimesForPrefetchLessThan2 = false; 2597 bool VRatioPrefetchMoreThan4 = false; 2598 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2599 MaxTotalRDBandwidth = 0; 2600 2601 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2602 2603 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2604 Pipe myPipe; 2605 2606 myPipe.DPPCLK = v->DPPCLK[k]; 2607 myPipe.DISPCLK = v->DISPCLK; 2608 myPipe.PixelClock = v->PixelClock[k]; 2609 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2610 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2611 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2612 myPipe.VRatio = v->VRatio[k]; 2613 myPipe.VRatioChroma = v->VRatioChroma[k]; 2614 myPipe.SourceScan = v->SourceScan[k]; 2615 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2616 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2617 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2618 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2619 myPipe.InterlaceEnable = v->Interlace[k]; 2620 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2621 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2622 myPipe.HTotal = v->HTotal[k]; 2623 myPipe.DCCEnable = v->DCCEnable[k]; 2624 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2625 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2626 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2627 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2628 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2629 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2630 v->ErrorResult[k] = CalculatePrefetchSchedule( 2631 mode_lib, 2632 HostVMInefficiencyFactor, 2633 &myPipe, 2634 v->DSCDelay[k], 2635 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2636 v->DPPCLKDelaySCL, 2637 v->DPPCLKDelaySCLLBOnly, 2638 v->DPPCLKDelayCNVCCursor, 2639 
v->DISPCLKDelaySubtotal, 2640 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2641 v->OutputFormat[k], 2642 v->MaxInterDCNTileRepeaters, 2643 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2644 v->MaxVStartupLines[k], 2645 v->GPUVMMaxPageTableLevels, 2646 v->GPUVMEnable, 2647 v->HostVMEnable, 2648 v->HostVMMaxNonCachedPageTableLevels, 2649 v->HostVMMinPageSize, 2650 v->DynamicMetadataEnable[k], 2651 v->DynamicMetadataVMEnabled, 2652 v->DynamicMetadataLinesBeforeActiveRequired[k], 2653 v->DynamicMetadataTransmittedBytes[k], 2654 v->UrgentLatency, 2655 v->UrgentExtraLatency, 2656 v->TCalc, 2657 v->PDEAndMetaPTEBytesFrame[k], 2658 v->MetaRowByte[k], 2659 v->PixelPTEBytesPerRow[k], 2660 v->PrefetchSourceLinesY[k], 2661 v->SwathWidthY[k], 2662 v->VInitPreFillY[k], 2663 v->MaxNumSwathY[k], 2664 v->PrefetchSourceLinesC[k], 2665 v->SwathWidthC[k], 2666 v->VInitPreFillC[k], 2667 v->MaxNumSwathC[k], 2668 v->swath_width_luma_ub[k], 2669 v->swath_width_chroma_ub[k], 2670 v->SwathHeightY[k], 2671 v->SwathHeightC[k], 2672 TWait, 2673 &v->DSTXAfterScaler[k], 2674 &v->DSTYAfterScaler[k], 2675 &v->DestinationLinesForPrefetch[k], 2676 &v->PrefetchBandwidth[k], 2677 &v->DestinationLinesToRequestVMInVBlank[k], 2678 &v->DestinationLinesToRequestRowInVBlank[k], 2679 &v->VRatioPrefetchY[k], 2680 &v->VRatioPrefetchC[k], 2681 &v->RequiredPrefetchPixDataBWLuma[k], 2682 &v->RequiredPrefetchPixDataBWChroma[k], 2683 &v->NotEnoughTimeForDynamicMetadata[k], 2684 &v->Tno_bw[k], 2685 &v->prefetch_vmrow_bw[k], 2686 &v->Tdmdl_vm[k], 2687 &v->Tdmdl[k], 2688 &v->TSetup[k], 2689 &v->VUpdateOffsetPix[k], 2690 &v->VUpdateWidthPix[k], 2691 &v->VReadyOffsetPix[k]); 2692 2693 #ifdef __DML_VBA_DEBUG__ 2694 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2695 #endif 2696 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2697 } 2698 2699 v->NoEnoughUrgentLatencyHiding = false; 2700 v->NoEnoughUrgentLatencyHidingPre = false; 2701 2702 for (k = 0; k 
< v->NumberOfActivePlanes; ++k) { 2703 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2704 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2705 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2706 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2707 2708 CalculateUrgentBurstFactor( 2709 v->swath_width_luma_ub[k], 2710 v->swath_width_chroma_ub[k], 2711 v->SwathHeightY[k], 2712 v->SwathHeightC[k], 2713 v->HTotal[k] / v->PixelClock[k], 2714 v->UrgentLatency, 2715 v->CursorBufferSize, 2716 v->CursorWidth[k][0], 2717 v->CursorBPP[k][0], 2718 v->VRatio[k], 2719 v->VRatioChroma[k], 2720 v->BytePerPixelDETY[k], 2721 v->BytePerPixelDETC[k], 2722 v->DETBufferSizeY[k], 2723 v->DETBufferSizeC[k], 2724 &v->UrgBurstFactorCursor[k], 2725 &v->UrgBurstFactorLuma[k], 2726 &v->UrgBurstFactorChroma[k], 2727 &v->NoUrgentLatencyHiding[k]); 2728 2729 CalculateUrgentBurstFactor( 2730 v->swath_width_luma_ub[k], 2731 v->swath_width_chroma_ub[k], 2732 v->SwathHeightY[k], 2733 v->SwathHeightC[k], 2734 v->HTotal[k] / v->PixelClock[k], 2735 v->UrgentLatency, 2736 v->CursorBufferSize, 2737 v->CursorWidth[k][0], 2738 v->CursorBPP[k][0], 2739 v->VRatioPrefetchY[k], 2740 v->VRatioPrefetchC[k], 2741 v->BytePerPixelDETY[k], 2742 v->BytePerPixelDETC[k], 2743 v->DETBufferSizeY[k], 2744 v->DETBufferSizeC[k], 2745 &v->UrgBurstFactorCursorPre[k], 2746 &v->UrgBurstFactorLumaPre[k], 2747 &v->UrgBurstFactorChromaPre[k], 2748 &v->NoUrgentLatencyHidingPre[k]); 2749 2750 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2751 + dml_max3( 2752 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2753 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2754 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2755 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2756 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2757 v->DPPPerPlane[k] 2758 * (v->RequiredPrefetchPixDataBWLuma[k] * 
v->UrgBurstFactorLumaPre[k] 2759 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2760 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2761 2762 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2763 + dml_max3( 2764 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2765 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2766 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2767 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2768 + v->cursor_bw_pre[k]); 2769 2770 #ifdef __DML_VBA_DEBUG__ 2771 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2772 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2773 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2774 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2775 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2776 2777 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2778 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2779 2780 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2781 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2782 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2783 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2784 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2785 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2786 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2787 dml_print("DML::%s: 
k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2788 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2789 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2790 #endif 2791 2792 if (v->DestinationLinesForPrefetch[k] < 2) 2793 DestinationLineTimesForPrefetchLessThan2 = true; 2794 2795 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2796 VRatioPrefetchMoreThan4 = true; 2797 2798 if (v->NoUrgentLatencyHiding[k] == true) 2799 v->NoEnoughUrgentLatencyHiding = true; 2800 2801 if (v->NoUrgentLatencyHidingPre[k] == true) 2802 v->NoEnoughUrgentLatencyHidingPre = true; 2803 } 2804 2805 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2806 2807 #ifdef __DML_VBA_DEBUG__ 2808 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2809 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); 2810 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); 2811 #endif 2812 2813 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2814 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2815 v->PrefetchModeSupported = true; 2816 else { 2817 v->PrefetchModeSupported = false; 2818 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2819 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2820 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2821 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? 
"is" : "is not"); 2822 } 2823 2824 // PREVIOUS_ERROR 2825 // This error result check was done after the PrefetchModeSupported. So we will 2826 // still try to calculate flip schedule even prefetch mode not supported 2827 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2828 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2829 v->PrefetchModeSupported = false; 2830 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2831 } 2832 } 2833 2834 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2835 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2836 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2837 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2838 - dml_max( 2839 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2840 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2841 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2842 v->DPPPerPlane[k] 2843 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2844 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2845 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2846 } 2847 2848 v->TotImmediateFlipBytes = 0; 2849 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2850 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2851 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2852 } 2853 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2854 CalculateFlipSchedule( 2855 mode_lib, 2856 k, 2857 HostVMInefficiencyFactor, 2858 v->UrgentExtraLatency, 2859 v->UrgentLatency, 2860 v->PDEAndMetaPTEBytesFrame[k], 2861 v->MetaRowByte[k], 2862 v->PixelPTEBytesPerRow[k]); 2863 } 2864 2865 v->total_dcn_read_bw_with_flip = 0.0; 2866 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2867 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2868 v->total_dcn_read_bw_with_flip = 
v->total_dcn_read_bw_with_flip 2869 + dml_max3( 2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2871 v->DPPPerPlane[k] * v->final_flip_bw[k] 2872 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2873 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2874 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2875 v->DPPPerPlane[k] 2876 * (v->final_flip_bw[k] 2877 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2878 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2879 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2880 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2881 + dml_max3( 2882 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2883 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2884 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2885 v->DPPPerPlane[k] 2886 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2887 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2888 } 2889 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2890 2891 v->ImmediateFlipSupported = true; 2892 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2893 #ifdef __DML_VBA_DEBUG__ 2894 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2895 #endif 2896 v->ImmediateFlipSupported = false; 2897 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2898 } 2899 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2900 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2901 #ifdef __DML_VBA_DEBUG__ 2902 dml_print("DML::%s: Pipe %0d not supporting iflip\n", 2903 __func__, k); 2904 #endif 2905 v->ImmediateFlipSupported = false; 2906 } 2907 } 2908 } else { 2909 v->ImmediateFlipSupported = false; 2910 } 2911 2912 v->PrefetchAndImmediateFlipSupported = 2913 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && 
!v->HostVMEnable 2914 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2915 v->ImmediateFlipSupported)) ? true : false; 2916 #ifdef __DML_VBA_DEBUG__ 2917 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2918 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required); 2919 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 2920 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 2921 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 2922 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 2923 #endif 2924 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 2925 2926 v->VStartupLines = v->VStartupLines + 1; 2927 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 2928 ASSERT(v->PrefetchAndImmediateFlipSupported); 2929 2930 // Unbounded Request Enabled 2931 CalculateUnboundedRequestAndCompressedBufferSize( 2932 v->DETBufferSizeInKByte[0], 2933 v->ConfigReturnBufferSizeInKByte, 2934 v->UseUnboundedRequesting, 2935 v->TotalActiveDPP, 2936 NoChromaPlanes, 2937 v->MaxNumDPP, 2938 v->CompressedBufferSegmentSizeInkByte, 2939 v->Output, 2940 &v->UnboundedRequestEnabled, 2941 &v->CompressedBufferSizeInkByte); 2942 2943 //Watermarks and NB P-State/DRAM Clock Change Support 2944 { 2945 enum clock_change_support DRAMClockChangeSupport; // dummy 2946 CalculateWatermarksAndDRAMSpeedChangeSupport( 2947 mode_lib, 2948 PrefetchMode, 2949 v->DCFCLK, 2950 v->ReturnBW, 2951 v->UrgentLatency, 2952 v->UrgentExtraLatency, 2953 v->SOCCLK, 2954 v->DCFCLKDeepSleep, 2955 v->DETBufferSizeY, 2956 v->DETBufferSizeC, 2957 v->SwathHeightY, 2958 v->SwathHeightC, 2959 v->SwathWidthY, 2960 v->SwathWidthC, 2961 
v->DPPPerPlane, 2962 v->BytePerPixelDETY, 2963 v->BytePerPixelDETC, 2964 v->UnboundedRequestEnabled, 2965 v->CompressedBufferSizeInkByte, 2966 &DRAMClockChangeSupport, 2967 &v->StutterExitWatermark, 2968 &v->StutterEnterPlusExitWatermark, 2969 &v->Z8StutterExitWatermark, 2970 &v->Z8StutterEnterPlusExitWatermark); 2971 2972 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2973 if (v->WritebackEnable[k] == true) { 2974 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 2975 0, 2976 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 2977 } else { 2978 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 2979 } 2980 } 2981 } 2982 2983 //Display Pipeline Delivery Time in Prefetch, Groups 2984 CalculatePixelDeliveryTimes( 2985 v->NumberOfActivePlanes, 2986 v->VRatio, 2987 v->VRatioChroma, 2988 v->VRatioPrefetchY, 2989 v->VRatioPrefetchC, 2990 v->swath_width_luma_ub, 2991 v->swath_width_chroma_ub, 2992 v->DPPPerPlane, 2993 v->HRatio, 2994 v->HRatioChroma, 2995 v->PixelClock, 2996 v->PSCL_THROUGHPUT_LUMA, 2997 v->PSCL_THROUGHPUT_CHROMA, 2998 v->DPPCLK, 2999 v->BytePerPixelC, 3000 v->SourceScan, 3001 v->NumberOfCursors, 3002 v->CursorWidth, 3003 v->CursorBPP, 3004 v->BlockWidth256BytesY, 3005 v->BlockHeight256BytesY, 3006 v->BlockWidth256BytesC, 3007 v->BlockHeight256BytesC, 3008 v->DisplayPipeLineDeliveryTimeLuma, 3009 v->DisplayPipeLineDeliveryTimeChroma, 3010 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3011 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3012 v->DisplayPipeRequestDeliveryTimeLuma, 3013 v->DisplayPipeRequestDeliveryTimeChroma, 3014 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3015 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3016 v->CursorRequestDeliveryTime, 3017 v->CursorRequestDeliveryTimePrefetch); 3018 3019 CalculateMetaAndPTETimes( 3020 v->NumberOfActivePlanes, 3021 v->GPUVMEnable, 3022 v->MetaChunkSize, 3023 v->MinMetaChunkSizeBytes, 3024 v->HTotal, 3025 v->VRatio, 3026 v->VRatioChroma, 3027 
v->DestinationLinesToRequestRowInVBlank, 3028 v->DestinationLinesToRequestRowInImmediateFlip, 3029 v->DCCEnable, 3030 v->PixelClock, 3031 v->BytePerPixelY, 3032 v->BytePerPixelC, 3033 v->SourceScan, 3034 v->dpte_row_height, 3035 v->dpte_row_height_chroma, 3036 v->meta_row_width, 3037 v->meta_row_width_chroma, 3038 v->meta_row_height, 3039 v->meta_row_height_chroma, 3040 v->meta_req_width, 3041 v->meta_req_width_chroma, 3042 v->meta_req_height, 3043 v->meta_req_height_chroma, 3044 v->dpte_group_bytes, 3045 v->PTERequestSizeY, 3046 v->PTERequestSizeC, 3047 v->PixelPTEReqWidthY, 3048 v->PixelPTEReqHeightY, 3049 v->PixelPTEReqWidthC, 3050 v->PixelPTEReqHeightC, 3051 v->dpte_row_width_luma_ub, 3052 v->dpte_row_width_chroma_ub, 3053 v->DST_Y_PER_PTE_ROW_NOM_L, 3054 v->DST_Y_PER_PTE_ROW_NOM_C, 3055 v->DST_Y_PER_META_ROW_NOM_L, 3056 v->DST_Y_PER_META_ROW_NOM_C, 3057 v->TimePerMetaChunkNominal, 3058 v->TimePerChromaMetaChunkNominal, 3059 v->TimePerMetaChunkVBlank, 3060 v->TimePerChromaMetaChunkVBlank, 3061 v->TimePerMetaChunkFlip, 3062 v->TimePerChromaMetaChunkFlip, 3063 v->time_per_pte_group_nom_luma, 3064 v->time_per_pte_group_vblank_luma, 3065 v->time_per_pte_group_flip_luma, 3066 v->time_per_pte_group_nom_chroma, 3067 v->time_per_pte_group_vblank_chroma, 3068 v->time_per_pte_group_flip_chroma); 3069 3070 CalculateVMGroupAndRequestTimes( 3071 v->NumberOfActivePlanes, 3072 v->GPUVMEnable, 3073 v->GPUVMMaxPageTableLevels, 3074 v->HTotal, 3075 v->BytePerPixelC, 3076 v->DestinationLinesToRequestVMInVBlank, 3077 v->DestinationLinesToRequestVMInImmediateFlip, 3078 v->DCCEnable, 3079 v->PixelClock, 3080 v->dpte_row_width_luma_ub, 3081 v->dpte_row_width_chroma_ub, 3082 v->vm_group_bytes, 3083 v->dpde0_bytes_per_frame_ub_l, 3084 v->dpde0_bytes_per_frame_ub_c, 3085 v->meta_pte_bytes_per_frame_ub_l, 3086 v->meta_pte_bytes_per_frame_ub_c, 3087 v->TimePerVMGroupVBlank, 3088 v->TimePerVMGroupFlip, 3089 v->TimePerVMRequestVBlank, 3090 v->TimePerVMRequestFlip); 3091 3092 // Min 
TTUVBlank 3093 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3094 if (PrefetchMode == 0) { 3095 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3096 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3097 v->MinTTUVBlank[k] = dml_max( 3098 v->DRAMClockChangeWatermark, 3099 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3100 } else if (PrefetchMode == 1) { 3101 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3102 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3103 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3104 } else { 3105 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3106 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3107 v->MinTTUVBlank[k] = v->UrgentWatermark; 3108 } 3109 if (!v->DynamicMetadataEnable[k]) 3110 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3111 } 3112 3113 // DCC Configuration 3114 v->ActiveDPPs = 0; 3115 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3116 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3117 v->SourcePixelFormat[k], 3118 v->SurfaceWidthY[k], 3119 v->SurfaceWidthC[k], 3120 v->SurfaceHeightY[k], 3121 v->SurfaceHeightC[k], 3122 v->DETBufferSizeInKByte[0] * 1024, 3123 v->BlockHeight256BytesY[k], 3124 v->BlockHeight256BytesC[k], 3125 v->SurfaceTiling[k], 3126 v->BytePerPixelY[k], 3127 v->BytePerPixelC[k], 3128 v->BytePerPixelDETY[k], 3129 v->BytePerPixelDETC[k], 3130 v->SourceScan[k], 3131 &v->DCCYMaxUncompressedBlock[k], 3132 &v->DCCCMaxUncompressedBlock[k], 3133 &v->DCCYMaxCompressedBlock[k], 3134 &v->DCCCMaxCompressedBlock[k], 3135 &v->DCCYIndependentBlock[k], 3136 &v->DCCCIndependentBlock[k]); 3137 } 3138 3139 // VStartup Adjustment 3140 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3141 bool isInterlaceTiming; 3142 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3143 #ifdef __DML_VBA_DEBUG__ 3144 dml_print("DML::%s: k=%d, 
MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3145 #endif 3146 3147 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3148 3149 #ifdef __DML_VBA_DEBUG__ 3150 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3151 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3152 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3153 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3154 #endif 3155 3156 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3157 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3158 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3159 } 3160 3161 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3162 3163 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) 3164 - v->VFrontPorch[k]) 3165 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) 3166 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3167 3168 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3169 3170 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3171 <= (isInterlaceTiming ? 
3172 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3173 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3174 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3175 } else { 3176 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3177 } 3178 #ifdef __DML_VBA_DEBUG__ 3179 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3180 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3181 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3182 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3183 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3184 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3185 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3186 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3187 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3188 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3189 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3190 #endif 3191 } 3192 3193 { 3194 //Maximum Bandwidth Used 3195 double TotalWRBandwidth = 0; 3196 double MaxPerPlaneVActiveWRBandwidth = 0; 3197 double WRBandwidth = 0; 3198 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3199 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3200 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3201 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3202 } else if (v->WritebackEnable[k] == true) { 3203 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3204 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3205 } 3206 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3207 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3208 } 3209 3210 v->TotalDataReadBandwidth = 0; 3211 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3212 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3213 } 3214 } 3215 // Stutter Efficiency 3216 CalculateStutterEfficiency( 3217 mode_lib, 3218 v->CompressedBufferSizeInkByte, 3219 v->UnboundedRequestEnabled, 3220 v->ConfigReturnBufferSizeInKByte, 3221 v->MetaFIFOSizeInKEntries, 3222 v->ZeroSizeBufferEntries, 3223 v->NumberOfActivePlanes, 3224 v->ROBBufferSizeInKByte, 3225 v->TotalDataReadBandwidth, 3226 v->DCFCLK, 3227 v->ReturnBW, 3228 v->COMPBUF_RESERVED_SPACE_64B, 3229 v->COMPBUF_RESERVED_SPACE_ZS, 3230 v->SRExitTime, 3231 v->SRExitZ8Time, 3232 v->SynchronizedVBlank, 3233 v->StutterEnterPlusExitWatermark, 3234 v->Z8StutterEnterPlusExitWatermark, 3235 v->ProgressiveToInterlaceUnitInOPP, 3236 v->Interlace, 3237 v->MinTTUVBlank, 3238 v->DPPPerPlane, 3239 v->DETBufferSizeY, 3240 v->BytePerPixelY, 3241 v->BytePerPixelDETY, 3242 v->SwathWidthY, 3243 v->SwathHeightY, 3244 v->SwathHeightC, 3245 v->DCCRateLuma, 3246 v->DCCRateChroma, 3247 v->DCCFractionOfZeroSizeRequestsLuma, 3248 v->DCCFractionOfZeroSizeRequestsChroma, 3249 v->HTotal, 3250 v->VTotal, 3251 v->PixelClock, 3252 v->VRatio, 3253 v->SourceScan, 3254 v->BlockHeight256BytesY, 3255 v->BlockWidth256BytesY, 3256 v->BlockHeight256BytesC, 3257 v->BlockWidth256BytesC, 3258 v->DCCYMaxUncompressedBlock, 3259 v->DCCCMaxUncompressedBlock, 3260 v->VActive, 3261 v->DCCEnable, 3262 v->WritebackEnable, 3263 v->ReadBandwidthPlaneLuma, 3264 v->ReadBandwidthPlaneChroma, 3265 v->meta_row_bw, 3266 v->dpte_row_bw, 3267 &v->StutterEfficiencyNotIncludingVBlank, 3268 &v->StutterEfficiency, 3269 &v->NumberOfStutterBurstsPerFrame, 3270 &v->Z8StutterEfficiencyNotIncludingVBlank, 3271 &v->Z8StutterEfficiency, 3272 
&v->Z8NumberOfStutterBurstsPerFrame, 3273 &v->StutterPeriod); 3274 } 3275 3276 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3277 { 3278 struct vba_vars_st *v = &mode_lib->vba; 3279 // Display Pipe Configuration 3280 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3281 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3282 int BytePerPixY[DC__NUM_DPP__MAX]; 3283 int BytePerPixC[DC__NUM_DPP__MAX]; 3284 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3285 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3286 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3287 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3288 double dummy1[DC__NUM_DPP__MAX]; 3289 double dummy2[DC__NUM_DPP__MAX]; 3290 double dummy3[DC__NUM_DPP__MAX]; 3291 double dummy4[DC__NUM_DPP__MAX]; 3292 int dummy5[DC__NUM_DPP__MAX]; 3293 int dummy6[DC__NUM_DPP__MAX]; 3294 bool dummy7[DC__NUM_DPP__MAX]; 3295 bool dummysinglestring; 3296 3297 unsigned int k; 3298 3299 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3300 3301 dml30_CalculateBytePerPixelAnd256BBlockSizes( 3302 v->SourcePixelFormat[k], 3303 v->SurfaceTiling[k], 3304 &BytePerPixY[k], 3305 &BytePerPixC[k], 3306 &BytePerPixDETY[k], 3307 &BytePerPixDETC[k], 3308 &Read256BytesBlockHeightY[k], 3309 &Read256BytesBlockHeightC[k], 3310 &Read256BytesBlockWidthY[k], 3311 &Read256BytesBlockWidthC[k]); 3312 } 3313 3314 CalculateSwathAndDETConfiguration( 3315 false, 3316 v->NumberOfActivePlanes, 3317 v->DETBufferSizeInKByte[0], 3318 dummy1, 3319 dummy2, 3320 v->SourceScan, 3321 v->SourcePixelFormat, 3322 v->SurfaceTiling, 3323 v->ViewportWidth, 3324 v->ViewportHeight, 3325 v->SurfaceWidthY, 3326 v->SurfaceWidthC, 3327 v->SurfaceHeightY, 3328 v->SurfaceHeightC, 3329 Read256BytesBlockHeightY, 3330 Read256BytesBlockHeightC, 3331 Read256BytesBlockWidthY, 3332 Read256BytesBlockWidthC, 3333 v->ODMCombineEnabled, 3334 v->BlendingAndTiming, 3335 BytePerPixY, 3336 BytePerPixC, 3337 BytePerPixDETY, 3338 BytePerPixDETC, 3339 v->HActive, 3340 v->HRatio, 3341 
v->HRatioChroma, 3342 v->DPPPerPlane, 3343 dummy5, 3344 dummy6, 3345 dummy3, 3346 dummy4, 3347 v->SwathHeightY, 3348 v->SwathHeightC, 3349 v->DETBufferSizeY, 3350 v->DETBufferSizeC, 3351 dummy7, 3352 &dummysinglestring); 3353 } 3354 3355 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3356 { 3357 if (PrefetchMode == 0) { 3358 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3359 } else if (PrefetchMode == 1) { 3360 return dml_max(SREnterPlusExitTime, UrgentLatency); 3361 } else { 3362 return UrgentLatency; 3363 } 3364 } 3365 3366 double dml31_CalculateWriteBackDISPCLK( 3367 enum source_format_class WritebackPixelFormat, 3368 double PixelClock, 3369 double WritebackHRatio, 3370 double WritebackVRatio, 3371 unsigned int WritebackHTaps, 3372 unsigned int WritebackVTaps, 3373 long WritebackSourceWidth, 3374 long WritebackDestinationWidth, 3375 unsigned int HTotal, 3376 unsigned int WritebackLineBufferSize) 3377 { 3378 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3379 3380 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3381 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3382 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3383 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3384 } 3385 3386 static double CalculateWriteBackDelay( 3387 enum source_format_class WritebackPixelFormat, 3388 double WritebackHRatio, 3389 double WritebackVRatio, 3390 unsigned int WritebackVTaps, 3391 int WritebackDestinationWidth, 3392 int WritebackDestinationHeight, 3393 int WritebackSourceHeight, 3394 unsigned int HTotal) 3395 { 3396 double CalculateWriteBackDelay; 3397 double Line_length; 3398 double Output_lines_last_notclamped; 3399 double WritebackVInit; 3400 3401 
WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3402 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3403 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3404 if (Output_lines_last_notclamped < 0) { 3405 CalculateWriteBackDelay = 0; 3406 } else { 3407 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3408 } 3409 return CalculateWriteBackDelay; 3410 } 3411 3412 static void CalculateVupdateAndDynamicMetadataParameters( 3413 int MaxInterDCNTileRepeaters, 3414 double DPPCLK, 3415 double DISPCLK, 3416 double DCFClkDeepSleep, 3417 double PixelClock, 3418 int HTotal, 3419 int VBlank, 3420 int DynamicMetadataTransmittedBytes, 3421 int DynamicMetadataLinesBeforeActiveRequired, 3422 int InterlaceEnable, 3423 bool ProgressiveToInterlaceUnitInOPP, 3424 double *TSetup, 3425 double *Tdmbf, 3426 double *Tdmec, 3427 double *Tdmsks, 3428 int *VUpdateOffsetPix, 3429 double *VUpdateWidthPix, 3430 double *VReadyOffsetPix) 3431 { 3432 double TotalRepeaterDelayTime; 3433 3434 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3435 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3436 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3437 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3438 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3439 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3440 *Tdmec = HTotal / PixelClock; 3441 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3442 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3443 } else { 3444 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3445 } 
3446 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3447 *Tdmsks = *Tdmsks / 2; 3448 } 3449 #ifdef __DML_VBA_DEBUG__ 3450 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3451 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3452 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3453 #endif 3454 } 3455 3456 static void CalculateRowBandwidth( 3457 bool GPUVMEnable, 3458 enum source_format_class SourcePixelFormat, 3459 double VRatio, 3460 double VRatioChroma, 3461 bool DCCEnable, 3462 double LineTime, 3463 unsigned int MetaRowByteLuma, 3464 unsigned int MetaRowByteChroma, 3465 unsigned int meta_row_height_luma, 3466 unsigned int meta_row_height_chroma, 3467 unsigned int PixelPTEBytesPerRowLuma, 3468 unsigned int PixelPTEBytesPerRowChroma, 3469 unsigned int dpte_row_height_luma, 3470 unsigned int dpte_row_height_chroma, 3471 double *meta_row_bw, 3472 double *dpte_row_bw) 3473 { 3474 if (DCCEnable != true) { 3475 *meta_row_bw = 0; 3476 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3477 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3478 } else { 3479 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3480 } 3481 3482 if (GPUVMEnable != true) { 3483 *dpte_row_bw = 0; 3484 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3485 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3486 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3487 } else { 3488 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3489 } 3490 } 3491 3492 static void 
CalculateFlipSchedule( 3493 struct display_mode_lib *mode_lib, 3494 unsigned int k, 3495 double HostVMInefficiencyFactor, 3496 double UrgentExtraLatency, 3497 double UrgentLatency, 3498 double PDEAndMetaPTEBytesPerFrame, 3499 double MetaRowBytes, 3500 double DPTEBytesPerRow) 3501 { 3502 struct vba_vars_st *v = &mode_lib->vba; 3503 double min_row_time = 0.0; 3504 unsigned int HostVMDynamicLevelsTrips; 3505 double TimeForFetchingMetaPTEImmediateFlip; 3506 double TimeForFetchingRowInVBlankImmediateFlip; 3507 double ImmediateFlipBW; 3508 double LineTime = v->HTotal[k] / v->PixelClock[k]; 3509 3510 if (v->GPUVMEnable == true && v->HostVMEnable == true) { 3511 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3512 } else { 3513 HostVMDynamicLevelsTrips = 0; 3514 } 3515 3516 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { 3517 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; 3518 } 3519 3520 if (v->GPUVMEnable == true) { 3521 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3522 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3523 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3524 LineTime / 4.0); 3525 } else { 3526 TimeForFetchingMetaPTEImmediateFlip = 0; 3527 } 3528 3529 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3530 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3531 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3532 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3533 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3534 LineTime / 4); 3535 } else { 3536 TimeForFetchingRowInVBlankImmediateFlip = 0; 3537 } 3538 3539 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / 
LineTime), 1) / 4.0; 3540 3541 if (v->GPUVMEnable == true) { 3542 v->final_flip_bw[k] = dml_max( 3543 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), 3544 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); 3545 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3546 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); 3547 } else { 3548 v->final_flip_bw[k] = 0; 3549 } 3550 3551 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 3552 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3553 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3554 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3555 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3556 } else { 3557 min_row_time = dml_min4( 3558 v->dpte_row_height[k] * LineTime / v->VRatio[k], 3559 v->meta_row_height[k] * LineTime / v->VRatio[k], 3560 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], 3561 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3562 } 3563 } else { 3564 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3565 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; 3566 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3567 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; 3568 } else { 3569 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); 3570 } 3571 } 3572 3573 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || 
v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 3574 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3575 v->ImmediateFlipSupportedForPipe[k] = false; 3576 } else { 3577 v->ImmediateFlipSupportedForPipe[k] = true; 3578 } 3579 3580 #ifdef __DML_VBA_DEBUG__ 3581 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); 3582 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); 3583 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3584 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3585 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3586 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); 3587 #endif 3588 3589 } 3590 3591 static double TruncToValidBPP( 3592 double LinkBitRate, 3593 int Lanes, 3594 int HTotal, 3595 int HActive, 3596 double PixelClock, 3597 double DesiredBPP, 3598 bool DSCEnable, 3599 enum output_encoder_class Output, 3600 enum output_format_class Format, 3601 unsigned int DSCInputBitPerComponent, 3602 int DSCSlices, 3603 int AudioRate, 3604 int AudioLayout, 3605 enum odm_combine_mode ODMCombine) 3606 { 3607 double MaxLinkBPP; 3608 int MinDSCBPP; 3609 double MaxDSCBPP; 3610 int NonDSCBPP0; 3611 int NonDSCBPP1; 3612 int NonDSCBPP2; 3613 3614 if (Format == dm_420) { 3615 NonDSCBPP0 = 12; 3616 NonDSCBPP1 = 15; 3617 NonDSCBPP2 = 18; 3618 MinDSCBPP = 6; 3619 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3620 } else if (Format == dm_444) { 3621 NonDSCBPP0 = 24; 3622 NonDSCBPP1 = 30; 3623 NonDSCBPP2 = 36; 3624 MinDSCBPP = 8; 3625 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3626 } else { 3627 3628 NonDSCBPP0 = 16; 3629 
NonDSCBPP1 = 20; 3630 NonDSCBPP2 = 24; 3631 3632 if (Format == dm_n422) { 3633 MinDSCBPP = 7; 3634 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3635 } else { 3636 MinDSCBPP = 8; 3637 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3638 } 3639 } 3640 3641 if (DSCEnable && Output == dm_dp) { 3642 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3643 } else { 3644 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3645 } 3646 3647 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3648 MaxLinkBPP = 16; 3649 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3650 MaxLinkBPP = 32; 3651 } 3652 3653 if (DesiredBPP == 0) { 3654 if (DSCEnable) { 3655 if (MaxLinkBPP < MinDSCBPP) { 3656 return BPP_INVALID; 3657 } else if (MaxLinkBPP >= MaxDSCBPP) { 3658 return MaxDSCBPP; 3659 } else { 3660 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3661 } 3662 } else { 3663 if (MaxLinkBPP >= NonDSCBPP2) { 3664 return NonDSCBPP2; 3665 } else if (MaxLinkBPP >= NonDSCBPP1) { 3666 return NonDSCBPP1; 3667 } else if (MaxLinkBPP >= NonDSCBPP0) { 3668 return 16.0; 3669 } else { 3670 return BPP_INVALID; 3671 } 3672 } 3673 } else { 3674 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3675 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3676 return BPP_INVALID; 3677 } else { 3678 return DesiredBPP; 3679 } 3680 } 3681 return BPP_INVALID; 3682 } 3683 3684 static noinline void CalculatePrefetchSchedulePerPlane( 3685 struct display_mode_lib *mode_lib, 3686 double HostVMInefficiencyFactor, 3687 int i, 3688 unsigned j, 3689 unsigned k) 3690 { 3691 struct vba_vars_st *v = &mode_lib->vba; 3692 Pipe myPipe; 3693 3694 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3695 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3696 myPipe.PixelClock = v->PixelClock[k]; 3697 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3698 myPipe.DPPPerPlane = 
v->NoOfDPP[i][j][k]; 3699 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3700 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3701 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3702 3703 myPipe.SourceScan = v->SourceScan[k]; 3704 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3705 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3706 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3707 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3708 myPipe.InterlaceEnable = v->Interlace[k]; 3709 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3710 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3711 myPipe.HTotal = v->HTotal[k]; 3712 myPipe.DCCEnable = v->DCCEnable[k]; 3713 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3714 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3715 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3716 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3717 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3718 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3719 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3720 mode_lib, 3721 HostVMInefficiencyFactor, 3722 &myPipe, 3723 v->DSCDelayPerState[i][k], 3724 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3725 v->DPPCLKDelaySCL, 3726 v->DPPCLKDelaySCLLBOnly, 3727 v->DPPCLKDelayCNVCCursor, 3728 v->DISPCLKDelaySubtotal, 3729 v->SwathWidthYThisState[k] / v->HRatio[k], 3730 v->OutputFormat[k], 3731 v->MaxInterDCNTileRepeaters, 3732 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3733 v->MaximumVStartup[i][j][k], 3734 v->GPUVMMaxPageTableLevels, 3735 v->GPUVMEnable, 3736 v->HostVMEnable, 3737 v->HostVMMaxNonCachedPageTableLevels, 3738 v->HostVMMinPageSize, 3739 v->DynamicMetadataEnable[k], 3740 v->DynamicMetadataVMEnabled, 3741 v->DynamicMetadataLinesBeforeActiveRequired[k], 3742 v->DynamicMetadataTransmittedBytes[k], 3743 v->UrgLatency[i], 3744 v->ExtraLatency, 3745 v->TimeCalc, 
3746 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3747 v->MetaRowBytes[i][j][k], 3748 v->DPTEBytesPerRow[i][j][k], 3749 v->PrefetchLinesY[i][j][k], 3750 v->SwathWidthYThisState[k], 3751 v->PrefillY[k], 3752 v->MaxNumSwY[k], 3753 v->PrefetchLinesC[i][j][k], 3754 v->SwathWidthCThisState[k], 3755 v->PrefillC[k], 3756 v->MaxNumSwC[k], 3757 v->swath_width_luma_ub_this_state[k], 3758 v->swath_width_chroma_ub_this_state[k], 3759 v->SwathHeightYThisState[k], 3760 v->SwathHeightCThisState[k], 3761 v->TWait, 3762 &v->DSTXAfterScaler[k], 3763 &v->DSTYAfterScaler[k], 3764 &v->LineTimesForPrefetch[k], 3765 &v->PrefetchBW[k], 3766 &v->LinesForMetaPTE[k], 3767 &v->LinesForMetaAndDPTERow[k], 3768 &v->VRatioPreY[i][j][k], 3769 &v->VRatioPreC[i][j][k], 3770 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3771 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3772 &v->NoTimeForDynamicMetadata[i][j][k], 3773 &v->Tno_bw[k], 3774 &v->prefetch_vmrow_bw[k], 3775 &v->dummy7[k], 3776 &v->dummy8[k], 3777 &v->dummy13[k], 3778 &v->VUpdateOffsetPix[k], 3779 &v->VUpdateWidthPix[k], 3780 &v->VReadyOffsetPix[k]); 3781 } 3782 3783 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte) 3784 { 3785 int i, total_pipes = 0; 3786 for (i = 0; i < NumberOfActivePlanes; i++) 3787 total_pipes += NoOfDPPThisState[i]; 3788 *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64; 3789 if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE) 3790 *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE; 3791 } 3792 3793 3794 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3795 { 3796 struct vba_vars_st *v = &mode_lib->vba; 3797 3798 int i, j; 3799 unsigned int k, m; 3800 int ReorderingBytes; 3801 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3802 bool NoChroma = true; 3803 bool EnoughWritebackUnits = true; 3804 bool 
P2IWith420 = false; 3805 bool DSCOnlyIfNecessaryWithBPP = false; 3806 bool DSC422NativeNotSupported = false; 3807 double MaxTotalVActiveRDBandwidth; 3808 bool ViewportExceedsSurface = false; 3809 bool FMTBufferExceeded = false; 3810 3811 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3812 3813 CalculateMinAndMaxPrefetchMode( 3814 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3815 &MinPrefetchMode, &MaxPrefetchMode); 3816 3817 /*Scale Ratio, taps Support Check*/ 3818 3819 v->ScaleRatioAndTapsSupport = true; 3820 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3821 if (v->ScalerEnabled[k] == false 3822 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3823 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3824 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3825 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3826 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3827 v->ScaleRatioAndTapsSupport = false; 3828 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3829 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3830 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3831 || v->VRatio[k] > v->vtaps[k] 3832 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3833 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3834 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3835 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3836 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3837 || v->HRatioChroma[k] > v->MaxHSCLRatio 3838 || v->VRatioChroma[k] > v->MaxVSCLRatio 3839 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3840 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3841 
v->ScaleRatioAndTapsSupport = false; 3842 } 3843 } 3844 /*Source Format, Pixel Format and Scan Support Check*/ 3845 3846 v->SourceFormatPixelAndScanSupport = true; 3847 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3848 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 3849 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 3850 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 3851 v->SourceFormatPixelAndScanSupport = false; 3852 } 3853 } 3854 /*Bandwidth Support Check*/ 3855 3856 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3857 dml30_CalculateBytePerPixelAnd256BBlockSizes( 3858 v->SourcePixelFormat[k], 3859 v->SurfaceTiling[k], 3860 &v->BytePerPixelY[k], 3861 &v->BytePerPixelC[k], 3862 &v->BytePerPixelInDETY[k], 3863 &v->BytePerPixelInDETC[k], 3864 &v->Read256BlockHeightY[k], 3865 &v->Read256BlockHeightC[k], 3866 &v->Read256BlockWidthY[k], 3867 &v->Read256BlockWidthC[k]); 3868 } 3869 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3870 if (v->SourceScan[k] != dm_vert) { 3871 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 3872 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 3873 } else { 3874 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 3875 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 3876 } 3877 } 3878 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3879 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 3880 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 3881 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 3882 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 3883 } 3884 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3885 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 3886 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 
3887 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 3888 } else if (v->WritebackEnable[k] == true) { 3889 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3890 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 3891 } else { 3892 v->WriteBandwidth[k] = 0.0; 3893 } 3894 } 3895 3896 /*Writeback Latency support check*/ 3897 3898 v->WritebackLatencySupport = true; 3899 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3900 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 3901 v->WritebackLatencySupport = false; 3902 } 3903 } 3904 3905 /*Writeback Mode Support Check*/ 3906 3907 v->TotalNumberOfActiveWriteback = 0; 3908 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3909 if (v->WritebackEnable[k] == true) { 3910 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 3911 } 3912 } 3913 3914 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 3915 EnoughWritebackUnits = false; 3916 } 3917 3918 /*Writeback Scale Ratio and Taps Support Check*/ 3919 3920 v->WritebackScaleRatioAndTapsSupport = true; 3921 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3922 if (v->WritebackEnable[k] == true) { 3923 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 3924 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 3925 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 3926 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 3927 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 3928 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 3929 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 3930 v->WritebackScaleRatioAndTapsSupport = false; 3931 } 3932 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 3933 
v->WritebackScaleRatioAndTapsSupport = false; 3934 } 3935 } 3936 } 3937 /*Maximum DISPCLK/DPPCLK Support check*/ 3938 3939 v->WritebackRequiredDISPCLK = 0.0; 3940 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3941 if (v->WritebackEnable[k] == true) { 3942 v->WritebackRequiredDISPCLK = dml_max( 3943 v->WritebackRequiredDISPCLK, 3944 dml31_CalculateWriteBackDISPCLK( 3945 v->WritebackPixelFormat[k], 3946 v->PixelClock[k], 3947 v->WritebackHRatio[k], 3948 v->WritebackVRatio[k], 3949 v->WritebackHTaps[k], 3950 v->WritebackVTaps[k], 3951 v->WritebackSourceWidth[k], 3952 v->WritebackDestinationWidth[k], 3953 v->HTotal[k], 3954 v->WritebackLineBufferSize)); 3955 } 3956 } 3957 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3958 if (v->HRatio[k] > 1.0) { 3959 v->PSCL_FACTOR[k] = dml_min( 3960 v->MaxDCHUBToPSCLThroughput, 3961 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 3962 } else { 3963 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 3964 } 3965 if (v->BytePerPixelC[k] == 0.0) { 3966 v->PSCL_FACTOR_CHROMA[k] = 0.0; 3967 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 3968 * dml_max3( 3969 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 3970 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 3971 1.0); 3972 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 3973 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 3974 } 3975 } else { 3976 if (v->HRatioChroma[k] > 1.0) { 3977 v->PSCL_FACTOR_CHROMA[k] = dml_min( 3978 v->MaxDCHUBToPSCLThroughput, 3979 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 3980 } else { 3981 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 3982 } 3983 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 3984 * dml_max5( 3985 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 3986 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 3987 v->VTAPsChroma[k] 
/ 6.0 * dml_min(1.0, v->HRatioChroma[k]), 3988 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 3989 1.0); 3990 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 3991 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 3992 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 3993 } 3994 } 3995 } 3996 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3997 int MaximumSwathWidthSupportLuma; 3998 int MaximumSwathWidthSupportChroma; 3999 4000 if (v->SurfaceTiling[k] == dm_sw_linear) { 4001 MaximumSwathWidthSupportLuma = 8192.0; 4002 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4003 MaximumSwathWidthSupportLuma = 2880.0; 4004 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4005 MaximumSwathWidthSupportLuma = 3840.0; 4006 } else { 4007 MaximumSwathWidthSupportLuma = 5760.0; 4008 } 4009 4010 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4011 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4012 } else { 4013 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4014 } 4015 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4016 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4017 if (v->BytePerPixelC[k] == 0.0) { 4018 v->MaximumSwathWidthInLineBufferChroma = 0; 4019 } else { 4020 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4021 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4022 } 4023 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4024 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4025 } 4026 4027 CalculateSwathAndDETConfiguration( 4028 true, 4029 
v->NumberOfActivePlanes, 4030 v->DETBufferSizeInKByte[0], 4031 v->MaximumSwathWidthLuma, 4032 v->MaximumSwathWidthChroma, 4033 v->SourceScan, 4034 v->SourcePixelFormat, 4035 v->SurfaceTiling, 4036 v->ViewportWidth, 4037 v->ViewportHeight, 4038 v->SurfaceWidthY, 4039 v->SurfaceWidthC, 4040 v->SurfaceHeightY, 4041 v->SurfaceHeightC, 4042 v->Read256BlockHeightY, 4043 v->Read256BlockHeightC, 4044 v->Read256BlockWidthY, 4045 v->Read256BlockWidthC, 4046 v->odm_combine_dummy, 4047 v->BlendingAndTiming, 4048 v->BytePerPixelY, 4049 v->BytePerPixelC, 4050 v->BytePerPixelInDETY, 4051 v->BytePerPixelInDETC, 4052 v->HActive, 4053 v->HRatio, 4054 v->HRatioChroma, 4055 v->NoOfDPPThisState, 4056 v->swath_width_luma_ub_this_state, 4057 v->swath_width_chroma_ub_this_state, 4058 v->SwathWidthYThisState, 4059 v->SwathWidthCThisState, 4060 v->SwathHeightYThisState, 4061 v->SwathHeightCThisState, 4062 v->DETBufferSizeYThisState, 4063 v->DETBufferSizeCThisState, 4064 v->SingleDPPViewportSizeSupportPerPlane, 4065 &v->ViewportSizeSupport[0][0]); 4066 4067 for (i = 0; i < v->soc.num_states; i++) { 4068 for (j = 0; j < 2; j++) { 4069 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4070 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4071 v->RequiredDISPCLK[i][j] = 0.0; 4072 v->DISPCLK_DPPCLK_Support[i][j] = true; 4073 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4074 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4075 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4076 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4077 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4078 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4079 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4080 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4081 } 4082 
v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4083 * (1 + v->DISPCLKRampingMargin / 100.0); 4084 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4085 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4086 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4087 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4088 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4089 } 4090 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4091 * (1 + v->DISPCLKRampingMargin / 100.0); 4092 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4093 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4094 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4095 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4096 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4097 } 4098 4099 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4100 || !(v->Output[k] == dm_dp || 4101 v->Output[k] == dm_dp2p0 || 4102 v->Output[k] == dm_edp)) { 4103 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4104 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4105 4106 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4107 FMTBufferExceeded = true; 4108 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4109 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4110 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4111 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4112 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4113 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4114 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4115 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > 
v->MaxDispclkRoundedDownToDFSGranularity) { 4116 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4117 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4118 } else { 4119 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4120 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4121 } 4122 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH 4123 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4124 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { 4125 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4126 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4127 } else { 4128 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4129 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4130 } 4131 } 4132 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH 4133 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4134 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { 4135 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4136 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4137 4138 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4139 FMTBufferExceeded = true; 4140 } else { 4141 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4142 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4143 } 4144 } 4145 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4146 v->MPCCombine[i][j][k] = false; 4147 v->NoOfDPP[i][j][k] = 4; 4148 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4149 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4150 v->MPCCombine[i][j][k] = false; 4151 v->NoOfDPP[i][j][k] = 2; 4152 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + 
v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4153 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4154 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4155 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4156 v->MPCCombine[i][j][k] = false; 4157 v->NoOfDPP[i][j][k] = 1; 4158 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4159 } else { 4160 v->MPCCombine[i][j][k] = true; 4161 v->NoOfDPP[i][j][k] = 2; 4162 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4163 } 4164 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4165 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4166 > v->MaxDppclkRoundedDownToDFSGranularity) 4167 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4168 v->DISPCLK_DPPCLK_Support[i][j] = false; 4169 } 4170 } 4171 v->TotalNumberOfActiveDPP[i][j] = 0; 4172 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4173 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4174 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4175 if (v->NoOfDPP[i][j][k] == 1) 4176 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4177 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4178 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4179 NoChroma = false; 4180 } 4181 4182 // UPTO 4183 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4184 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4185 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4186 double 
BWOfNonSplitPlaneOfMaximumBandwidth; 4187 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4188 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4189 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4190 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4191 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4192 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4193 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4194 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4195 } 4196 } 4197 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4198 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4199 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4200 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4201 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4202 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4203 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4204 } 4205 } 4206 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4207 v->RequiredDISPCLK[i][j] = 0.0; 4208 v->DISPCLK_DPPCLK_Support[i][j] = true; 4209 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4211 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4212 v->MPCCombine[i][j][k] = true; 4213 v->NoOfDPP[i][j][k] = 2; 4214 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4215 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4216 } else { 4217 v->MPCCombine[i][j][k] = false; 4218 v->NoOfDPP[i][j][k] = 1; 4219 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4220 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4221 } 4222 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4223 && 
v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4224 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4225 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4226 } else { 4227 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4228 } 4229 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4230 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4231 > v->MaxDppclkRoundedDownToDFSGranularity) 4232 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4233 v->DISPCLK_DPPCLK_Support[i][j] = false; 4234 } 4235 } 4236 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4237 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4238 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4239 } 4240 } 4241 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4242 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4243 v->DISPCLK_DPPCLK_Support[i][j] = false; 4244 } 4245 } 4246 } 4247 4248 /*Total Available Pipes Support Check*/ 4249 4250 for (i = 0; i < v->soc.num_states; i++) { 4251 for (j = 0; j < 2; j++) { 4252 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4253 v->TotalAvailablePipesSupport[i][j] = true; 4254 } else { 4255 v->TotalAvailablePipesSupport[i][j] = false; 4256 } 4257 } 4258 } 4259 /*Display IO and DSC Support Check*/ 4260 4261 v->NonsupportedDSCInputBPC = false; 4262 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4263 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4264 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4265 v->NonsupportedDSCInputBPC = true; 4266 } 4267 } 4268 4269 /*Number Of DSC Slices*/ 4270 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4271 
if (v->BlendingAndTiming[k] == k) { 4272 if (v->PixelClockBackEnd[k] > 3200) { 4273 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4274 } else if (v->PixelClockBackEnd[k] > 1360) { 4275 v->NumberOfDSCSlices[k] = 8; 4276 } else if (v->PixelClockBackEnd[k] > 680) { 4277 v->NumberOfDSCSlices[k] = 4; 4278 } else if (v->PixelClockBackEnd[k] > 340) { 4279 v->NumberOfDSCSlices[k] = 2; 4280 } else { 4281 v->NumberOfDSCSlices[k] = 1; 4282 } 4283 } else { 4284 v->NumberOfDSCSlices[k] = 0; 4285 } 4286 } 4287 4288 for (i = 0; i < v->soc.num_states; i++) { 4289 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4290 v->RequiresDSC[i][k] = false; 4291 v->RequiresFEC[i][k] = false; 4292 if (v->BlendingAndTiming[k] == k) { 4293 if (v->Output[k] == dm_hdmi) { 4294 v->RequiresDSC[i][k] = false; 4295 v->RequiresFEC[i][k] = false; 4296 v->OutputBppPerState[i][k] = TruncToValidBPP( 4297 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4298 3, 4299 v->HTotal[k], 4300 v->HActive[k], 4301 v->PixelClockBackEnd[k], 4302 v->ForcedOutputLinkBPP[k], 4303 false, 4304 v->Output[k], 4305 v->OutputFormat[k], 4306 v->DSCInputBitPerComponent[k], 4307 v->NumberOfDSCSlices[k], 4308 v->AudioSampleRate[k], 4309 v->AudioSampleLayout[k], 4310 v->ODMCombineEnablePerState[i][k]); 4311 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4312 if (v->DSCEnable[k] == true) { 4313 v->RequiresDSC[i][k] = true; 4314 v->LinkDSCEnable = true; 4315 if (v->Output[k] == dm_dp) { 4316 v->RequiresFEC[i][k] = true; 4317 } else { 4318 v->RequiresFEC[i][k] = false; 4319 } 4320 } else { 4321 v->RequiresDSC[i][k] = false; 4322 v->LinkDSCEnable = false; 4323 v->RequiresFEC[i][k] = false; 4324 } 4325 4326 v->Outbpp = BPP_INVALID; 4327 if (v->PHYCLKPerState[i] >= 270.0) { 4328 v->Outbpp = TruncToValidBPP( 4329 (1.0 - v->Downspreading / 100.0) * 2700, 4330 v->OutputLinkDPLanes[k], 4331 v->HTotal[k], 4332 v->HActive[k], 4333 v->PixelClockBackEnd[k], 4334 v->ForcedOutputLinkBPP[k], 4335 v->LinkDSCEnable, 
4336 v->Output[k], 4337 v->OutputFormat[k], 4338 v->DSCInputBitPerComponent[k], 4339 v->NumberOfDSCSlices[k], 4340 v->AudioSampleRate[k], 4341 v->AudioSampleLayout[k], 4342 v->ODMCombineEnablePerState[i][k]); 4343 v->OutputBppPerState[i][k] = v->Outbpp; 4344 // TODO: Need some other way to handle this nonsense 4345 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4346 } 4347 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4348 v->Outbpp = TruncToValidBPP( 4349 (1.0 - v->Downspreading / 100.0) * 5400, 4350 v->OutputLinkDPLanes[k], 4351 v->HTotal[k], 4352 v->HActive[k], 4353 v->PixelClockBackEnd[k], 4354 v->ForcedOutputLinkBPP[k], 4355 v->LinkDSCEnable, 4356 v->Output[k], 4357 v->OutputFormat[k], 4358 v->DSCInputBitPerComponent[k], 4359 v->NumberOfDSCSlices[k], 4360 v->AudioSampleRate[k], 4361 v->AudioSampleLayout[k], 4362 v->ODMCombineEnablePerState[i][k]); 4363 v->OutputBppPerState[i][k] = v->Outbpp; 4364 // TODO: Need some other way to handle this nonsense 4365 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4366 } 4367 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4368 v->Outbpp = TruncToValidBPP( 4369 (1.0 - v->Downspreading / 100.0) * 8100, 4370 v->OutputLinkDPLanes[k], 4371 v->HTotal[k], 4372 v->HActive[k], 4373 v->PixelClockBackEnd[k], 4374 v->ForcedOutputLinkBPP[k], 4375 v->LinkDSCEnable, 4376 v->Output[k], 4377 v->OutputFormat[k], 4378 v->DSCInputBitPerComponent[k], 4379 v->NumberOfDSCSlices[k], 4380 v->AudioSampleRate[k], 4381 v->AudioSampleLayout[k], 4382 v->ODMCombineEnablePerState[i][k]); 4383 v->OutputBppPerState[i][k] = v->Outbpp; 4384 // TODO: Need some other way to handle this nonsense 4385 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4386 } 4387 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 4388 v->Outbpp = TruncToValidBPP( 4389 (1.0 - v->Downspreading / 100.0) * 10000, 4390 4, 4391 v->HTotal[k], 4392 v->HActive[k], 4393 
v->PixelClockBackEnd[k], 4394 v->ForcedOutputLinkBPP[k], 4395 v->LinkDSCEnable, 4396 v->Output[k], 4397 v->OutputFormat[k], 4398 v->DSCInputBitPerComponent[k], 4399 v->NumberOfDSCSlices[k], 4400 v->AudioSampleRate[k], 4401 v->AudioSampleLayout[k], 4402 v->ODMCombineEnablePerState[i][k]); 4403 v->OutputBppPerState[i][k] = v->Outbpp; 4404 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4405 } 4406 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4407 v->Outbpp = TruncToValidBPP( 4408 12000, 4409 4, 4410 v->HTotal[k], 4411 v->HActive[k], 4412 v->PixelClockBackEnd[k], 4413 v->ForcedOutputLinkBPP[k], 4414 v->LinkDSCEnable, 4415 v->Output[k], 4416 v->OutputFormat[k], 4417 v->DSCInputBitPerComponent[k], 4418 v->NumberOfDSCSlices[k], 4419 v->AudioSampleRate[k], 4420 v->AudioSampleLayout[k], 4421 v->ODMCombineEnablePerState[i][k]); 4422 v->OutputBppPerState[i][k] = v->Outbpp; 4423 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4424 } 4425 } 4426 } else { 4427 v->OutputBppPerState[i][k] = 0; 4428 } 4429 } 4430 } 4431 4432 for (i = 0; i < v->soc.num_states; i++) { 4433 v->LinkCapacitySupport[i] = true; 4434 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4435 if (v->BlendingAndTiming[k] == k 4436 && (v->Output[k] == dm_dp || 4437 v->Output[k] == dm_edp || 4438 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4439 v->LinkCapacitySupport[i] = false; 4440 } 4441 } 4442 } 4443 4444 // UPTO 2172 4445 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4446 if (v->BlendingAndTiming[k] == k 4447 && (v->Output[k] == dm_dp || 4448 v->Output[k] == dm_edp || 4449 v->Output[k] == dm_hdmi)) { 4450 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4451 P2IWith420 = true; 4452 } 4453 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4454 && !v->DSC422NativeSupport) { 4455 DSC422NativeNotSupported = true; 4456 } 4457 } 4458 } 4459 4460 for (i = 0; i < 
v->soc.num_states; ++i) { 4461 v->ODMCombine4To1SupportCheckOK[i] = true; 4462 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4463 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4464 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4465 || v->Output[k] == dm_hdmi)) { 4466 v->ODMCombine4To1SupportCheckOK[i] = false; 4467 } 4468 } 4469 } 4470 4471 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4472 4473 for (i = 0; i < v->soc.num_states; i++) { 4474 v->NotEnoughDSCUnits[i] = false; 4475 v->TotalDSCUnitsRequired = 0.0; 4476 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4477 if (v->RequiresDSC[i][k] == true) { 4478 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4479 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4480 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4481 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4482 } else { 4483 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4484 } 4485 } 4486 } 4487 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4488 v->NotEnoughDSCUnits[i] = true; 4489 } 4490 } 4491 /*DSC Delay per state*/ 4492 4493 for (i = 0; i < v->soc.num_states; i++) { 4494 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4495 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4496 v->BPP = 0.0; 4497 } else { 4498 v->BPP = v->OutputBppPerState[i][k]; 4499 } 4500 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4501 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4502 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4503 v->DSCInputBitPerComponent[k], 4504 v->BPP, 4505 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4506 v->NumberOfDSCSlices[k], 4507 v->OutputFormat[k], 4508 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4509 } else if (v->ODMCombineEnablePerState[i][k] 
== dm_odm_combine_mode_2to1) { 4510 v->DSCDelayPerState[i][k] = 2.0 4511 * (dscceComputeDelay( 4512 v->DSCInputBitPerComponent[k], 4513 v->BPP, 4514 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4515 v->NumberOfDSCSlices[k] / 2, 4516 v->OutputFormat[k], 4517 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4518 } else { 4519 v->DSCDelayPerState[i][k] = 4.0 4520 * (dscceComputeDelay( 4521 v->DSCInputBitPerComponent[k], 4522 v->BPP, 4523 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4524 v->NumberOfDSCSlices[k] / 4, 4525 v->OutputFormat[k], 4526 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4527 } 4528 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4529 } else { 4530 v->DSCDelayPerState[i][k] = 0.0; 4531 } 4532 } 4533 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4534 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4535 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4536 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4537 } 4538 } 4539 } 4540 } 4541 4542 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4543 // 4544 for (i = 0; i < v->soc.num_states; ++i) { 4545 for (j = 0; j <= 1; ++j) { 4546 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4547 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4548 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4549 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4550 } 4551 4552 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315) 4553 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]); 4554 CalculateSwathAndDETConfiguration( 4555 false, 4556 v->NumberOfActivePlanes, 4557 v->DETBufferSizeInKByte[0], 4558 v->MaximumSwathWidthLuma, 4559 v->MaximumSwathWidthChroma, 4560 v->SourceScan, 4561 v->SourcePixelFormat, 4562 v->SurfaceTiling, 
4563 v->ViewportWidth, 4564 v->ViewportHeight, 4565 v->SurfaceWidthY, 4566 v->SurfaceWidthC, 4567 v->SurfaceHeightY, 4568 v->SurfaceHeightC, 4569 v->Read256BlockHeightY, 4570 v->Read256BlockHeightC, 4571 v->Read256BlockWidthY, 4572 v->Read256BlockWidthC, 4573 v->ODMCombineEnableThisState, 4574 v->BlendingAndTiming, 4575 v->BytePerPixelY, 4576 v->BytePerPixelC, 4577 v->BytePerPixelInDETY, 4578 v->BytePerPixelInDETC, 4579 v->HActive, 4580 v->HRatio, 4581 v->HRatioChroma, 4582 v->NoOfDPPThisState, 4583 v->swath_width_luma_ub_this_state, 4584 v->swath_width_chroma_ub_this_state, 4585 v->SwathWidthYThisState, 4586 v->SwathWidthCThisState, 4587 v->SwathHeightYThisState, 4588 v->SwathHeightCThisState, 4589 v->DETBufferSizeYThisState, 4590 v->DETBufferSizeCThisState, 4591 v->dummystring, 4592 &v->ViewportSizeSupport[i][j]); 4593 4594 CalculateDCFCLKDeepSleep( 4595 mode_lib, 4596 v->NumberOfActivePlanes, 4597 v->BytePerPixelY, 4598 v->BytePerPixelC, 4599 v->VRatio, 4600 v->VRatioChroma, 4601 v->SwathWidthYThisState, 4602 v->SwathWidthCThisState, 4603 v->NoOfDPPThisState, 4604 v->HRatio, 4605 v->HRatioChroma, 4606 v->PixelClock, 4607 v->PSCL_FACTOR, 4608 v->PSCL_FACTOR_CHROMA, 4609 v->RequiredDPPCLKThisState, 4610 v->ReadBandwidthLuma, 4611 v->ReadBandwidthChroma, 4612 v->ReturnBusWidth, 4613 &v->ProjectedDCFCLKDeepSleep[i][j]); 4614 4615 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4616 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4617 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4618 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4619 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4620 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4621 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4622 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4623 v->DETBufferSizeCAllStates[i][j][k] = 
v->DETBufferSizeCThisState[k]; 4624 } 4625 } 4626 } 4627 4628 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4629 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4630 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4631 } 4632 4633 for (i = 0; i < v->soc.num_states; i++) { 4634 for (j = 0; j < 2; j++) { 4635 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4636 4637 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4638 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4639 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4640 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4641 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4642 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4643 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4644 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4645 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4646 } 4647 4648 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4649 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4650 if (v->DCCEnable[k] == true) { 4651 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4652 } 4653 } 4654 4655 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4656 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4657 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4658 4659 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4660 && v->SourceScan[k] != dm_vert) { 4661 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4662 / 2; 4663 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4664 } else { 4665 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4666 
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4667 } 4668 4669 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4670 mode_lib, 4671 v->DCCEnable[k], 4672 v->Read256BlockHeightC[k], 4673 v->Read256BlockWidthC[k], 4674 v->SourcePixelFormat[k], 4675 v->SurfaceTiling[k], 4676 v->BytePerPixelC[k], 4677 v->SourceScan[k], 4678 v->SwathWidthCThisState[k], 4679 v->ViewportHeightChroma[k], 4680 v->GPUVMEnable, 4681 v->HostVMEnable, 4682 v->HostVMMaxNonCachedPageTableLevels, 4683 v->GPUVMMinPageSize, 4684 v->HostVMMinPageSize, 4685 v->PTEBufferSizeInRequestsForChroma, 4686 v->PitchC[k], 4687 0.0, 4688 &v->MacroTileWidthC[k], 4689 &v->MetaRowBytesC, 4690 &v->DPTEBytesPerRowC, 4691 &v->PTEBufferSizeNotExceededC[i][j][k], 4692 &v->dummyinteger7, 4693 &v->dpte_row_height_chroma[k], 4694 &v->dummyinteger28, 4695 &v->dummyinteger26, 4696 &v->dummyinteger23, 4697 &v->meta_row_height_chroma[k], 4698 &v->dummyinteger8, 4699 &v->dummyinteger9, 4700 &v->dummyinteger19, 4701 &v->dummyinteger20, 4702 &v->dummyinteger17, 4703 &v->dummyinteger10, 4704 &v->dummyinteger11); 4705 4706 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4707 mode_lib, 4708 v->VRatioChroma[k], 4709 v->VTAPsChroma[k], 4710 v->Interlace[k], 4711 v->ProgressiveToInterlaceUnitInOPP, 4712 v->SwathHeightCThisState[k], 4713 v->ViewportYStartC[k], 4714 &v->PrefillC[k], 4715 &v->MaxNumSwC[k]); 4716 } else { 4717 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4718 v->PTEBufferSizeInRequestsForChroma = 0; 4719 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4720 v->MetaRowBytesC = 0.0; 4721 v->DPTEBytesPerRowC = 0.0; 4722 v->PrefetchLinesC[i][j][k] = 0.0; 4723 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4724 } 4725 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4726 mode_lib, 4727 v->DCCEnable[k], 4728 v->Read256BlockHeightY[k], 4729 v->Read256BlockWidthY[k], 4730 v->SourcePixelFormat[k], 4731 v->SurfaceTiling[k], 4732 
v->BytePerPixelY[k], 4733 v->SourceScan[k], 4734 v->SwathWidthYThisState[k], 4735 v->ViewportHeight[k], 4736 v->GPUVMEnable, 4737 v->HostVMEnable, 4738 v->HostVMMaxNonCachedPageTableLevels, 4739 v->GPUVMMinPageSize, 4740 v->HostVMMinPageSize, 4741 v->PTEBufferSizeInRequestsForLuma, 4742 v->PitchY[k], 4743 v->DCCMetaPitchY[k], 4744 &v->MacroTileWidthY[k], 4745 &v->MetaRowBytesY, 4746 &v->DPTEBytesPerRowY, 4747 &v->PTEBufferSizeNotExceededY[i][j][k], 4748 &v->dummyinteger7, 4749 &v->dpte_row_height[k], 4750 &v->dummyinteger29, 4751 &v->dummyinteger27, 4752 &v->dummyinteger24, 4753 &v->meta_row_height[k], 4754 &v->dummyinteger25, 4755 &v->dpte_group_bytes[k], 4756 &v->dummyinteger21, 4757 &v->dummyinteger22, 4758 &v->dummyinteger18, 4759 &v->dummyinteger5, 4760 &v->dummyinteger6); 4761 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4762 mode_lib, 4763 v->VRatio[k], 4764 v->vtaps[k], 4765 v->Interlace[k], 4766 v->ProgressiveToInterlaceUnitInOPP, 4767 v->SwathHeightYThisState[k], 4768 v->ViewportYStartY[k], 4769 &v->PrefillY[k], 4770 &v->MaxNumSwY[k]); 4771 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4772 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4773 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4774 4775 CalculateRowBandwidth( 4776 v->GPUVMEnable, 4777 v->SourcePixelFormat[k], 4778 v->VRatio[k], 4779 v->VRatioChroma[k], 4780 v->DCCEnable[k], 4781 v->HTotal[k] / v->PixelClock[k], 4782 v->MetaRowBytesY, 4783 v->MetaRowBytesC, 4784 v->meta_row_height[k], 4785 v->meta_row_height_chroma[k], 4786 v->DPTEBytesPerRowY, 4787 v->DPTEBytesPerRowC, 4788 v->dpte_row_height[k], 4789 v->dpte_row_height_chroma[k], 4790 &v->meta_row_bandwidth[i][j][k], 4791 &v->dpte_row_bandwidth[i][j][k]); 4792 } 4793 /*DCCMetaBufferSizeSupport(i, j) = True 4794 For k = 0 To NumberOfActivePlanes - 1 4795 If MetaRowBytes(i, j, k) > 24064 Then 4796 DCCMetaBufferSizeSupport(i, j) = 
False 4797 End If 4798 Next k*/ 4799 v->DCCMetaBufferSizeSupport[i][j] = true; 4800 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4801 if (v->MetaRowBytes[i][j][k] > 24064) 4802 v->DCCMetaBufferSizeSupport[i][j] = false; 4803 } 4804 v->UrgLatency[i] = CalculateUrgentLatency( 4805 v->UrgentLatencyPixelDataOnly, 4806 v->UrgentLatencyPixelMixedWithVMData, 4807 v->UrgentLatencyVMDataOnly, 4808 v->DoUrgentLatencyAdjustment, 4809 v->UrgentLatencyAdjustmentFabricClockComponent, 4810 v->UrgentLatencyAdjustmentFabricClockReference, 4811 v->FabricClockPerState[i]); 4812 4813 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4814 CalculateUrgentBurstFactor( 4815 v->swath_width_luma_ub_this_state[k], 4816 v->swath_width_chroma_ub_this_state[k], 4817 v->SwathHeightYThisState[k], 4818 v->SwathHeightCThisState[k], 4819 v->HTotal[k] / v->PixelClock[k], 4820 v->UrgLatency[i], 4821 v->CursorBufferSize, 4822 v->CursorWidth[k][0], 4823 v->CursorBPP[k][0], 4824 v->VRatio[k], 4825 v->VRatioChroma[k], 4826 v->BytePerPixelInDETY[k], 4827 v->BytePerPixelInDETC[k], 4828 v->DETBufferSizeYThisState[k], 4829 v->DETBufferSizeCThisState[k], 4830 &v->UrgentBurstFactorCursor[k], 4831 &v->UrgentBurstFactorLuma[k], 4832 &v->UrgentBurstFactorChroma[k], 4833 &NotUrgentLatencyHiding[k]); 4834 } 4835 4836 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 4837 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4838 if (NotUrgentLatencyHiding[k]) { 4839 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 4840 } 4841 } 4842 4843 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4844 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 4845 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 4846 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 4847 } 4848 4849 v->TotalVActivePixelBandwidth[i][j] = 0; 4850 v->TotalVActiveCursorBandwidth[i][j] = 0; 4851 v->TotalMetaRowBandwidth[i][j] = 0; 4852 v->TotalDPTERowBandwidth[i][j] = 0; 4853 for 
(k = 0; k < v->NumberOfActivePlanes; ++k) { 4854 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 4855 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 4856 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 4857 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 4858 } 4859 } 4860 } 4861 4862 //Calculate Return BW 4863 for (i = 0; i < v->soc.num_states; ++i) { 4864 for (j = 0; j <= 1; ++j) { 4865 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4866 if (v->BlendingAndTiming[k] == k) { 4867 if (v->WritebackEnable[k] == true) { 4868 v->WritebackDelayTime[k] = v->WritebackLatency 4869 + CalculateWriteBackDelay( 4870 v->WritebackPixelFormat[k], 4871 v->WritebackHRatio[k], 4872 v->WritebackVRatio[k], 4873 v->WritebackVTaps[k], 4874 v->WritebackDestinationWidth[k], 4875 v->WritebackDestinationHeight[k], 4876 v->WritebackSourceHeight[k], 4877 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 4878 } else { 4879 v->WritebackDelayTime[k] = 0.0; 4880 } 4881 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4882 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 4883 v->WritebackDelayTime[k] = dml_max( 4884 v->WritebackDelayTime[k], 4885 v->WritebackLatency 4886 + CalculateWriteBackDelay( 4887 v->WritebackPixelFormat[m], 4888 v->WritebackHRatio[m], 4889 v->WritebackVRatio[m], 4890 v->WritebackVTaps[m], 4891 v->WritebackDestinationWidth[m], 4892 v->WritebackDestinationHeight[m], 4893 v->WritebackSourceHeight[m], 4894 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 4895 } 4896 } 4897 } 4898 } 4899 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4900 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4901 if (v->BlendingAndTiming[k] == m) { 4902 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 4903 } 4904 } 4905 } 
4906 v->MaxMaxVStartup[i][j] = 0; 4907 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4908 v->MaximumVStartup[i][j][k] = 4909 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 4910 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 4911 v->VTotal[k] - v->VActive[k] 4912 - dml_max( 4913 1.0, 4914 dml_ceil( 4915 1.0 * v->WritebackDelayTime[k] 4916 / (v->HTotal[k] 4917 / v->PixelClock[k]), 4918 1.0)); 4919 if (v->MaximumVStartup[i][j][k] > 1023) 4920 v->MaximumVStartup[i][j][k] = 1023; 4921 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 4922 } 4923 } 4924 } 4925 4926 ReorderingBytes = v->NumberOfChannels 4927 * dml_max3( 4928 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 4929 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 4930 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 4931 4932 for (i = 0; i < v->soc.num_states; ++i) { 4933 for (j = 0; j <= 1; ++j) { 4934 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 4935 } 4936 } 4937 4938 if (v->UseMinimumRequiredDCFCLK == true) 4939 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 4940 4941 for (i = 0; i < v->soc.num_states; ++i) { 4942 for (j = 0; j <= 1; ++j) { 4943 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 4944 v->ReturnBusWidth * v->DCFCLKState[i][j], 4945 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 4946 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 4947 double PixelDataOnlyReturnBWPerState = dml_min( 4948 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 4949 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 4950 double PixelMixedWithVMDataReturnBWPerState = dml_min( 4951 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 4952 IdealDRAMBandwidthPerState * 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 4953 4954 if (v->HostVMEnable != true) { 4955 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 4956 } else { 4957 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 4958 } 4959 } 4960 } 4961 4962 //Re-ordering Buffer Support Check 4963 for (i = 0; i < v->soc.num_states; ++i) { 4964 for (j = 0; j <= 1; ++j) { 4965 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 4966 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 4967 v->ROBSupport[i][j] = true; 4968 } else { 4969 v->ROBSupport[i][j] = false; 4970 } 4971 } 4972 } 4973 4974 //Vertical Active BW support check 4975 4976 MaxTotalVActiveRDBandwidth = 0; 4977 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4978 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4979 } 4980 4981 for (i = 0; i < v->soc.num_states; ++i) { 4982 for (j = 0; j <= 1; ++j) { 4983 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 4984 dml_min( 4985 v->ReturnBusWidth * v->DCFCLKState[i][j], 4986 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 4987 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 4988 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 4989 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 4990 4991 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 4992 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 4993 } else { 4994 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 4995 } 4996 } 4997 } 4998 4999 v->UrgentLatency = CalculateUrgentLatency( 5000 v->UrgentLatencyPixelDataOnly, 5001 v->UrgentLatencyPixelMixedWithVMData, 5002 v->UrgentLatencyVMDataOnly, 5003 v->DoUrgentLatencyAdjustment, 5004 
v->UrgentLatencyAdjustmentFabricClockComponent, 5005 v->UrgentLatencyAdjustmentFabricClockReference, 5006 v->FabricClock); 5007 //Prefetch Check 5008 for (i = 0; i < v->soc.num_states; ++i) { 5009 for (j = 0; j <= 1; ++j) { 5010 double VMDataOnlyReturnBWPerState; 5011 double HostVMInefficiencyFactor = 1; 5012 int NextPrefetchModeState = MinPrefetchMode; 5013 bool UnboundedRequestEnabledThisState = false; 5014 int CompressedBufferSizeInkByteThisState = 0; 5015 double dummy; 5016 5017 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5018 5019 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5020 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5021 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5022 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5023 } 5024 5025 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5026 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5027 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5028 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5029 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5030 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5031 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5032 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5033 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5034 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5035 } 5036 5037 VMDataOnlyReturnBWPerState = dml_min( 5038 dml_min( 5039 v->ReturnBusWidth * v->DCFCLKState[i][j], 5040 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5041 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5042 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5043 * 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5044 if (v->GPUVMEnable && v->HostVMEnable) 5045 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5046 5047 v->ExtraLatency = CalculateExtraLatency( 5048 v->RoundTripPingLatencyCycles, 5049 ReorderingBytes, 5050 v->DCFCLKState[i][j], 5051 v->TotalNumberOfActiveDPP[i][j], 5052 v->PixelChunkSizeInKByte, 5053 v->TotalNumberOfDCCActiveDPP[i][j], 5054 v->MetaChunkSize, 5055 v->ReturnBWPerState[i][j], 5056 v->GPUVMEnable, 5057 v->HostVMEnable, 5058 v->NumberOfActivePlanes, 5059 v->NoOfDPPThisState, 5060 v->dpte_group_bytes, 5061 HostVMInefficiencyFactor, 5062 v->HostVMMinPageSize, 5063 v->HostVMMaxNonCachedPageTableLevels); 5064 5065 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5066 do { 5067 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5068 v->MaxVStartup = v->NextMaxVStartup; 5069 5070 v->TWait = CalculateTWait( 5071 v->PrefetchModePerState[i][j], 5072 v->DRAMClockChangeLatency, 5073 v->UrgLatency[i], 5074 v->SREnterPlusExitTime); 5075 5076 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5077 CalculatePrefetchSchedulePerPlane(mode_lib, 5078 HostVMInefficiencyFactor, 5079 i, j, k); 5080 } 5081 5082 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5083 CalculateUrgentBurstFactor( 5084 v->swath_width_luma_ub_this_state[k], 5085 v->swath_width_chroma_ub_this_state[k], 5086 v->SwathHeightYThisState[k], 5087 v->SwathHeightCThisState[k], 5088 v->HTotal[k] / v->PixelClock[k], 5089 v->UrgLatency[i], 5090 v->CursorBufferSize, 5091 v->CursorWidth[k][0], 5092 v->CursorBPP[k][0], 5093 v->VRatioPreY[i][j][k], 5094 v->VRatioPreC[i][j][k], 5095 v->BytePerPixelInDETY[k], 5096 v->BytePerPixelInDETC[k], 5097 v->DETBufferSizeYThisState[k], 5098 v->DETBufferSizeCThisState[k], 5099 &v->UrgentBurstFactorCursorPre[k], 5100 &v->UrgentBurstFactorLumaPre[k], 5101 &v->UrgentBurstFactorChroma[k], 5102 &v->NotUrgentLatencyHidingPre[k]); 5103 } 5104 5105 v->MaximumReadBandwidthWithPrefetch 
= 0.0; 5106 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5107 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5108 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5109 5110 v->MaximumReadBandwidthWithPrefetch = 5111 v->MaximumReadBandwidthWithPrefetch 5112 + dml_max3( 5113 v->VActivePixelBandwidth[i][j][k] 5114 + v->VActiveCursorBandwidth[i][j][k] 5115 + v->NoOfDPP[i][j][k] 5116 * (v->meta_row_bandwidth[i][j][k] 5117 + v->dpte_row_bandwidth[i][j][k]), 5118 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5119 v->NoOfDPP[i][j][k] 5120 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5121 * v->UrgentBurstFactorLumaPre[k] 5122 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5123 * v->UrgentBurstFactorChromaPre[k]) 5124 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5125 } 5126 5127 v->NotEnoughUrgentLatencyHidingPre = false; 5128 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5129 if (v->NotUrgentLatencyHidingPre[k] == true) { 5130 v->NotEnoughUrgentLatencyHidingPre = true; 5131 } 5132 } 5133 5134 v->PrefetchSupported[i][j] = true; 5135 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5136 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5137 v->PrefetchSupported[i][j] = false; 5138 } 5139 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5140 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5141 || v->NoTimeForPrefetch[i][j][k] == true) { 5142 v->PrefetchSupported[i][j] = false; 5143 } 5144 } 5145 5146 v->DynamicMetadataSupported[i][j] = true; 5147 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5148 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5149 v->DynamicMetadataSupported[i][j] = false; 5150 } 5151 } 5152 5153 v->VRatioInPrefetchSupported[i][j] = true; 5154 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5155 if (v->VRatioPreY[i][j][k] > 4.0 || 
v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5156 v->VRatioInPrefetchSupported[i][j] = false; 5157 } 5158 } 5159 v->AnyLinesForVMOrRowTooLarge = false; 5160 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5161 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5162 v->AnyLinesForVMOrRowTooLarge = true; 5163 } 5164 } 5165 5166 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5167 5168 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5169 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5170 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5171 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5172 - dml_max( 5173 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5174 v->NoOfDPP[i][j][k] 5175 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5176 * v->UrgentBurstFactorLumaPre[k] 5177 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5178 * v->UrgentBurstFactorChromaPre[k]) 5179 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5180 } 5181 v->TotImmediateFlipBytes = 0.0; 5182 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5183 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5184 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5185 + v->DPTEBytesPerRow[i][j][k]; 5186 } 5187 5188 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5189 CalculateFlipSchedule( 5190 mode_lib, 5191 k, 5192 HostVMInefficiencyFactor, 5193 v->ExtraLatency, 5194 v->UrgLatency[i], 5195 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5196 v->MetaRowBytes[i][j][k], 5197 v->DPTEBytesPerRow[i][j][k]); 5198 } 5199 v->total_dcn_read_bw_with_flip = 0.0; 5200 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5201 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5202 + dml_max3( 5203 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5204 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + 
v->VActivePixelBandwidth[i][j][k] 5205 + v->VActiveCursorBandwidth[i][j][k], 5206 v->NoOfDPP[i][j][k] 5207 * (v->final_flip_bw[k] 5208 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5209 * v->UrgentBurstFactorLumaPre[k] 5210 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5211 * v->UrgentBurstFactorChromaPre[k]) 5212 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5213 } 5214 v->ImmediateFlipSupportedForState[i][j] = true; 5215 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5216 v->ImmediateFlipSupportedForState[i][j] = false; 5217 } 5218 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5219 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5220 v->ImmediateFlipSupportedForState[i][j] = false; 5221 } 5222 } 5223 } else { 5224 v->ImmediateFlipSupportedForState[i][j] = false; 5225 } 5226 5227 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5228 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5229 NextPrefetchModeState = NextPrefetchModeState + 1; 5230 } else { 5231 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5232 } 5233 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5234 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5235 && ((v->HostVMEnable == false && 5236 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5237 || v->ImmediateFlipSupportedForState[i][j] == true)) 5238 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5239 5240 CalculateUnboundedRequestAndCompressedBufferSize( 5241 v->DETBufferSizeInKByte[0], 5242 v->ConfigReturnBufferSizeInKByte, 5243 v->UseUnboundedRequesting, 5244 v->TotalNumberOfActiveDPP[i][j], 5245 NoChroma, 5246 v->MaxNumDPP, 5247 v->CompressedBufferSegmentSizeInkByte, 5248 v->Output, 5249 &UnboundedRequestEnabledThisState, 5250 &CompressedBufferSizeInkByteThisState); 5251 5252 CalculateWatermarksAndDRAMSpeedChangeSupport( 
5253 mode_lib, 5254 v->PrefetchModePerState[i][j], 5255 v->DCFCLKState[i][j], 5256 v->ReturnBWPerState[i][j], 5257 v->UrgLatency[i], 5258 v->ExtraLatency, 5259 v->SOCCLKPerState[i], 5260 v->ProjectedDCFCLKDeepSleep[i][j], 5261 v->DETBufferSizeYThisState, 5262 v->DETBufferSizeCThisState, 5263 v->SwathHeightYThisState, 5264 v->SwathHeightCThisState, 5265 v->SwathWidthYThisState, 5266 v->SwathWidthCThisState, 5267 v->NoOfDPPThisState, 5268 v->BytePerPixelInDETY, 5269 v->BytePerPixelInDETC, 5270 UnboundedRequestEnabledThisState, 5271 CompressedBufferSizeInkByteThisState, 5272 &v->DRAMClockChangeSupport[i][j], 5273 &dummy, 5274 &dummy, 5275 &dummy, 5276 &dummy); 5277 } 5278 } 5279 5280 /*PTE Buffer Size Check*/ 5281 for (i = 0; i < v->soc.num_states; i++) { 5282 for (j = 0; j < 2; j++) { 5283 v->PTEBufferSizeNotExceeded[i][j] = true; 5284 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5285 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5286 v->PTEBufferSizeNotExceeded[i][j] = false; 5287 } 5288 } 5289 } 5290 } 5291 5292 /*Cursor Support Check*/ 5293 v->CursorSupport = true; 5294 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5295 if (v->CursorWidth[k][0] > 0.0) { 5296 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5297 v->CursorSupport = false; 5298 } 5299 } 5300 } 5301 5302 /*Valid Pitch Check*/ 5303 v->PitchSupport = true; 5304 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5305 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5306 if (v->DCCEnable[k] == true) { 5307 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5308 } else { 5309 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5310 } 5311 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5312 && v->SourcePixelFormat[k] != dm_mono_16 && 
v->SourcePixelFormat[k] != dm_rgbe 5313 && v->SourcePixelFormat[k] != dm_mono_8) { 5314 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5315 if (v->DCCEnable[k] == true) { 5316 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5317 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5318 64.0 * v->Read256BlockWidthC[k]); 5319 } else { 5320 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5321 } 5322 } else { 5323 v->AlignedCPitch[k] = v->PitchC[k]; 5324 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5325 } 5326 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5327 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5328 v->PitchSupport = false; 5329 } 5330 } 5331 5332 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5333 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5334 ViewportExceedsSurface = true; 5335 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5336 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5337 && v->SourcePixelFormat[k] != dm_rgbe) { 5338 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5339 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5340 ViewportExceedsSurface = true; 5341 } 5342 } 5343 } 5344 } 5345 5346 /*Mode Support, Voltage State and SOC Configuration*/ 5347 for (i = v->soc.num_states - 1; i >= 0; i--) { 5348 for (j = 0; j < 2; j++) { 5349 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5350 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5351 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5352 && v->DTBCLKRequiredMoreThanSupported[i] == false 5353 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == 
true 5354 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5355 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5356 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5357 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5358 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5359 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5360 && ((v->HostVMEnable == false 5361 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5362 || v->ImmediateFlipSupportedForState[i][j] == true) 5363 && FMTBufferExceeded == false) { 5364 v->ModeSupport[i][j] = true; 5365 } else { 5366 v->ModeSupport[i][j] = false; 5367 #ifdef __DML_VBA_DEBUG__ 5368 if (v->ScaleRatioAndTapsSupport == false) 5369 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed"); 5370 if (v->SourceFormatPixelAndScanSupport == false) 5371 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed"); 5372 if (v->ViewportSizeSupport[i][j] == false) 5373 dml_print("DML SUPPORT: ViewportSizeSupport failed"); 5374 if (v->LinkCapacitySupport[i] == false) 5375 dml_print("DML SUPPORT: LinkCapacitySupport failed"); 5376 if (v->ODMCombine4To1SupportCheckOK[i] == false) 5377 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5378 if (v->NotEnoughDSCUnits[i] == true) 5379 dml_print("DML SUPPORT: NotEnoughDSCUnits"); 5380 if (v->DTBCLKRequiredMoreThanSupported[i] == true) 5381 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported"); 5382 if (v->ROBSupport[i][j] == false) 5383 dml_print("DML SUPPORT: ROBSupport failed"); 5384 if (v->DISPCLK_DPPCLK_Support[i][j] == false) 5385 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed"); 5386 if (v->TotalAvailablePipesSupport[i][j] == false) 5387 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5388 if (EnoughWritebackUnits 
== false) 5389 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5390 if (v->WritebackLatencySupport == false) 5391 dml_print("DML SUPPORT: WritebackLatencySupport failed"); 5392 if (v->WritebackScaleRatioAndTapsSupport == false) 5393 dml_print("DML SUPPORT: DSC422NativeNotSupported "); 5394 if (v->CursorSupport == false) 5395 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5396 if (v->PitchSupport == false) 5397 dml_print("DML SUPPORT: PitchSupport failed"); 5398 if (ViewportExceedsSurface == true) 5399 dml_print("DML SUPPORT: ViewportExceedsSurface failed"); 5400 if (v->PrefetchSupported[i][j] == false) 5401 dml_print("DML SUPPORT: PrefetchSupported failed"); 5402 if (v->DynamicMetadataSupported[i][j] == false) 5403 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5404 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false) 5405 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed"); 5406 if (v->VRatioInPrefetchSupported[i][j] == false) 5407 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed"); 5408 if (v->PTEBufferSizeNotExceeded[i][j] == false) 5409 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed"); 5410 if (v->NonsupportedDSCInputBPC == true) 5411 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed"); 5412 if (!((v->HostVMEnable == false 5413 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5414 || v->ImmediateFlipSupportedForState[i][j] == true)) 5415 dml_print("DML SUPPORT: ImmediateFlipRequirement failed"); 5416 if (FMTBufferExceeded == true) 5417 dml_print("DML SUPPORT: FMTBufferExceeded failed"); 5418 #endif 5419 } 5420 } 5421 } 5422 5423 { 5424 unsigned int MaximumMPCCombine = 0; 5425 for (i = v->soc.num_states; i >= 0; i--) { 5426 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5427 v->VoltageLevel = i; 5428 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5429 if (v->ModeSupport[i][0] == true) { 
5430 MaximumMPCCombine = 0; 5431 } else { 5432 MaximumMPCCombine = 1; 5433 } 5434 } 5435 } 5436 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5437 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5438 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5439 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5440 } 5441 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5442 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5443 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5444 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5445 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5446 v->maxMpcComb = MaximumMPCCombine; 5447 } 5448 } 5449 5450 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5451 struct display_mode_lib *mode_lib, 5452 unsigned int PrefetchMode, 5453 double DCFCLK, 5454 double ReturnBW, 5455 double UrgentLatency, 5456 double ExtraLatency, 5457 double SOCCLK, 5458 double DCFCLKDeepSleep, 5459 unsigned int DETBufferSizeY[], 5460 unsigned int DETBufferSizeC[], 5461 unsigned int SwathHeightY[], 5462 unsigned int SwathHeightC[], 5463 double SwathWidthY[], 5464 double SwathWidthC[], 5465 unsigned int DPPPerPlane[], 5466 double BytePerPixelDETY[], 5467 double BytePerPixelDETC[], 5468 bool UnboundedRequestEnabled, 5469 int unsigned CompressedBufferSizeInkByte, 5470 enum clock_change_support *DRAMClockChangeSupport, 5471 double *StutterExitWatermark, 5472 double *StutterEnterPlusExitWatermark, 5473 double *Z8StutterExitWatermark, 5474 double *Z8StutterEnterPlusExitWatermark) 5475 { 5476 struct vba_vars_st *v = &mode_lib->vba; 5477 double EffectiveLBLatencyHidingY; 5478 double EffectiveLBLatencyHidingC; 5479 double LinesInDETY[DC__NUM_DPP__MAX]; 5480 double LinesInDETC; 5481 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5482 unsigned int LinesInDETCRoundedDownToSwath; 5483 double 
FullDETBufferingTimeY; 5484 double FullDETBufferingTimeC; 5485 double ActiveDRAMClockChangeLatencyMarginY; 5486 double ActiveDRAMClockChangeLatencyMarginC; 5487 double WritebackDRAMClockChangeLatencyMargin; 5488 double PlaneWithMinActiveDRAMClockChangeMargin; 5489 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5490 double WritebackDRAMClockChangeLatencyHiding; 5491 double TotalPixelBW = 0.0; 5492 int k, j; 5493 5494 v->UrgentWatermark = UrgentLatency + ExtraLatency; 5495 5496 #ifdef __DML_VBA_DEBUG__ 5497 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5498 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5499 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); 5500 #endif 5501 5502 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; 5503 5504 #ifdef __DML_VBA_DEBUG__ 5505 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); 5506 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); 5507 #endif 5508 5509 v->TotalActiveWriteback = 0; 5510 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5511 if (v->WritebackEnable[k] == true) { 5512 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5513 } 5514 } 5515 5516 if (v->TotalActiveWriteback <= 1) { 5517 v->WritebackUrgentWatermark = v->WritebackLatency; 5518 } else { 5519 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5520 } 5521 5522 if (v->TotalActiveWriteback <= 1) { 5523 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; 5524 } else { 5525 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5526 } 5527 5528 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5529 TotalPixelBW = TotalPixelBW 5530 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] 
+ SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) 5531 / (v->HTotal[k] / v->PixelClock[k]); 5532 } 5533 5534 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5535 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5536 5537 v->LBLatencyHidingSourceLinesY = dml_min( 5538 (double) v->MaxLineBufferLines, 5539 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 5540 5541 v->LBLatencyHidingSourceLinesC = dml_min( 5542 (double) v->MaxLineBufferLines, 5543 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 5544 5545 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 5546 5547 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 5548 5549 if (UnboundedRequestEnabled) { 5550 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5551 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 5552 } 5553 5554 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5555 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5556 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 5557 if (BytePerPixelDETC[k] > 0) { 5558 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5559 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5560 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; 5561 } else { 5562 LinesInDETC = 0; 5563 FullDETBufferingTimeC = 999999; 5564 } 5565 5566 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5567 - ((double) v->DSTXAfterScaler[k] / 
v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5568 5569 if (v->NumberOfActivePlanes > 1) { 5570 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5571 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; 5572 } 5573 5574 if (BytePerPixelDETC[k] > 0) { 5575 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5576 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5577 5578 if (v->NumberOfActivePlanes > 1) { 5579 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5580 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; 5581 } 5582 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5583 } else { 5584 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5585 } 5586 5587 if (v->WritebackEnable[k] == true) { 5588 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 5589 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 5590 if (v->WritebackPixelFormat[k] == dm_444_64) { 5591 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5592 } 5593 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5594 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5595 } 5596 } 5597 5598 v->MinActiveDRAMClockChangeMargin = 999999; 5599 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5600 for (k = 0; k < v->NumberOfActivePlanes; 
++k) { 5601 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5602 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5603 if (v->BlendingAndTiming[k] == k) { 5604 PlaneWithMinActiveDRAMClockChangeMargin = k; 5605 } else { 5606 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 5607 if (v->BlendingAndTiming[k] == j) { 5608 PlaneWithMinActiveDRAMClockChangeMargin = j; 5609 } 5610 } 5611 } 5612 } 5613 } 5614 5615 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; 5616 5617 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5618 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5619 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5620 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5621 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5622 } 5623 } 5624 5625 v->TotalNumberOfActiveOTG = 0; 5626 5627 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5628 if (v->BlendingAndTiming[k] == k) { 5629 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5630 } 5631 } 5632 5633 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5634 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5635 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5636 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5637 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5638 } else { 5639 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5640 } 5641 5642 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5643 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5644 *Z8StutterExitWatermark = 
v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5645 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5646 5647 #ifdef __DML_VBA_DEBUG__ 5648 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5649 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5650 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5651 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5652 #endif 5653 } 5654 5655 static void CalculateDCFCLKDeepSleep( 5656 struct display_mode_lib *mode_lib, 5657 unsigned int NumberOfActivePlanes, 5658 int BytePerPixelY[], 5659 int BytePerPixelC[], 5660 double VRatio[], 5661 double VRatioChroma[], 5662 double SwathWidthY[], 5663 double SwathWidthC[], 5664 unsigned int DPPPerPlane[], 5665 double HRatio[], 5666 double HRatioChroma[], 5667 double PixelClock[], 5668 double PSCL_THROUGHPUT[], 5669 double PSCL_THROUGHPUT_CHROMA[], 5670 double DPPCLK[], 5671 double ReadBandwidthLuma[], 5672 double ReadBandwidthChroma[], 5673 int ReturnBusWidth, 5674 double *DCFCLKDeepSleep) 5675 { 5676 struct vba_vars_st *v = &mode_lib->vba; 5677 double DisplayPipeLineDeliveryTimeLuma; 5678 double DisplayPipeLineDeliveryTimeChroma; 5679 double ReadBandwidth = 0.0; 5680 int k; 5681 5682 for (k = 0; k < NumberOfActivePlanes; ++k) { 5683 5684 if (VRatio[k] <= 1) { 5685 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5686 } else { 5687 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5688 } 5689 if (BytePerPixelC[k] == 0) { 5690 DisplayPipeLineDeliveryTimeChroma = 0; 5691 } else { 5692 if (VRatioChroma[k] <= 1) { 5693 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5694 } else { 5695 DisplayPipeLineDeliveryTimeChroma = 
SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5696 } 5697 } 5698 5699 if (BytePerPixelC[k] > 0) { 5700 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 5701 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 5702 } else { 5703 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 5704 } 5705 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 5706 5707 } 5708 5709 for (k = 0; k < NumberOfActivePlanes; ++k) { 5710 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 5711 } 5712 5713 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 5714 5715 for (k = 0; k < NumberOfActivePlanes; ++k) { 5716 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 5717 } 5718 } 5719 5720 static void CalculateUrgentBurstFactor( 5721 int swath_width_luma_ub, 5722 int swath_width_chroma_ub, 5723 unsigned int SwathHeightY, 5724 unsigned int SwathHeightC, 5725 double LineTime, 5726 double UrgentLatency, 5727 double CursorBufferSize, 5728 unsigned int CursorWidth, 5729 unsigned int CursorBPP, 5730 double VRatio, 5731 double VRatioC, 5732 double BytePerPixelInDETY, 5733 double BytePerPixelInDETC, 5734 double DETBufferSizeY, 5735 double DETBufferSizeC, 5736 double *UrgentBurstFactorCursor, 5737 double *UrgentBurstFactorLuma, 5738 double *UrgentBurstFactorChroma, 5739 bool *NotEnoughUrgentLatencyHiding) 5740 { 5741 double LinesInDETLuma; 5742 double LinesInDETChroma; 5743 unsigned int LinesInCursorBuffer; 5744 double CursorBufferSizeInTime; 5745 double DETBufferSizeInTimeLuma; 5746 double DETBufferSizeInTimeChroma; 5747 5748 *NotEnoughUrgentLatencyHiding = 0; 5749 5750 if (CursorWidth > 0) { 5751 LinesInCursorBuffer = 1 << 
(unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 5752 if (VRatio > 0) { 5753 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 5754 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 5755 *NotEnoughUrgentLatencyHiding = 1; 5756 *UrgentBurstFactorCursor = 0; 5757 } else { 5758 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 5759 } 5760 } else { 5761 *UrgentBurstFactorCursor = 1; 5762 } 5763 } 5764 5765 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 5766 if (VRatio > 0) { 5767 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 5768 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 5769 *NotEnoughUrgentLatencyHiding = 1; 5770 *UrgentBurstFactorLuma = 0; 5771 } else { 5772 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 5773 } 5774 } else { 5775 *UrgentBurstFactorLuma = 1; 5776 } 5777 5778 if (BytePerPixelInDETC > 0) { 5779 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 5780 if (VRatio > 0) { 5781 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 5782 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 5783 *NotEnoughUrgentLatencyHiding = 1; 5784 *UrgentBurstFactorChroma = 0; 5785 } else { 5786 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 5787 } 5788 } else { 5789 *UrgentBurstFactorChroma = 1; 5790 } 5791 } 5792 } 5793 5794 static void CalculatePixelDeliveryTimes( 5795 unsigned int NumberOfActivePlanes, 5796 double VRatio[], 5797 double VRatioChroma[], 5798 double VRatioPrefetchY[], 5799 double VRatioPrefetchC[], 5800 unsigned int swath_width_luma_ub[], 5801 unsigned int swath_width_chroma_ub[], 5802 unsigned int DPPPerPlane[], 5803 double HRatio[], 5804 double HRatioChroma[], 5805 double PixelClock[], 5806 double 
PSCL_THROUGHPUT[], 5807 double PSCL_THROUGHPUT_CHROMA[], 5808 double DPPCLK[], 5809 int BytePerPixelC[], 5810 enum scan_direction_class SourceScan[], 5811 unsigned int NumberOfCursors[], 5812 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 5813 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 5814 unsigned int BlockWidth256BytesY[], 5815 unsigned int BlockHeight256BytesY[], 5816 unsigned int BlockWidth256BytesC[], 5817 unsigned int BlockHeight256BytesC[], 5818 double DisplayPipeLineDeliveryTimeLuma[], 5819 double DisplayPipeLineDeliveryTimeChroma[], 5820 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 5821 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 5822 double DisplayPipeRequestDeliveryTimeLuma[], 5823 double DisplayPipeRequestDeliveryTimeChroma[], 5824 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 5825 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 5826 double CursorRequestDeliveryTime[], 5827 double CursorRequestDeliveryTimePrefetch[]) 5828 { 5829 double req_per_swath_ub; 5830 int k; 5831 5832 for (k = 0; k < NumberOfActivePlanes; ++k) { 5833 if (VRatio[k] <= 1) { 5834 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5835 } else { 5836 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5837 } 5838 5839 if (BytePerPixelC[k] == 0) { 5840 DisplayPipeLineDeliveryTimeChroma[k] = 0; 5841 } else { 5842 if (VRatioChroma[k] <= 1) { 5843 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5844 } else { 5845 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5846 } 5847 } 5848 5849 if (VRatioPrefetchY[k] <= 1) { 5850 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5851 } else { 5852 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] 
/ DPPCLK[k]; 5853 } 5854 5855 if (BytePerPixelC[k] == 0) { 5856 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 5857 } else { 5858 if (VRatioPrefetchC[k] <= 1) { 5859 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5860 } else { 5861 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5862 } 5863 } 5864 } 5865 5866 for (k = 0; k < NumberOfActivePlanes; ++k) { 5867 if (SourceScan[k] != dm_vert) { 5868 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 5869 } else { 5870 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 5871 } 5872 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 5873 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 5874 if (BytePerPixelC[k] == 0) { 5875 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 5876 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 5877 } else { 5878 if (SourceScan[k] != dm_vert) { 5879 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 5880 } else { 5881 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 5882 } 5883 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 5884 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 5885 } 5886 #ifdef __DML_VBA_DEBUG__ 5887 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 5888 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 5889 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 5890 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 5891 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 5892 
dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 5893 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 5894 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 5895 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 5896 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 5897 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 5898 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 5899 #endif 5900 } 5901 5902 for (k = 0; k < NumberOfActivePlanes; ++k) { 5903 int cursor_req_per_width; 5904 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 5905 if (NumberOfCursors[k] > 0) { 5906 if (VRatio[k] <= 1) { 5907 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 5908 } else { 5909 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 5910 } 5911 if (VRatioPrefetchY[k] <= 1) { 5912 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 5913 } else { 5914 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 5915 } 5916 } else { 5917 CursorRequestDeliveryTime[k] = 0; 5918 CursorRequestDeliveryTimePrefetch[k] = 0; 5919 } 5920 #ifdef __DML_VBA_DEBUG__ 5921 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 5922 dml_print("DML::%s: 
k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 5923 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 5924 #endif 5925 } 5926 } 5927 5928 static void CalculateMetaAndPTETimes( 5929 int NumberOfActivePlanes, 5930 bool GPUVMEnable, 5931 int MetaChunkSize, 5932 int MinMetaChunkSizeBytes, 5933 int HTotal[], 5934 double VRatio[], 5935 double VRatioChroma[], 5936 double DestinationLinesToRequestRowInVBlank[], 5937 double DestinationLinesToRequestRowInImmediateFlip[], 5938 bool DCCEnable[], 5939 double PixelClock[], 5940 int BytePerPixelY[], 5941 int BytePerPixelC[], 5942 enum scan_direction_class SourceScan[], 5943 int dpte_row_height[], 5944 int dpte_row_height_chroma[], 5945 int meta_row_width[], 5946 int meta_row_width_chroma[], 5947 int meta_row_height[], 5948 int meta_row_height_chroma[], 5949 int meta_req_width[], 5950 int meta_req_width_chroma[], 5951 int meta_req_height[], 5952 int meta_req_height_chroma[], 5953 int dpte_group_bytes[], 5954 int PTERequestSizeY[], 5955 int PTERequestSizeC[], 5956 int PixelPTEReqWidthY[], 5957 int PixelPTEReqHeightY[], 5958 int PixelPTEReqWidthC[], 5959 int PixelPTEReqHeightC[], 5960 int dpte_row_width_luma_ub[], 5961 int dpte_row_width_chroma_ub[], 5962 double DST_Y_PER_PTE_ROW_NOM_L[], 5963 double DST_Y_PER_PTE_ROW_NOM_C[], 5964 double DST_Y_PER_META_ROW_NOM_L[], 5965 double DST_Y_PER_META_ROW_NOM_C[], 5966 double TimePerMetaChunkNominal[], 5967 double TimePerChromaMetaChunkNominal[], 5968 double TimePerMetaChunkVBlank[], 5969 double TimePerChromaMetaChunkVBlank[], 5970 double TimePerMetaChunkFlip[], 5971 double TimePerChromaMetaChunkFlip[], 5972 double time_per_pte_group_nom_luma[], 5973 double time_per_pte_group_vblank_luma[], 5974 double time_per_pte_group_flip_luma[], 5975 double time_per_pte_group_nom_chroma[], 5976 double time_per_pte_group_vblank_chroma[], 5977 double time_per_pte_group_flip_chroma[]) 5978 { 5979 
unsigned int meta_chunk_width; 5980 unsigned int min_meta_chunk_width; 5981 unsigned int meta_chunk_per_row_int; 5982 unsigned int meta_row_remainder; 5983 unsigned int meta_chunk_threshold; 5984 unsigned int meta_chunks_per_row_ub; 5985 unsigned int meta_chunk_width_chroma; 5986 unsigned int min_meta_chunk_width_chroma; 5987 unsigned int meta_chunk_per_row_int_chroma; 5988 unsigned int meta_row_remainder_chroma; 5989 unsigned int meta_chunk_threshold_chroma; 5990 unsigned int meta_chunks_per_row_ub_chroma; 5991 unsigned int dpte_group_width_luma; 5992 unsigned int dpte_groups_per_row_luma_ub; 5993 unsigned int dpte_group_width_chroma; 5994 unsigned int dpte_groups_per_row_chroma_ub; 5995 int k; 5996 5997 for (k = 0; k < NumberOfActivePlanes; ++k) { 5998 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 5999 if (BytePerPixelC[k] == 0) { 6000 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6001 } else { 6002 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6003 } 6004 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6005 if (BytePerPixelC[k] == 0) { 6006 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6007 } else { 6008 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6009 } 6010 } 6011 6012 for (k = 0; k < NumberOfActivePlanes; ++k) { 6013 if (DCCEnable[k] == true) { 6014 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6015 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6016 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6017 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6018 if (SourceScan[k] != dm_vert) { 6019 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6020 } else { 6021 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6022 } 6023 if (meta_row_remainder <= meta_chunk_threshold) { 6024 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6025 } else { 6026 
meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6027 } 6028 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6029 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6030 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6031 if (BytePerPixelC[k] == 0) { 6032 TimePerChromaMetaChunkNominal[k] = 0; 6033 TimePerChromaMetaChunkVBlank[k] = 0; 6034 TimePerChromaMetaChunkFlip[k] = 0; 6035 } else { 6036 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6037 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6038 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6039 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6040 if (SourceScan[k] != dm_vert) { 6041 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6042 } else { 6043 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6044 } 6045 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6046 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6047 } else { 6048 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6049 } 6050 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6051 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6052 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6053 } 6054 } else { 6055 TimePerMetaChunkNominal[k] = 0; 6056 
TimePerMetaChunkVBlank[k] = 0; 6057 TimePerMetaChunkFlip[k] = 0; 6058 TimePerChromaMetaChunkNominal[k] = 0; 6059 TimePerChromaMetaChunkVBlank[k] = 0; 6060 TimePerChromaMetaChunkFlip[k] = 0; 6061 } 6062 } 6063 6064 for (k = 0; k < NumberOfActivePlanes; ++k) { 6065 if (GPUVMEnable == true) { 6066 if (SourceScan[k] != dm_vert) { 6067 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6068 } else { 6069 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6070 } 6071 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6072 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6073 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6074 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6075 if (BytePerPixelC[k] == 0) { 6076 time_per_pte_group_nom_chroma[k] = 0; 6077 time_per_pte_group_vblank_chroma[k] = 0; 6078 time_per_pte_group_flip_chroma[k] = 0; 6079 } else { 6080 if (SourceScan[k] != dm_vert) { 6081 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6082 } else { 6083 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6084 } 6085 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6086 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6087 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6088 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / 
/*
 * CalculateVMGroupAndRequestTimes - per-plane time budget for each VM group
 * and each VM request, for the vblank and the immediate-flip case.
 *
 * For every plane in [0, NumberOfActivePlanes):
 *
 *  - When GPUVMEnable is false, or the plane has DCC disabled and
 *    GPUVMMaxPageTableLevels <= 1, all four outputs are 0.
 *  - Otherwise a group count (byte totals divided by vm_group_bytes, each
 *    term rounded up) and a request count (byte totals divided by 64, each
 *    term truncated) are built from:
 *        DCC off                          dpde0 byte totals only
 *        DCC on, page table levels == 1   meta PTE byte totals only
 *        DCC on, any other level count    both; the group count gets an
 *                                         extra 1 (no chroma) or 2 (chroma)
 *    Chroma terms are included only when BytePerPixelC[plane] > 0.
 *  - Each output is DestinationLines * HTotal / PixelClock divided by the
 *    matching count, then halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int plane;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		const bool dcc_on = DCCEnable[plane];
		const bool has_chroma = BytePerPixelC[plane] > 0;
		int groups_per_stage;
		int reqs_per_stage;

		if (!GPUVMEnable || (!dcc_on && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[plane] = 0;
			TimePerVMGroupFlip[plane] = 0;
			TimePerVMRequestVBlank[plane] = 0;
			TimePerVMRequestFlip[plane] = 0;
			continue;
		}

		if (!dcc_on) {
			/* dpde0 byte totals only. */
			groups_per_stage = has_chroma
					? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			reqs_per_stage = has_chroma
					? dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
					: dpde0_bytes_per_frame_ub_l[plane] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTE byte totals only. */
			groups_per_stage = has_chroma
					? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			reqs_per_stage = has_chroma
					? meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64
					: meta_pte_bytes_per_frame_ub_l[plane] / 64;
		} else {
			/* Both totals, plus a constant 2 (with chroma) or 1 (without) on the group count. */
			groups_per_stage = has_chroma
					? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			reqs_per_stage = has_chroma
					? dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
						+ meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64
					: dpde0_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_l[plane] / 64;
		}

		TimePerVMGroupVBlank[plane] = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane] / groups_per_stage;
		TimePerVMGroupFlip[plane] = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane] / groups_per_stage;
		TimePerVMRequestVBlank[plane] = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane] / reqs_per_stage;
		TimePerVMRequestFlip[plane] = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane] / reqs_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[plane] = TimePerVMGroupVBlank[plane] / 2;
			TimePerVMGroupFlip[plane] = TimePerVMGroupFlip[plane] / 2;
			TimePerVMRequestVBlank[plane] = TimePerVMRequestVBlank[plane] / 2;
			TimePerVMRequestFlip[plane] = TimePerVMRequestFlip[plane] / 2;
		}
	}
}
DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6181 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6182 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6183 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6184 6185 if (GPUVMMaxPageTableLevels > 2) { 6186 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6187 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6188 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6189 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6190 } 6191 6192 } else { 6193 TimePerVMGroupVBlank[k] = 0; 6194 TimePerVMGroupFlip[k] = 0; 6195 TimePerVMRequestVBlank[k] = 0; 6196 TimePerVMRequestFlip[k] = 0; 6197 } 6198 } 6199 } 6200 6201 static void CalculateStutterEfficiency( 6202 struct display_mode_lib *mode_lib, 6203 int CompressedBufferSizeInkByte, 6204 bool UnboundedRequestEnabled, 6205 int ConfigReturnBufferSizeInKByte, 6206 int MetaFIFOSizeInKEntries, 6207 int ZeroSizeBufferEntries, 6208 int NumberOfActivePlanes, 6209 int ROBBufferSizeInKByte, 6210 double TotalDataReadBandwidth, 6211 double DCFCLK, 6212 double ReturnBW, 6213 double COMPBUF_RESERVED_SPACE_64B, 6214 double COMPBUF_RESERVED_SPACE_ZS, 6215 double SRExitTime, 6216 double SRExitZ8Time, 6217 bool SynchronizedVBlank, 6218 double Z8StutterEnterPlusExitWatermark, 6219 double StutterEnterPlusExitWatermark, 6220 bool ProgressiveToInterlaceUnitInOPP, 6221 bool Interlace[], 6222 double MinTTUVBlank[], 6223 int DPPPerPlane[], 6224 unsigned int DETBufferSizeY[], 6225 int BytePerPixelY[], 6226 double BytePerPixelDETY[], 6227 double SwathWidthY[], 6228 int SwathHeightY[], 6229 int SwathHeightC[], 6230 double NetDCCRateLuma[], 6231 double NetDCCRateChroma[], 6232 double 
DCCFractionOfZeroSizeRequestsLuma[], 6233 double DCCFractionOfZeroSizeRequestsChroma[], 6234 int HTotal[], 6235 int VTotal[], 6236 double PixelClock[], 6237 double VRatio[], 6238 enum scan_direction_class SourceScan[], 6239 int BlockHeight256BytesY[], 6240 int BlockWidth256BytesY[], 6241 int BlockHeight256BytesC[], 6242 int BlockWidth256BytesC[], 6243 int DCCYMaxUncompressedBlock[], 6244 int DCCCMaxUncompressedBlock[], 6245 int VActive[], 6246 bool DCCEnable[], 6247 bool WritebackEnable[], 6248 double ReadBandwidthPlaneLuma[], 6249 double ReadBandwidthPlaneChroma[], 6250 double meta_row_bw[], 6251 double dpte_row_bw[], 6252 double *StutterEfficiencyNotIncludingVBlank, 6253 double *StutterEfficiency, 6254 int *NumberOfStutterBurstsPerFrame, 6255 double *Z8StutterEfficiencyNotIncludingVBlank, 6256 double *Z8StutterEfficiency, 6257 int *Z8NumberOfStutterBurstsPerFrame, 6258 double *StutterPeriod) 6259 { 6260 struct vba_vars_st *v = &mode_lib->vba; 6261 6262 double DETBufferingTimeY; 6263 double SwathWidthYCriticalPlane = 0; 6264 double VActiveTimeCriticalPlane = 0; 6265 double FrameTimeCriticalPlane = 0; 6266 int BytePerPixelYCriticalPlane = 0; 6267 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6268 double MinTTUVBlankCriticalPlane = 0; 6269 double TotalCompressedReadBandwidth; 6270 double TotalRowReadBandwidth; 6271 double AverageDCCCompressionRate; 6272 double EffectiveCompressedBufferSize; 6273 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6274 double StutterBurstTime; 6275 int TotalActiveWriteback; 6276 double LinesInDETY; 6277 double LinesInDETYRoundedDownToSwath; 6278 double MaximumEffectiveCompressionLuma; 6279 double MaximumEffectiveCompressionChroma; 6280 double TotalZeroSizeRequestReadBandwidth; 6281 double TotalZeroSizeCompressedReadBandwidth; 6282 double AverageDCCZeroSizeFraction; 6283 double AverageZeroSizeCompressionRate; 6284 int TotalNumberOfActiveOTG = 0; 6285 double LastStutterPeriod = 0.0; 6286 double 
LastZ8StutterPeriod = 0.0; 6287 int k; 6288 6289 TotalZeroSizeRequestReadBandwidth = 0; 6290 TotalZeroSizeCompressedReadBandwidth = 0; 6291 TotalRowReadBandwidth = 0; 6292 TotalCompressedReadBandwidth = 0; 6293 6294 for (k = 0; k < NumberOfActivePlanes; ++k) { 6295 if (DCCEnable[k] == true) { 6296 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6297 || DCCYMaxUncompressedBlock[k] < 256) { 6298 MaximumEffectiveCompressionLuma = 2; 6299 } else { 6300 MaximumEffectiveCompressionLuma = 4; 6301 } 6302 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6303 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6304 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6305 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6306 if (ReadBandwidthPlaneChroma[k] > 0) { 6307 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6308 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6309 MaximumEffectiveCompressionChroma = 2; 6310 } else { 6311 MaximumEffectiveCompressionChroma = 4; 6312 } 6313 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6314 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6315 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6316 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6317 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6318 } 6319 } else { 6320 
TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6321 } 6322 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6323 } 6324 6325 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6326 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6327 6328 #ifdef __DML_VBA_DEBUG__ 6329 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6330 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6331 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6332 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6333 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6334 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6335 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6336 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6337 #endif 6338 6339 if (AverageDCCZeroSizeFraction == 1) { 6340 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6341 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6342 } else if (AverageDCCZeroSizeFraction > 0) { 6343 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6344 EffectiveCompressedBufferSize = dml_min( 6345 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6346 MetaFIFOSizeInKEntries * 
1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6347 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6348 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6349 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6350 dml_print( 6351 "DML::%s: min 2 = %f\n", 6352 __func__, 6353 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6354 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6355 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6356 } else { 6357 EffectiveCompressedBufferSize = dml_min( 6358 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6359 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6360 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6361 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6362 } 6363 6364 #ifdef __DML_VBA_DEBUG__ 6365 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6366 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6367 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6368 #endif 6369 6370 *StutterPeriod = 0; 6371 for (k = 0; k < NumberOfActivePlanes; ++k) { 6372 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6373 / BytePerPixelDETY[k] / SwathWidthY[k]; 6374 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6375 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6376 #ifdef __DML_VBA_DEBUG__ 6377 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6378 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6379 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6380 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6381 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6382 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6383 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6384 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6385 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6386 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6387 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6388 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6389 #endif 6390 6391 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6392 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6393 6394 *StutterPeriod = DETBufferingTimeY; 6395 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6396 VActiveTimeCriticalPlane = (isInterlaceTiming ? 
dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6397 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6398 SwathWidthYCriticalPlane = SwathWidthY[k]; 6399 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6400 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6401 6402 #ifdef __DML_VBA_DEBUG__ 6403 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6404 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6405 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6406 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6407 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6408 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6409 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6410 #endif 6411 } 6412 } 6413 6414 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6415 #ifdef __DML_VBA_DEBUG__ 6416 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6417 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6418 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6419 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6420 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6421 dml_print("DML::%s: 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6422 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6423 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6424 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6425 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6426 #endif 6427 6428 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6429 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6430 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6431 #ifdef __DML_VBA_DEBUG__ 6432 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6433 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6434 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6435 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6436 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6437 #endif 6438 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6439 6440 dml_print( 6441 "DML::%s: Time to finish residue swath=%f\n", 6442 __func__, 6443 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6444 6445 TotalActiveWriteback = 0; 6446 for (k = 0; k < NumberOfActivePlanes; ++k) { 6447 if (WritebackEnable[k]) { 6448 TotalActiveWriteback = TotalActiveWriteback + 1; 
6449 } 6450 } 6451 6452 if (TotalActiveWriteback == 0) { 6453 #ifdef __DML_VBA_DEBUG__ 6454 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6455 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6456 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6457 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6458 #endif 6459 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6460 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6461 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6462 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6463 } else { 6464 *StutterEfficiencyNotIncludingVBlank = 0.; 6465 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6466 *NumberOfStutterBurstsPerFrame = 0; 6467 *Z8NumberOfStutterBurstsPerFrame = 0; 6468 } 6469 #ifdef __DML_VBA_DEBUG__ 6470 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6471 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6472 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6473 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6474 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6475 #endif 6476 6477 for (k = 0; k < NumberOfActivePlanes; ++k) { 6478 if (v->BlendingAndTiming[k] == k) { 6479 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6480 } 6481 } 6482 6483 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6484 LastStutterPeriod = 
VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6485 6486 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6487 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6488 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6489 } else { 6490 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6491 } 6492 } else { 6493 *StutterEfficiency = 0; 6494 } 6495 6496 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6497 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6498 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6499 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6500 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6501 } else { 6502 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6503 } 6504 } else { 6505 *Z8StutterEfficiency = 0.; 6506 } 6507 6508 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6509 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6510 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6511 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6512 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6513 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6514 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6515 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6516 } 6517 6518 static void 
CalculateSwathAndDETConfiguration( 6519 bool ForceSingleDPP, 6520 int NumberOfActivePlanes, 6521 unsigned int DETBufferSizeInKByte, 6522 double MaximumSwathWidthLuma[], 6523 double MaximumSwathWidthChroma[], 6524 enum scan_direction_class SourceScan[], 6525 enum source_format_class SourcePixelFormat[], 6526 enum dm_swizzle_mode SurfaceTiling[], 6527 int ViewportWidth[], 6528 int ViewportHeight[], 6529 int SurfaceWidthY[], 6530 int SurfaceWidthC[], 6531 int SurfaceHeightY[], 6532 int SurfaceHeightC[], 6533 int Read256BytesBlockHeightY[], 6534 int Read256BytesBlockHeightC[], 6535 int Read256BytesBlockWidthY[], 6536 int Read256BytesBlockWidthC[], 6537 enum odm_combine_mode ODMCombineEnabled[], 6538 int BlendingAndTiming[], 6539 int BytePerPixY[], 6540 int BytePerPixC[], 6541 double BytePerPixDETY[], 6542 double BytePerPixDETC[], 6543 int HActive[], 6544 double HRatio[], 6545 double HRatioChroma[], 6546 int DPPPerPlane[], 6547 int swath_width_luma_ub[], 6548 int swath_width_chroma_ub[], 6549 double SwathWidth[], 6550 double SwathWidthChroma[], 6551 int SwathHeightY[], 6552 int SwathHeightC[], 6553 unsigned int DETBufferSizeY[], 6554 unsigned int DETBufferSizeC[], 6555 bool ViewportSizeSupportPerPlane[], 6556 bool *ViewportSizeSupport) 6557 { 6558 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6559 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6560 int MinimumSwathHeightY; 6561 int MinimumSwathHeightC; 6562 int RoundedUpMaxSwathSizeBytesY; 6563 int RoundedUpMaxSwathSizeBytesC; 6564 int RoundedUpMinSwathSizeBytesY; 6565 int RoundedUpMinSwathSizeBytesC; 6566 int RoundedUpSwathSizeBytesY; 6567 int RoundedUpSwathSizeBytesC; 6568 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6569 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6570 int k; 6571 6572 CalculateSwathWidth( 6573 ForceSingleDPP, 6574 NumberOfActivePlanes, 6575 SourcePixelFormat, 6576 SourceScan, 6577 ViewportWidth, 6578 ViewportHeight, 6579 SurfaceWidthY, 6580 SurfaceWidthC, 6581 SurfaceHeightY, 6582 SurfaceHeightC, 
6583 ODMCombineEnabled, 6584 BytePerPixY, 6585 BytePerPixC, 6586 Read256BytesBlockHeightY, 6587 Read256BytesBlockHeightC, 6588 Read256BytesBlockWidthY, 6589 Read256BytesBlockWidthC, 6590 BlendingAndTiming, 6591 HActive, 6592 HRatio, 6593 DPPPerPlane, 6594 SwathWidthSingleDPP, 6595 SwathWidthSingleDPPChroma, 6596 SwathWidth, 6597 SwathWidthChroma, 6598 MaximumSwathHeightY, 6599 MaximumSwathHeightC, 6600 swath_width_luma_ub, 6601 swath_width_chroma_ub); 6602 6603 *ViewportSizeSupport = true; 6604 for (k = 0; k < NumberOfActivePlanes; ++k) { 6605 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6606 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6607 if (SurfaceTiling[k] == dm_sw_linear 6608 || (SourcePixelFormat[k] == dm_444_64 6609 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6610 && SourceScan[k] != dm_vert)) { 6611 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6612 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6613 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6614 } else { 6615 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6616 } 6617 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6618 } else { 6619 if (SurfaceTiling[k] == dm_sw_linear) { 6620 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6621 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6622 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6623 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6624 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6625 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6626 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6627 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6628 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6629 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6630 
MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6631 } else { 6632 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6633 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6634 } 6635 } 6636 6637 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6638 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6639 if (SourcePixelFormat[k] == dm_420_10) { 6640 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6641 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6642 } 6643 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6644 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6645 if (SourcePixelFormat[k] == dm_420_10) { 6646 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6647 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6648 } 6649 6650 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6651 SwathHeightY[k] = MaximumSwathHeightY[k]; 6652 SwathHeightC[k] = MaximumSwathHeightC[k]; 6653 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6654 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6655 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6656 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6657 SwathHeightY[k] = MinimumSwathHeightY; 6658 SwathHeightC[k] = MaximumSwathHeightC[k]; 6659 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6660 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6661 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6662 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6663 SwathHeightY[k] = 
MaximumSwathHeightY[k]; 6664 SwathHeightC[k] = MinimumSwathHeightC; 6665 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6666 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6667 } else { 6668 SwathHeightY[k] = MinimumSwathHeightY; 6669 SwathHeightC[k] = MinimumSwathHeightC; 6670 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6671 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6672 } 6673 { 6674 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6675 if (SwathHeightC[k] == 0) { 6676 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6677 DETBufferSizeC[k] = 0; 6678 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6679 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6680 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6681 } else { 6682 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6683 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6684 } 6685 6686 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6687 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6688 *ViewportSizeSupport = false; 6689 ViewportSizeSupportPerPlane[k] = false; 6690 } else { 6691 ViewportSizeSupportPerPlane[k] = true; 6692 } 6693 } 6694 } 6695 } 6696 6697 static void CalculateSwathWidth( 6698 bool ForceSingleDPP, 6699 int NumberOfActivePlanes, 6700 enum source_format_class SourcePixelFormat[], 6701 enum scan_direction_class SourceScan[], 6702 int ViewportWidth[], 6703 int ViewportHeight[], 6704 int SurfaceWidthY[], 6705 int SurfaceWidthC[], 6706 int SurfaceHeightY[], 6707 int SurfaceHeightC[], 6708 enum odm_combine_mode ODMCombineEnabled[], 6709 int BytePerPixY[], 6710 int BytePerPixC[], 6711 int Read256BytesBlockHeightY[], 6712 int Read256BytesBlockHeightC[], 6713 int Read256BytesBlockWidthY[], 6714 int Read256BytesBlockWidthC[], 6715 int 
BlendingAndTiming[], 6716 int HActive[], 6717 double HRatio[], 6718 int DPPPerPlane[], 6719 double SwathWidthSingleDPPY[], 6720 double SwathWidthSingleDPPC[], 6721 double SwathWidthY[], 6722 double SwathWidthC[], 6723 int MaximumSwathHeightY[], 6724 int MaximumSwathHeightC[], 6725 int swath_width_luma_ub[], 6726 int swath_width_chroma_ub[]) 6727 { 6728 enum odm_combine_mode MainPlaneODMCombine; 6729 int j, k; 6730 6731 #ifdef __DML_VBA_DEBUG__ 6732 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 6733 #endif 6734 6735 for (k = 0; k < NumberOfActivePlanes; ++k) { 6736 if (SourceScan[k] != dm_vert) { 6737 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 6738 } else { 6739 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 6740 } 6741 6742 #ifdef __DML_VBA_DEBUG__ 6743 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 6744 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 6745 #endif 6746 6747 MainPlaneODMCombine = ODMCombineEnabled[k]; 6748 for (j = 0; j < NumberOfActivePlanes; ++j) { 6749 if (BlendingAndTiming[k] == j) { 6750 MainPlaneODMCombine = ODMCombineEnabled[j]; 6751 } 6752 } 6753 6754 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) { 6755 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 6756 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) { 6757 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 6758 } else if (DPPPerPlane[k] == 2) { 6759 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 6760 } else { 6761 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6762 } 6763 6764 #ifdef __DML_VBA_DEBUG__ 6765 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 6766 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 6767 #endif 6768 6769 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == 
dm_420_12) { 6770 SwathWidthC[k] = SwathWidthY[k] / 2; 6771 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 6772 } else { 6773 SwathWidthC[k] = SwathWidthY[k]; 6774 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 6775 } 6776 6777 if (ForceSingleDPP == true) { 6778 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6779 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 6780 } 6781 { 6782 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 6783 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 6784 6785 #ifdef __DML_VBA_DEBUG__ 6786 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 6787 #endif 6788 6789 if (SourceScan[k] != dm_vert) { 6790 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 6791 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 6792 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 6793 if (BytePerPixC[k] > 0) { 6794 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 6795 6796 swath_width_chroma_ub[k] = dml_min( 6797 surface_width_ub_c, 6798 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 6799 } else { 6800 swath_width_chroma_ub[k] = 0; 6801 } 6802 } else { 6803 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 6804 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 6805 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 6806 if (BytePerPixC[k] > 0) { 6807 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 6808 6809 swath_width_chroma_ub[k] = dml_min( 6810 surface_height_ub_c, 6811 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 6812 } else { 6813 swath_width_chroma_ub[k] = 0; 6814 } 6815 } 6816 } 6817 
} 6818 } 6819 6820 static double CalculateExtraLatency( 6821 int RoundTripPingLatencyCycles, 6822 int ReorderingBytes, 6823 double DCFCLK, 6824 int TotalNumberOfActiveDPP, 6825 int PixelChunkSizeInKByte, 6826 int TotalNumberOfDCCActiveDPP, 6827 int MetaChunkSize, 6828 double ReturnBW, 6829 bool GPUVMEnable, 6830 bool HostVMEnable, 6831 int NumberOfActivePlanes, 6832 int NumberOfDPP[], 6833 int dpte_group_bytes[], 6834 double HostVMInefficiencyFactor, 6835 double HostVMMinPageSize, 6836 int HostVMMaxNonCachedPageTableLevels) 6837 { 6838 double ExtraLatencyBytes; 6839 double ExtraLatency; 6840 6841 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6842 ReorderingBytes, 6843 TotalNumberOfActiveDPP, 6844 PixelChunkSizeInKByte, 6845 TotalNumberOfDCCActiveDPP, 6846 MetaChunkSize, 6847 GPUVMEnable, 6848 HostVMEnable, 6849 NumberOfActivePlanes, 6850 NumberOfDPP, 6851 dpte_group_bytes, 6852 HostVMInefficiencyFactor, 6853 HostVMMinPageSize, 6854 HostVMMaxNonCachedPageTableLevels); 6855 6856 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 6857 6858 #ifdef __DML_VBA_DEBUG__ 6859 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 6860 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 6861 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 6862 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 6863 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 6864 #endif 6865 6866 return ExtraLatency; 6867 } 6868 6869 static double CalculateExtraLatencyBytes( 6870 int ReorderingBytes, 6871 int TotalNumberOfActiveDPP, 6872 int PixelChunkSizeInKByte, 6873 int TotalNumberOfDCCActiveDPP, 6874 int MetaChunkSize, 6875 bool GPUVMEnable, 6876 bool HostVMEnable, 6877 int NumberOfActivePlanes, 6878 int NumberOfDPP[], 6879 int dpte_group_bytes[], 6880 double HostVMInefficiencyFactor, 6881 double HostVMMinPageSize, 6882 int 
HostVMMaxNonCachedPageTableLevels) 6883 { 6884 double ret; 6885 int HostVMDynamicLevels = 0, k; 6886 6887 if (GPUVMEnable == true && HostVMEnable == true) { 6888 if (HostVMMinPageSize < 2048) { 6889 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 6890 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 6891 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 6892 } else { 6893 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 6894 } 6895 } else { 6896 HostVMDynamicLevels = 0; 6897 } 6898 6899 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 6900 6901 if (GPUVMEnable == true) { 6902 for (k = 0; k < NumberOfActivePlanes; ++k) { 6903 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 6904 } 6905 } 6906 return ret; 6907 } 6908 6909 static double CalculateUrgentLatency( 6910 double UrgentLatencyPixelDataOnly, 6911 double UrgentLatencyPixelMixedWithVMData, 6912 double UrgentLatencyVMDataOnly, 6913 bool DoUrgentLatencyAdjustment, 6914 double UrgentLatencyAdjustmentFabricClockComponent, 6915 double UrgentLatencyAdjustmentFabricClockReference, 6916 double FabricClock) 6917 { 6918 double ret; 6919 6920 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 6921 if (DoUrgentLatencyAdjustment == true) { 6922 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 6923 } 6924 return ret; 6925 } 6926 6927 static void UseMinimumDCFCLK( 6928 struct display_mode_lib *mode_lib, 6929 int MaxPrefetchMode, 6930 int ReorderingBytes) 6931 { 6932 struct vba_vars_st *v = &mode_lib->vba; 6933 int dummy1, i, j, k; 6934 double NormalEfficiency, dummy2, dummy3; 6935 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 6936 6937 NormalEfficiency = 
v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 6938 for (i = 0; i < v->soc.num_states; ++i) { 6939 for (j = 0; j <= 1; ++j) { 6940 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 6941 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 6942 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 6943 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 6944 double MinimumTWait; 6945 double NonDPTEBandwidth; 6946 double DPTEBandwidth; 6947 double DCFCLKRequiredForAverageBandwidth; 6948 double ExtraLatencyBytes; 6949 double ExtraLatencyCycles; 6950 double DCFCLKRequiredForPeakBandwidth; 6951 int NoOfDPPState[DC__NUM_DPP__MAX]; 6952 double MinimumTvmPlus2Tr0; 6953 6954 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 6955 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 6956 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 6957 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 6958 } 6959 6960 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 6961 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 6962 } 6963 6964 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 6965 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 6966 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 
6967 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 6968 DCFCLKRequiredForAverageBandwidth = dml_max3( 6969 v->ProjectedDCFCLKDeepSleep[i][j], 6970 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 6971 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 6972 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 6973 6974 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6975 ReorderingBytes, 6976 v->TotalNumberOfActiveDPP[i][j], 6977 v->PixelChunkSizeInKByte, 6978 v->TotalNumberOfDCCActiveDPP[i][j], 6979 v->MetaChunkSize, 6980 v->GPUVMEnable, 6981 v->HostVMEnable, 6982 v->NumberOfActivePlanes, 6983 NoOfDPPState, 6984 v->dpte_group_bytes, 6985 1, 6986 v->HostVMMinPageSize, 6987 v->HostVMMaxNonCachedPageTableLevels); 6988 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 6989 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 6990 double DCFCLKCyclesRequiredInPrefetch; 6991 double ExpectedPrefetchBWAcceleration; 6992 double PrefetchTime; 6993 6994 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 6995 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; 6996 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 6997 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 6998 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 6999 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7000 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 7001 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 7002 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 7003 DynamicMetadataVMExtraLatency[k] = 7004 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 7005 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7006 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 7007 - v->UrgLatency[i] 7008 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 7009 * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7010 - DynamicMetadataVMExtraLatency[k]; 7011 7012 if (PrefetchTime > 0) { 7013 double ExpectedVRatioPrefetch; 7014 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7015 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7016 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7017 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7018 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 7019 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7020 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 7021 } 7022 } else { 7023 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7024 } 7025 if (v->DynamicMetadataEnable[k] == true) { 7026 double TSetupPipe; 7027 double TdmbfPipe; 7028 double TdmsksPipe; 7029 double TdmecPipe; 7030 double AllowedTimeForUrgentExtraLatency; 7031 7032 CalculateVupdateAndDynamicMetadataParameters( 7033 v->MaxInterDCNTileRepeaters, 7034 v->RequiredDPPCLK[i][j][k], 7035 v->RequiredDISPCLK[i][j], 7036 v->ProjectedDCFCLKDeepSleep[i][j], 7037 v->PixelClock[k], 7038 v->HTotal[k], 7039 v->VTotal[k] - v->VActive[k], 7040 v->DynamicMetadataTransmittedBytes[k], 7041 v->DynamicMetadataLinesBeforeActiveRequired[k], 7042 v->Interlace[k], 7043 v->ProgressiveToInterlaceUnitInOPP, 7044 &TSetupPipe, 7045 &TdmbfPipe, 7046 &TdmecPipe, 7047 &TdmsksPipe, 7048 &dummy1, 7049 &dummy2, 7050 &dummy3); 7051 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7052 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7053 if (AllowedTimeForUrgentExtraLatency > 0) { 7054 DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max( 7055 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7056 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7057 } else { 7058 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7059 } 7060 } 7061 } 7062 DCFCLKRequiredForPeakBandwidth = 0; 7063 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 7064 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7065 } 7066 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7067 * (v->GPUVMEnable == true ? 7068 (v->HostVMEnable == true ? 7069 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7070 0); 7071 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7072 double MaximumTvmPlus2Tr0PlusTsw; 7073 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7074 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7075 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7076 } else { 7077 DCFCLKRequiredForPeakBandwidth = dml_max3( 7078 DCFCLKRequiredForPeakBandwidth, 7079 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7080 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7081 } 7082 } 7083 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7084 } 7085 } 7086 } 7087 7088 static void CalculateUnboundedRequestAndCompressedBufferSize( 7089 unsigned int DETBufferSizeInKByte, 7090 int ConfigReturnBufferSizeInKByte, 7091 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7092 int TotalActiveDPP, 7093 bool NoChromaPlanes, 7094 int MaxNumDPP, 7095 int CompressedBufferSegmentSizeInkByteFinal, 7096 enum output_encoder_class *Output, 7097 bool 
*UnboundedRequestEnabled, 7098 int *CompressedBufferSizeInkByte) 7099 { 7100 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7101 7102 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7103 *CompressedBufferSizeInkByte = ( 7104 *UnboundedRequestEnabled == true ? 7105 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7106 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7107 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7108 7109 #ifdef __DML_VBA_DEBUG__ 7110 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7111 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7112 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7113 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7114 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7115 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7116 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7117 #endif 7118 } 7119 7120 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7121 { 7122 bool ret_val = false; 7123 7124 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7125 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { 7126 ret_val = false; 7127 } 7128 return (ret_val); 7129 } 7130 7131