1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #define UNIT_TEST 0
28 #if !UNIT_TEST
29 #include "dc.h"
30 #endif
31 #include "../display_mode_lib.h"
32 #include "display_mode_vba_314.h"
33 #include "../dml_inline_defs.h"
34
35 /*
36 * NOTE:
37 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 *
39 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
40 * ways. Unless there is something clearly wrong with it the code should
41 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 */
43
44 #define BPP_INVALID 0
45 #define BPP_BLENDED_PIPE 0xffffffff
46 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184
47 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
48
49 // For DML-C changes that haven't been propagated to VBA yet
50 //#define __DML_VBA_ALLOW_DELTA__
51
52 // Move these to ip parameters/constant
53
54 // The vstartup value at which DML starts checking whether the mode can be supported
55 #define __DML_VBA_MIN_VSTARTUP__ 9
56
57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
59
60 // fudge factor for min dcfclk calculation
61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
62
63 typedef struct {
64 double DPPCLK;
65 double DISPCLK;
66 double PixelClock;
67 double DCFCLKDeepSleep;
68 unsigned int DPPPerPlane;
69 bool ScalerEnabled;
70 double VRatio;
71 double VRatioChroma;
72 enum scan_direction_class SourceScan;
73 unsigned int BlockWidth256BytesY;
74 unsigned int BlockHeight256BytesY;
75 unsigned int BlockWidth256BytesC;
76 unsigned int BlockHeight256BytesC;
77 unsigned int InterlaceEnable;
78 unsigned int NumberOfCursors;
79 unsigned int VBlank;
80 unsigned int HTotal;
81 unsigned int DCCEnable;
82 bool ODMCombineIsEnabled;
83 enum source_format_class SourcePixelFormat;
84 int BytePerPixelY;
85 int BytePerPixelC;
86 bool ProgressiveToInterlaceUnitInOPP;
87 } Pipe;
88
89 #define BPP_INVALID 0
90 #define BPP_BLENDED_PIPE 0xffffffff
91
92 static bool CalculateBytePerPixelAnd256BBlockSizes(
93 enum source_format_class SourcePixelFormat,
94 enum dm_swizzle_mode SurfaceTiling,
95 unsigned int *BytePerPixelY,
96 unsigned int *BytePerPixelC,
97 double *BytePerPixelDETY,
98 double *BytePerPixelDETC,
99 unsigned int *BlockHeight256BytesY,
100 unsigned int *BlockHeight256BytesC,
101 unsigned int *BlockWidth256BytesY,
102 unsigned int *BlockWidth256BytesC);
103 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
104 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
105 static unsigned int dscceComputeDelay(
106 unsigned int bpc,
107 double BPP,
108 unsigned int sliceWidth,
109 unsigned int numSlices,
110 enum output_format_class pixelFormat,
111 enum output_encoder_class Output);
112 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
113 static bool CalculatePrefetchSchedule(
114 struct display_mode_lib *mode_lib,
115 double HostVMInefficiencyFactor,
116 Pipe *myPipe,
117 unsigned int DSCDelay,
118 double DPPCLKDelaySubtotalPlusCNVCFormater,
119 double DPPCLKDelaySCL,
120 double DPPCLKDelaySCLLBOnly,
121 double DPPCLKDelayCNVCCursor,
122 double DISPCLKDelaySubtotal,
123 unsigned int DPP_RECOUT_WIDTH,
124 enum output_format_class OutputFormat,
125 unsigned int MaxInterDCNTileRepeaters,
126 unsigned int VStartup,
127 unsigned int MaxVStartup,
128 unsigned int GPUVMPageTableLevels,
129 bool GPUVMEnable,
130 bool HostVMEnable,
131 unsigned int HostVMMaxNonCachedPageTableLevels,
132 double HostVMMinPageSize,
133 bool DynamicMetadataEnable,
134 bool DynamicMetadataVMEnabled,
135 int DynamicMetadataLinesBeforeActiveRequired,
136 unsigned int DynamicMetadataTransmittedBytes,
137 double UrgentLatency,
138 double UrgentExtraLatency,
139 double TCalc,
140 unsigned int PDEAndMetaPTEBytesFrame,
141 unsigned int MetaRowByte,
142 unsigned int PixelPTEBytesPerRow,
143 double PrefetchSourceLinesY,
144 unsigned int SwathWidthY,
145 double VInitPreFillY,
146 unsigned int MaxNumSwathY,
147 double PrefetchSourceLinesC,
148 unsigned int SwathWidthC,
149 double VInitPreFillC,
150 unsigned int MaxNumSwathC,
151 int swath_width_luma_ub,
152 int swath_width_chroma_ub,
153 unsigned int SwathHeightY,
154 unsigned int SwathHeightC,
155 double TWait,
156 double *DSTXAfterScaler,
157 double *DSTYAfterScaler,
158 double *DestinationLinesForPrefetch,
159 double *PrefetchBandwidth,
160 double *DestinationLinesToRequestVMInVBlank,
161 double *DestinationLinesToRequestRowInVBlank,
162 double *VRatioPrefetchY,
163 double *VRatioPrefetchC,
164 double *RequiredPrefetchPixDataBWLuma,
165 double *RequiredPrefetchPixDataBWChroma,
166 bool *NotEnoughTimeForDynamicMetadata,
167 double *Tno_bw,
168 double *prefetch_vmrow_bw,
169 double *Tdmdl_vm,
170 double *Tdmdl,
171 double *TSetup,
172 int *VUpdateOffsetPix,
173 double *VUpdateWidthPix,
174 double *VReadyOffsetPix);
175 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
176 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
177 static void CalculateDCCConfiguration(
178 bool DCCEnabled,
179 bool DCCProgrammingAssumesScanDirectionUnknown,
180 enum source_format_class SourcePixelFormat,
181 unsigned int SurfaceWidthLuma,
182 unsigned int SurfaceWidthChroma,
183 unsigned int SurfaceHeightLuma,
184 unsigned int SurfaceHeightChroma,
185 double DETBufferSize,
186 unsigned int RequestHeight256ByteLuma,
187 unsigned int RequestHeight256ByteChroma,
188 enum dm_swizzle_mode TilingFormat,
189 unsigned int BytePerPixelY,
190 unsigned int BytePerPixelC,
191 double BytePerPixelDETY,
192 double BytePerPixelDETC,
193 enum scan_direction_class ScanOrientation,
194 unsigned int *MaxUncompressedBlockLuma,
195 unsigned int *MaxUncompressedBlockChroma,
196 unsigned int *MaxCompressedBlockLuma,
197 unsigned int *MaxCompressedBlockChroma,
198 unsigned int *IndependentBlockLuma,
199 unsigned int *IndependentBlockChroma);
200 static double CalculatePrefetchSourceLines(
201 struct display_mode_lib *mode_lib,
202 double VRatio,
203 double vtaps,
204 bool Interlace,
205 bool ProgressiveToInterlaceUnitInOPP,
206 unsigned int SwathHeight,
207 unsigned int ViewportYStart,
208 double *VInitPreFill,
209 unsigned int *MaxNumSwath);
210 static unsigned int CalculateVMAndRowBytes(
211 struct display_mode_lib *mode_lib,
212 bool DCCEnable,
213 unsigned int BlockHeight256Bytes,
214 unsigned int BlockWidth256Bytes,
215 enum source_format_class SourcePixelFormat,
216 unsigned int SurfaceTiling,
217 unsigned int BytePerPixel,
218 enum scan_direction_class ScanDirection,
219 unsigned int SwathWidth,
220 unsigned int ViewportHeight,
221 bool GPUVMEnable,
222 bool HostVMEnable,
223 unsigned int HostVMMaxNonCachedPageTableLevels,
224 unsigned int GPUVMMinPageSize,
225 unsigned int HostVMMinPageSize,
226 unsigned int PTEBufferSizeInRequests,
227 unsigned int Pitch,
228 unsigned int DCCMetaPitch,
229 unsigned int *MacroTileWidth,
230 unsigned int *MetaRowByte,
231 unsigned int *PixelPTEBytesPerRow,
232 bool *PTEBufferSizeNotExceeded,
233 int *dpte_row_width_ub,
234 unsigned int *dpte_row_height,
235 unsigned int *MetaRequestWidth,
236 unsigned int *MetaRequestHeight,
237 unsigned int *meta_row_width,
238 unsigned int *meta_row_height,
239 int *vm_group_bytes,
240 unsigned int *dpte_group_bytes,
241 unsigned int *PixelPTEReqWidth,
242 unsigned int *PixelPTEReqHeight,
243 unsigned int *PTERequestSize,
244 int *DPDE0BytesFrame,
245 int *MetaPTEBytesFrame);
246 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
247 static void CalculateRowBandwidth(
248 bool GPUVMEnable,
249 enum source_format_class SourcePixelFormat,
250 double VRatio,
251 double VRatioChroma,
252 bool DCCEnable,
253 double LineTime,
254 unsigned int MetaRowByteLuma,
255 unsigned int MetaRowByteChroma,
256 unsigned int meta_row_height_luma,
257 unsigned int meta_row_height_chroma,
258 unsigned int PixelPTEBytesPerRowLuma,
259 unsigned int PixelPTEBytesPerRowChroma,
260 unsigned int dpte_row_height_luma,
261 unsigned int dpte_row_height_chroma,
262 double *meta_row_bw,
263 double *dpte_row_bw);
264
265 static void CalculateFlipSchedule(
266 struct display_mode_lib *mode_lib,
267 unsigned int k,
268 double HostVMInefficiencyFactor,
269 double UrgentExtraLatency,
270 double UrgentLatency,
271 double PDEAndMetaPTEBytesPerFrame,
272 double MetaRowBytes,
273 double DPTEBytesPerRow);
274 static double CalculateWriteBackDelay(
275 enum source_format_class WritebackPixelFormat,
276 double WritebackHRatio,
277 double WritebackVRatio,
278 unsigned int WritebackVTaps,
279 int WritebackDestinationWidth,
280 int WritebackDestinationHeight,
281 int WritebackSourceHeight,
282 unsigned int HTotal);
283
284 static void CalculateVupdateAndDynamicMetadataParameters(
285 int MaxInterDCNTileRepeaters,
286 double DPPCLK,
287 double DISPCLK,
288 double DCFClkDeepSleep,
289 double PixelClock,
290 int HTotal,
291 int VBlank,
292 int DynamicMetadataTransmittedBytes,
293 int DynamicMetadataLinesBeforeActiveRequired,
294 int InterlaceEnable,
295 bool ProgressiveToInterlaceUnitInOPP,
296 double *TSetup,
297 double *Tdmbf,
298 double *Tdmec,
299 double *Tdmsks,
300 int *VUpdateOffsetPix,
301 double *VUpdateWidthPix,
302 double *VReadyOffsetPix);
303
304 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
305 struct display_mode_lib *mode_lib,
306 unsigned int PrefetchMode,
307 double DCFCLK,
308 double ReturnBW,
309 double UrgentLatency,
310 double ExtraLatency,
311 double SOCCLK,
312 double DCFCLKDeepSleep,
313 unsigned int DETBufferSizeY[],
314 unsigned int DETBufferSizeC[],
315 unsigned int SwathHeightY[],
316 unsigned int SwathHeightC[],
317 double SwathWidthY[],
318 double SwathWidthC[],
319 unsigned int DPPPerPlane[],
320 double BytePerPixelDETY[],
321 double BytePerPixelDETC[],
322 bool UnboundedRequestEnabled,
323 unsigned int CompressedBufferSizeInkByte,
324 enum clock_change_support *DRAMClockChangeSupport,
325 double *StutterExitWatermark,
326 double *StutterEnterPlusExitWatermark,
327 double *Z8StutterExitWatermark,
328 double *Z8StutterEnterPlusExitWatermark);
329
330 static void CalculateDCFCLKDeepSleep(
331 struct display_mode_lib *mode_lib,
332 unsigned int NumberOfActivePlanes,
333 int BytePerPixelY[],
334 int BytePerPixelC[],
335 double VRatio[],
336 double VRatioChroma[],
337 double SwathWidthY[],
338 double SwathWidthC[],
339 unsigned int DPPPerPlane[],
340 double HRatio[],
341 double HRatioChroma[],
342 double PixelClock[],
343 double PSCL_THROUGHPUT[],
344 double PSCL_THROUGHPUT_CHROMA[],
345 double DPPCLK[],
346 double ReadBandwidthLuma[],
347 double ReadBandwidthChroma[],
348 int ReturnBusWidth,
349 double *DCFCLKDeepSleep);
350
351 static void CalculateUrgentBurstFactor(
352 int swath_width_luma_ub,
353 int swath_width_chroma_ub,
354 unsigned int SwathHeightY,
355 unsigned int SwathHeightC,
356 double LineTime,
357 double UrgentLatency,
358 double CursorBufferSize,
359 unsigned int CursorWidth,
360 unsigned int CursorBPP,
361 double VRatio,
362 double VRatioC,
363 double BytePerPixelInDETY,
364 double BytePerPixelInDETC,
365 double DETBufferSizeY,
366 double DETBufferSizeC,
367 double *UrgentBurstFactorCursor,
368 double *UrgentBurstFactorLuma,
369 double *UrgentBurstFactorChroma,
370 bool *NotEnoughUrgentLatencyHiding);
371
372 static void UseMinimumDCFCLK(
373 struct display_mode_lib *mode_lib,
374 int MaxPrefetchMode,
375 int ReorderingBytes);
376
377 static void CalculatePixelDeliveryTimes(
378 unsigned int NumberOfActivePlanes,
379 double VRatio[],
380 double VRatioChroma[],
381 double VRatioPrefetchY[],
382 double VRatioPrefetchC[],
383 unsigned int swath_width_luma_ub[],
384 unsigned int swath_width_chroma_ub[],
385 unsigned int DPPPerPlane[],
386 double HRatio[],
387 double HRatioChroma[],
388 double PixelClock[],
389 double PSCL_THROUGHPUT[],
390 double PSCL_THROUGHPUT_CHROMA[],
391 double DPPCLK[],
392 int BytePerPixelC[],
393 enum scan_direction_class SourceScan[],
394 unsigned int NumberOfCursors[],
395 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
396 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
397 unsigned int BlockWidth256BytesY[],
398 unsigned int BlockHeight256BytesY[],
399 unsigned int BlockWidth256BytesC[],
400 unsigned int BlockHeight256BytesC[],
401 double DisplayPipeLineDeliveryTimeLuma[],
402 double DisplayPipeLineDeliveryTimeChroma[],
403 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
404 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeLuma[],
406 double DisplayPipeRequestDeliveryTimeChroma[],
407 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
408 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
409 double CursorRequestDeliveryTime[],
410 double CursorRequestDeliveryTimePrefetch[]);
411
412 static void CalculateMetaAndPTETimes(
413 int NumberOfActivePlanes,
414 bool GPUVMEnable,
415 int MetaChunkSize,
416 int MinMetaChunkSizeBytes,
417 int HTotal[],
418 double VRatio[],
419 double VRatioChroma[],
420 double DestinationLinesToRequestRowInVBlank[],
421 double DestinationLinesToRequestRowInImmediateFlip[],
422 bool DCCEnable[],
423 double PixelClock[],
424 int BytePerPixelY[],
425 int BytePerPixelC[],
426 enum scan_direction_class SourceScan[],
427 int dpte_row_height[],
428 int dpte_row_height_chroma[],
429 int meta_row_width[],
430 int meta_row_width_chroma[],
431 int meta_row_height[],
432 int meta_row_height_chroma[],
433 int meta_req_width[],
434 int meta_req_width_chroma[],
435 int meta_req_height[],
436 int meta_req_height_chroma[],
437 int dpte_group_bytes[],
438 int PTERequestSizeY[],
439 int PTERequestSizeC[],
440 int PixelPTEReqWidthY[],
441 int PixelPTEReqHeightY[],
442 int PixelPTEReqWidthC[],
443 int PixelPTEReqHeightC[],
444 int dpte_row_width_luma_ub[],
445 int dpte_row_width_chroma_ub[],
446 double DST_Y_PER_PTE_ROW_NOM_L[],
447 double DST_Y_PER_PTE_ROW_NOM_C[],
448 double DST_Y_PER_META_ROW_NOM_L[],
449 double DST_Y_PER_META_ROW_NOM_C[],
450 double TimePerMetaChunkNominal[],
451 double TimePerChromaMetaChunkNominal[],
452 double TimePerMetaChunkVBlank[],
453 double TimePerChromaMetaChunkVBlank[],
454 double TimePerMetaChunkFlip[],
455 double TimePerChromaMetaChunkFlip[],
456 double time_per_pte_group_nom_luma[],
457 double time_per_pte_group_vblank_luma[],
458 double time_per_pte_group_flip_luma[],
459 double time_per_pte_group_nom_chroma[],
460 double time_per_pte_group_vblank_chroma[],
461 double time_per_pte_group_flip_chroma[]);
462
463 static void CalculateVMGroupAndRequestTimes(
464 unsigned int NumberOfActivePlanes,
465 bool GPUVMEnable,
466 unsigned int GPUVMMaxPageTableLevels,
467 unsigned int HTotal[],
468 int BytePerPixelC[],
469 double DestinationLinesToRequestVMInVBlank[],
470 double DestinationLinesToRequestVMInImmediateFlip[],
471 bool DCCEnable[],
472 double PixelClock[],
473 int dpte_row_width_luma_ub[],
474 int dpte_row_width_chroma_ub[],
475 int vm_group_bytes[],
476 unsigned int dpde0_bytes_per_frame_ub_l[],
477 unsigned int dpde0_bytes_per_frame_ub_c[],
478 int meta_pte_bytes_per_frame_ub_l[],
479 int meta_pte_bytes_per_frame_ub_c[],
480 double TimePerVMGroupVBlank[],
481 double TimePerVMGroupFlip[],
482 double TimePerVMRequestVBlank[],
483 double TimePerVMRequestFlip[]);
484
485 static void CalculateStutterEfficiency(
486 struct display_mode_lib *mode_lib,
487 int CompressedBufferSizeInkByte,
488 bool UnboundedRequestEnabled,
489 int ConfigReturnBufferSizeInKByte,
490 int MetaFIFOSizeInKEntries,
491 int ZeroSizeBufferEntries,
492 int NumberOfActivePlanes,
493 int ROBBufferSizeInKByte,
494 double TotalDataReadBandwidth,
495 double DCFCLK,
496 double ReturnBW,
497 double COMPBUF_RESERVED_SPACE_64B,
498 double COMPBUF_RESERVED_SPACE_ZS,
499 double SRExitTime,
500 double SRExitZ8Time,
501 bool SynchronizedVBlank,
502 double Z8StutterEnterPlusExitWatermark,
503 double StutterEnterPlusExitWatermark,
504 bool ProgressiveToInterlaceUnitInOPP,
505 bool Interlace[],
506 double MinTTUVBlank[],
507 int DPPPerPlane[],
508 unsigned int DETBufferSizeY[],
509 int BytePerPixelY[],
510 double BytePerPixelDETY[],
511 double SwathWidthY[],
512 int SwathHeightY[],
513 int SwathHeightC[],
514 double NetDCCRateLuma[],
515 double NetDCCRateChroma[],
516 double DCCFractionOfZeroSizeRequestsLuma[],
517 double DCCFractionOfZeroSizeRequestsChroma[],
518 int HTotal[],
519 int VTotal[],
520 double PixelClock[],
521 double VRatio[],
522 enum scan_direction_class SourceScan[],
523 int BlockHeight256BytesY[],
524 int BlockWidth256BytesY[],
525 int BlockHeight256BytesC[],
526 int BlockWidth256BytesC[],
527 int DCCYMaxUncompressedBlock[],
528 int DCCCMaxUncompressedBlock[],
529 int VActive[],
530 bool DCCEnable[],
531 bool WritebackEnable[],
532 double ReadBandwidthPlaneLuma[],
533 double ReadBandwidthPlaneChroma[],
534 double meta_row_bw[],
535 double dpte_row_bw[],
536 double *StutterEfficiencyNotIncludingVBlank,
537 double *StutterEfficiency,
538 int *NumberOfStutterBurstsPerFrame,
539 double *Z8StutterEfficiencyNotIncludingVBlank,
540 double *Z8StutterEfficiency,
541 int *Z8NumberOfStutterBurstsPerFrame,
542 double *StutterPeriod);
543
544 static void CalculateSwathAndDETConfiguration(
545 bool ForceSingleDPP,
546 int NumberOfActivePlanes,
547 unsigned int DETBufferSizeInKByte,
548 double MaximumSwathWidthLuma[],
549 double MaximumSwathWidthChroma[],
550 enum scan_direction_class SourceScan[],
551 enum source_format_class SourcePixelFormat[],
552 enum dm_swizzle_mode SurfaceTiling[],
553 int ViewportWidth[],
554 int ViewportHeight[],
555 int SurfaceWidthY[],
556 int SurfaceWidthC[],
557 int SurfaceHeightY[],
558 int SurfaceHeightC[],
559 int Read256BytesBlockHeightY[],
560 int Read256BytesBlockHeightC[],
561 int Read256BytesBlockWidthY[],
562 int Read256BytesBlockWidthC[],
563 enum odm_combine_mode ODMCombineEnabled[],
564 int BlendingAndTiming[],
565 int BytePerPixY[],
566 int BytePerPixC[],
567 double BytePerPixDETY[],
568 double BytePerPixDETC[],
569 int HActive[],
570 double HRatio[],
571 double HRatioChroma[],
572 int DPPPerPlane[],
573 int swath_width_luma_ub[],
574 int swath_width_chroma_ub[],
575 double SwathWidth[],
576 double SwathWidthChroma[],
577 int SwathHeightY[],
578 int SwathHeightC[],
579 unsigned int DETBufferSizeY[],
580 unsigned int DETBufferSizeC[],
581 bool ViewportSizeSupportPerPlane[],
582 bool *ViewportSizeSupport);
583 static void CalculateSwathWidth(
584 bool ForceSingleDPP,
585 int NumberOfActivePlanes,
586 enum source_format_class SourcePixelFormat[],
587 enum scan_direction_class SourceScan[],
588 int ViewportWidth[],
589 int ViewportHeight[],
590 int SurfaceWidthY[],
591 int SurfaceWidthC[],
592 int SurfaceHeightY[],
593 int SurfaceHeightC[],
594 enum odm_combine_mode ODMCombineEnabled[],
595 int BytePerPixY[],
596 int BytePerPixC[],
597 int Read256BytesBlockHeightY[],
598 int Read256BytesBlockHeightC[],
599 int Read256BytesBlockWidthY[],
600 int Read256BytesBlockWidthC[],
601 int BlendingAndTiming[],
602 int HActive[],
603 double HRatio[],
604 int DPPPerPlane[],
605 double SwathWidthSingleDPPY[],
606 double SwathWidthSingleDPPC[],
607 double SwathWidthY[],
608 double SwathWidthC[],
609 int MaximumSwathHeightY[],
610 int MaximumSwathHeightC[],
611 int swath_width_luma_ub[],
612 int swath_width_chroma_ub[]);
613
614 static double CalculateExtraLatency(
615 int RoundTripPingLatencyCycles,
616 int ReorderingBytes,
617 double DCFCLK,
618 int TotalNumberOfActiveDPP,
619 int PixelChunkSizeInKByte,
620 int TotalNumberOfDCCActiveDPP,
621 int MetaChunkSize,
622 double ReturnBW,
623 bool GPUVMEnable,
624 bool HostVMEnable,
625 int NumberOfActivePlanes,
626 int NumberOfDPP[],
627 int dpte_group_bytes[],
628 double HostVMInefficiencyFactor,
629 double HostVMMinPageSize,
630 int HostVMMaxNonCachedPageTableLevels);
631
632 static double CalculateExtraLatencyBytes(
633 int ReorderingBytes,
634 int TotalNumberOfActiveDPP,
635 int PixelChunkSizeInKByte,
636 int TotalNumberOfDCCActiveDPP,
637 int MetaChunkSize,
638 bool GPUVMEnable,
639 bool HostVMEnable,
640 int NumberOfActivePlanes,
641 int NumberOfDPP[],
642 int dpte_group_bytes[],
643 double HostVMInefficiencyFactor,
644 double HostVMMinPageSize,
645 int HostVMMaxNonCachedPageTableLevels);
646
647 static double CalculateUrgentLatency(
648 double UrgentLatencyPixelDataOnly,
649 double UrgentLatencyPixelMixedWithVMData,
650 double UrgentLatencyVMDataOnly,
651 bool DoUrgentLatencyAdjustment,
652 double UrgentLatencyAdjustmentFabricClockComponent,
653 double UrgentLatencyAdjustmentFabricClockReference,
654 double FabricClockSingle);
655
656 static void CalculateUnboundedRequestAndCompressedBufferSize(
657 unsigned int DETBufferSizeInKByte,
658 int ConfigReturnBufferSizeInKByte,
659 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
660 int TotalActiveDPP,
661 bool NoChromaPlanes,
662 int MaxNumDPP,
663 int CompressedBufferSegmentSizeInkByteFinal,
664 enum output_encoder_class *Output,
665 bool *UnboundedRequestEnabled,
666 int *CompressedBufferSizeInkByte);
667
668 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
669 static unsigned int CalculateMaxVStartup(
670 unsigned int VTotal,
671 unsigned int VActive,
672 unsigned int VBlankNom,
673 unsigned int HTotal,
674 double PixelClock,
675 bool ProgressiveTointerlaceUnitinOPP,
676 bool Interlace,
677 unsigned int VBlankNomDefaultUS,
678 double WritebackDelayTime);
679
/*
 * dml314_recalculate() - run the full recalculation sequence on @mode_lib.
 *
 * The stages are invoked strictly in this order, each one operating on the
 * same mode_lib instance:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Stages 3 and 4 are static functions of this file. When __DML_VBA_DEBUG__
 * is defined, a trace line is printed right before stage 4.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
690
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)691 static unsigned int dscceComputeDelay(
692 unsigned int bpc,
693 double BPP,
694 unsigned int sliceWidth,
695 unsigned int numSlices,
696 enum output_format_class pixelFormat,
697 enum output_encoder_class Output)
698 {
699 // valid bpc = source bits per component in the set of {8, 10, 12}
700 // valid bpp = increments of 1/16 of a bit
701 // min = 6/7/8 in N420/N422/444, respectively
702 // max = such that compression is 1:1
703 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
704 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
705 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
706
707 // fixed value
708 unsigned int rcModelSize = 8192;
709
710 // N422/N420 operate at 2 pixels per clock
711 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
712
713 if (pixelFormat == dm_420)
714 pixelsPerClock = 2;
715 else if (pixelFormat == dm_444)
716 pixelsPerClock = 1;
717 else if (pixelFormat == dm_n422)
718 pixelsPerClock = 2;
719 // #all other modes operate at 1 pixel per clock
720 else
721 pixelsPerClock = 1;
722
723 //initial transmit delay as per PPS
724 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
725
726 //compute ssm delay
727 if (bpc == 8)
728 D = 81;
729 else if (bpc == 10)
730 D = 89;
731 else
732 D = 113;
733
734 //divide by pixel per cycle to compute slice width as seen by DSC
735 w = sliceWidth / pixelsPerClock;
736
737 //422 mode has an additional cycle of delay
738 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
739 s = 0;
740 else
741 s = 1;
742
743 //main calculation for the dscce
744 ix = initalXmitDelay + 45;
745 wx = (w + 2) / 3;
746 P = 3 * wx - w;
747 l0 = ix / w;
748 a = ix + P * l0;
749 ax = (a + 2) / 3 + D + 6 + 1;
750 L = (ax + wx - 1) / wx;
751 if ((ix % w) == 0 && P != 0)
752 lstall = 1;
753 else
754 lstall = 0;
755 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
756
757 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
758 pixels = Delay * 3 * pixelsPerClock;
759 return pixels;
760 }
761
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)762 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
763 {
764 unsigned int Delay = 0;
765
766 if (pixelFormat == dm_420) {
767 // sfr
768 Delay = Delay + 2;
769 // dsccif
770 Delay = Delay + 0;
771 // dscc - input deserializer
772 Delay = Delay + 3;
773 // dscc gets pixels every other cycle
774 Delay = Delay + 2;
775 // dscc - input cdc fifo
776 Delay = Delay + 12;
777 // dscc gets pixels every other cycle
778 Delay = Delay + 13;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 7;
783 // dscc gets pixels every other cycle
784 Delay = Delay + 3;
785 // dscc - cdc uncertainty
786 Delay = Delay + 2;
787 // dscc - output serializer
788 Delay = Delay + 1;
789 // sft
790 Delay = Delay + 1;
791 } else if (pixelFormat == dm_n422) {
792 // sfr
793 Delay = Delay + 2;
794 // dsccif
795 Delay = Delay + 1;
796 // dscc - input deserializer
797 Delay = Delay + 5;
798 // dscc - input cdc fifo
799 Delay = Delay + 25;
800 // dscc - cdc uncertainty
801 Delay = Delay + 2;
802 // dscc - output cdc fifo
803 Delay = Delay + 10;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // dscc - output serializer
807 Delay = Delay + 1;
808 // sft
809 Delay = Delay + 1;
810 } else {
811 // sfr
812 Delay = Delay + 2;
813 // dsccif
814 Delay = Delay + 0;
815 // dscc - input deserializer
816 Delay = Delay + 3;
817 // dscc - input cdc fifo
818 Delay = Delay + 12;
819 // dscc - cdc uncertainty
820 Delay = Delay + 2;
821 // dscc - output cdc fifo
822 Delay = Delay + 7;
823 // dscc - output serializer
824 Delay = Delay + 1;
825 // dscc - cdc uncertainty
826 Delay = Delay + 2;
827 // sft
828 Delay = Delay + 1;
829 }
830
831 return Delay;
832 }
833
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)834 static bool CalculatePrefetchSchedule(
835 struct display_mode_lib *mode_lib,
836 double HostVMInefficiencyFactor,
837 Pipe *myPipe,
838 unsigned int DSCDelay,
839 double DPPCLKDelaySubtotalPlusCNVCFormater,
840 double DPPCLKDelaySCL,
841 double DPPCLKDelaySCLLBOnly,
842 double DPPCLKDelayCNVCCursor,
843 double DISPCLKDelaySubtotal,
844 unsigned int DPP_RECOUT_WIDTH,
845 enum output_format_class OutputFormat,
846 unsigned int MaxInterDCNTileRepeaters,
847 unsigned int VStartup,
848 unsigned int MaxVStartup,
849 unsigned int GPUVMPageTableLevels,
850 bool GPUVMEnable,
851 bool HostVMEnable,
852 unsigned int HostVMMaxNonCachedPageTableLevels,
853 double HostVMMinPageSize,
854 bool DynamicMetadataEnable,
855 bool DynamicMetadataVMEnabled,
856 int DynamicMetadataLinesBeforeActiveRequired,
857 unsigned int DynamicMetadataTransmittedBytes,
858 double UrgentLatency,
859 double UrgentExtraLatency,
860 double TCalc,
861 unsigned int PDEAndMetaPTEBytesFrame,
862 unsigned int MetaRowByte,
863 unsigned int PixelPTEBytesPerRow,
864 double PrefetchSourceLinesY,
865 unsigned int SwathWidthY,
866 double VInitPreFillY,
867 unsigned int MaxNumSwathY,
868 double PrefetchSourceLinesC,
869 unsigned int SwathWidthC,
870 double VInitPreFillC,
871 unsigned int MaxNumSwathC,
872 int swath_width_luma_ub,
873 int swath_width_chroma_ub,
874 unsigned int SwathHeightY,
875 unsigned int SwathHeightC,
876 double TWait,
877 double *DSTXAfterScaler,
878 double *DSTYAfterScaler,
879 double *DestinationLinesForPrefetch,
880 double *PrefetchBandwidth,
881 double *DestinationLinesToRequestVMInVBlank,
882 double *DestinationLinesToRequestRowInVBlank,
883 double *VRatioPrefetchY,
884 double *VRatioPrefetchC,
885 double *RequiredPrefetchPixDataBWLuma,
886 double *RequiredPrefetchPixDataBWChroma,
887 bool *NotEnoughTimeForDynamicMetadata,
888 double *Tno_bw,
889 double *prefetch_vmrow_bw,
890 double *Tdmdl_vm,
891 double *Tdmdl,
892 double *TSetup,
893 int *VUpdateOffsetPix,
894 double *VUpdateWidthPix,
895 double *VReadyOffsetPix)
896 {
897 bool MyError = false;
898 unsigned int DPPCycles, DISPCLKCycles;
899 double DSTTotalPixelsAfterScaler;
900 double LineTime;
901 double dst_y_prefetch_equ;
902 #ifdef __DML_VBA_DEBUG__
903 double Tsw_oto;
904 #endif
905 double prefetch_bw_oto;
906 double prefetch_bw_pr;
907 double Tvm_oto;
908 double Tr0_oto;
909 double Tvm_oto_lines;
910 double Tr0_oto_lines;
911 double dst_y_prefetch_oto;
912 double TimeForFetchingMetaPTE = 0;
913 double TimeForFetchingRowInVBlank = 0;
914 double LinesToRequestPrefetchPixelData = 0;
915 unsigned int HostVMDynamicLevelsTrips;
916 double trip_to_mem;
917 double Tvm_trips;
918 double Tr0_trips;
919 double Tvm_trips_rounded;
920 double Tr0_trips_rounded;
921 double Lsw_oto;
922 double Tpre_rounded;
923 double prefetch_bw_equ;
924 double Tvm_equ;
925 double Tr0_equ;
926 double Tdmbf;
927 double Tdmec;
928 double Tdmsks;
929 double prefetch_sw_bytes;
930 double bytes_pp;
931 double dep_bytes;
932 int max_vratio_pre = 4;
933 double min_Lsw;
934 double Tsw_est1 = 0;
935 double Tsw_est3 = 0;
936 double max_Tsw = 0;
937
938 if (GPUVMEnable == true && HostVMEnable == true) {
939 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
940 } else {
941 HostVMDynamicLevelsTrips = 0;
942 }
943 #ifdef __DML_VBA_DEBUG__
944 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
945 #endif
946 CalculateVupdateAndDynamicMetadataParameters(
947 MaxInterDCNTileRepeaters,
948 myPipe->DPPCLK,
949 myPipe->DISPCLK,
950 myPipe->DCFCLKDeepSleep,
951 myPipe->PixelClock,
952 myPipe->HTotal,
953 myPipe->VBlank,
954 DynamicMetadataTransmittedBytes,
955 DynamicMetadataLinesBeforeActiveRequired,
956 myPipe->InterlaceEnable,
957 myPipe->ProgressiveToInterlaceUnitInOPP,
958 TSetup,
959 &Tdmbf,
960 &Tdmec,
961 &Tdmsks,
962 VUpdateOffsetPix,
963 VUpdateWidthPix,
964 VReadyOffsetPix);
965
966 LineTime = myPipe->HTotal / myPipe->PixelClock;
967 trip_to_mem = UrgentLatency;
968 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
969
970 #ifdef __DML_VBA_ALLOW_DELTA__
971 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
972 #else
973 if (DynamicMetadataVMEnabled == true) {
974 #endif
975 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
976 } else {
977 *Tdmdl = TWait + UrgentExtraLatency;
978 }
979
980 #ifdef __DML_VBA_ALLOW_DELTA__
981 if (DynamicMetadataEnable == false) {
982 *Tdmdl = 0.0;
983 }
984 #endif
985
986 if (DynamicMetadataEnable == true) {
987 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
988 *NotEnoughTimeForDynamicMetadata = true;
989 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
990 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
991 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
992 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
993 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
994 } else {
995 *NotEnoughTimeForDynamicMetadata = false;
996 }
997 } else {
998 *NotEnoughTimeForDynamicMetadata = false;
999 }
1000
1001 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1002
1003 if (myPipe->ScalerEnabled)
1004 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1005 else
1006 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1007
1008 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1009
1010 DISPCLKCycles = DISPCLKDelaySubtotal;
1011
1012 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1013 return true;
1014
1015 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1016
1017 #ifdef __DML_VBA_DEBUG__
1018 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1019 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1020 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1021 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1022 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1023 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1024 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1025 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1026 #endif
1027
1028 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1029
1030 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1031 *DSTYAfterScaler = 1;
1032 else
1033 *DSTYAfterScaler = 0;
1034
1035 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1036 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1037 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1038
1039 #ifdef __DML_VBA_DEBUG__
1040 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1041 #endif
1042
1043 MyError = false;
1044
1045 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1046 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1047 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1048
1049 #ifdef __DML_VBA_ALLOW_DELTA__
1050 if (!myPipe->DCCEnable) {
1051 Tr0_trips = 0.0;
1052 Tr0_trips_rounded = 0.0;
1053 }
1054 #endif
1055
1056 if (!GPUVMEnable) {
1057 Tvm_trips = 0.0;
1058 Tvm_trips_rounded = 0.0;
1059 }
1060
1061 if (GPUVMEnable) {
1062 if (GPUVMPageTableLevels >= 3) {
1063 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1064 } else {
1065 *Tno_bw = 0;
1066 }
1067 } else if (!myPipe->DCCEnable) {
1068 *Tno_bw = LineTime;
1069 } else {
1070 *Tno_bw = LineTime / 4;
1071 }
1072
1073 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1074 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1075 else
1076 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1077 /*rev 99*/
1078 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1079 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1080 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1081 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1082 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1083
1084 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1085 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1086 #ifdef __DML_VBA_DEBUG__
1087 Tsw_oto = Lsw_oto * LineTime;
1088 #endif
1089
1090
1091 #ifdef __DML_VBA_DEBUG__
1092 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1093 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1094 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1095 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1096 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1097 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1098 #endif
1099
1100 if (GPUVMEnable == true)
1101 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1102 else
1103 Tvm_oto = LineTime / 4.0;
1104
1105 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1106 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1107 LineTime - Tvm_oto,
1108 LineTime / 4);
1109 } else {
1110 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1111 }
1112
1113 #ifdef __DML_VBA_DEBUG__
1114 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1115 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1116 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1117 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1118 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1119 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1120 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1121 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1122 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1123 #endif
1124
1125 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1126 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1127 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1128 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1129 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1130 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1131
1132 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1133
1134 if (prefetch_sw_bytes < dep_bytes)
1135 prefetch_sw_bytes = 2 * dep_bytes;
1136
1137 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1138 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1139 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1140 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1141 dml_print("DML: LineTime: %f\n", LineTime);
1142 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1143
1144 dml_print("DML: LineTime: %f\n", LineTime);
1145 dml_print("DML: VStartup: %d\n", VStartup);
1146 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1147 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1148 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1149 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1150 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1151 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1152 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1153 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1154 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1155 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1156 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1157
1158 *PrefetchBandwidth = 0;
1159 *DestinationLinesToRequestVMInVBlank = 0;
1160 *DestinationLinesToRequestRowInVBlank = 0;
1161 *VRatioPrefetchY = 0;
1162 *VRatioPrefetchC = 0;
1163 *RequiredPrefetchPixDataBWLuma = 0;
1164 if (dst_y_prefetch_equ > 1) {
1165 double PrefetchBandwidth1;
1166 double PrefetchBandwidth2;
1167 double PrefetchBandwidth3;
1168 double PrefetchBandwidth4;
1169
1170 if (Tpre_rounded - *Tno_bw > 0) {
1171 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1172 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1173 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1174 } else {
1175 PrefetchBandwidth1 = 0;
1176 }
1177
1178 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1179 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1180 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1181 }
1182
1183 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1184 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1185 else
1186 PrefetchBandwidth2 = 0;
1187
1188 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1189 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1190 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1191 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1192 } else {
1193 PrefetchBandwidth3 = 0;
1194 }
1195
1196 #ifdef __DML_VBA_DEBUG__
1197 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1198 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1199 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1200 #endif
1201 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1202 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1203 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1204 }
1205
1206 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1207 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1208 else
1209 PrefetchBandwidth4 = 0;
1210
1211 {
1212 bool Case1OK;
1213 bool Case2OK;
1214 bool Case3OK;
1215
1216 if (PrefetchBandwidth1 > 0) {
1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1219 Case1OK = true;
1220 } else {
1221 Case1OK = false;
1222 }
1223 } else {
1224 Case1OK = false;
1225 }
1226
1227 if (PrefetchBandwidth2 > 0) {
1228 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1229 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1230 Case2OK = true;
1231 } else {
1232 Case2OK = false;
1233 }
1234 } else {
1235 Case2OK = false;
1236 }
1237
1238 if (PrefetchBandwidth3 > 0) {
1239 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1240 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1241 Case3OK = true;
1242 } else {
1243 Case3OK = false;
1244 }
1245 } else {
1246 Case3OK = false;
1247 }
1248
1249 if (Case1OK) {
1250 prefetch_bw_equ = PrefetchBandwidth1;
1251 } else if (Case2OK) {
1252 prefetch_bw_equ = PrefetchBandwidth2;
1253 } else if (Case3OK) {
1254 prefetch_bw_equ = PrefetchBandwidth3;
1255 } else {
1256 prefetch_bw_equ = PrefetchBandwidth4;
1257 }
1258
1259 #ifdef __DML_VBA_DEBUG__
1260 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1261 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1262 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1263 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1264 #endif
1265
1266 if (prefetch_bw_equ > 0) {
1267 if (GPUVMEnable == true) {
1268 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1269 } else {
1270 Tvm_equ = LineTime / 4;
1271 }
1272
1273 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1274 Tr0_equ = dml_max4(
1275 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1276 Tr0_trips,
1277 (LineTime - Tvm_equ) / 2,
1278 LineTime / 4);
1279 } else {
1280 Tr0_equ = (LineTime - Tvm_equ) / 2;
1281 }
1282 } else {
1283 Tvm_equ = 0;
1284 Tr0_equ = 0;
1285 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1286 }
1287 }
1288
1289 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1290 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1291 TimeForFetchingMetaPTE = Tvm_oto;
1292 TimeForFetchingRowInVBlank = Tr0_oto;
1293 *PrefetchBandwidth = prefetch_bw_oto;
1294 } else {
1295 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1296 TimeForFetchingMetaPTE = Tvm_equ;
1297 TimeForFetchingRowInVBlank = Tr0_equ;
1298 *PrefetchBandwidth = prefetch_bw_equ;
1299 }
1300
1301 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1302
1303 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1304
1305 #ifdef __DML_VBA_ALLOW_DELTA__
1306 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1307 // See note above dated 5/30/2018
1308 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1309 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1310 #else
1311 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1312 #endif
1313
1314 #ifdef __DML_VBA_DEBUG__
1315 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1316 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1317 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1318 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1319 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1320 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1321 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1322 #endif
1323
1324 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1325
1326 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1327 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1328 #ifdef __DML_VBA_DEBUG__
1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1330 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1331 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1332 #endif
1333 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1334 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1335 *VRatioPrefetchY = dml_max(
1336 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1337 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1338 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1339 } else {
1340 MyError = true;
1341 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1342 *VRatioPrefetchY = 0;
1343 }
1344 #ifdef __DML_VBA_DEBUG__
1345 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1346 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1347 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1348 #endif
1349 }
1350
1351 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1352 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1353
1354 #ifdef __DML_VBA_DEBUG__
1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1356 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1357 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1358 #endif
1359 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1360 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1361 *VRatioPrefetchC = dml_max(
1362 *VRatioPrefetchC,
1363 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1364 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1365 } else {
1366 MyError = true;
1367 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1368 *VRatioPrefetchC = 0;
1369 }
1370 #ifdef __DML_VBA_DEBUG__
1371 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1372 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1373 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1374 #endif
1375 }
1376
1377 #ifdef __DML_VBA_DEBUG__
1378 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1379 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1380 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1381 #endif
1382
1383 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1384
1385 #ifdef __DML_VBA_DEBUG__
1386 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1387 #endif
1388
1389 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1390 / LineTime;
1391 } else {
1392 MyError = true;
1393 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1394 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1395 *VRatioPrefetchY = 0;
1396 *VRatioPrefetchC = 0;
1397 *RequiredPrefetchPixDataBWLuma = 0;
1398 *RequiredPrefetchPixDataBWChroma = 0;
1399 }
1400
1401 dml_print(
1402 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1403 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1404 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1405 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1406 dml_print(
1407 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1408 (double) LinesToRequestPrefetchPixelData * LineTime);
1409 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1410 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1411 dml_print(
1412 "DML: Tslack(pre): %fus - time left over in schedule\n",
1413 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1414 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1415 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1416
1417 } else {
1418 MyError = true;
1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1420 }
1421
1422 {
1423 double prefetch_vm_bw;
1424 double prefetch_row_bw;
1425
1426 if (PDEAndMetaPTEBytesFrame == 0) {
1427 prefetch_vm_bw = 0;
1428 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1429 #ifdef __DML_VBA_DEBUG__
1430 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1431 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1432 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1433 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1434 #endif
1435 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1436 #ifdef __DML_VBA_DEBUG__
1437 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1438 #endif
1439 } else {
1440 prefetch_vm_bw = 0;
1441 MyError = true;
1442 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1443 }
1444
1445 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1446 prefetch_row_bw = 0;
1447 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1448 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1449
1450 #ifdef __DML_VBA_DEBUG__
1451 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1452 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1453 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1454 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1455 #endif
1456 } else {
1457 prefetch_row_bw = 0;
1458 MyError = true;
1459 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1460 }
1461
1462 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1463 }
1464
1465 if (MyError) {
1466 *PrefetchBandwidth = 0;
1467 TimeForFetchingMetaPTE = 0;
1468 TimeForFetchingRowInVBlank = 0;
1469 *DestinationLinesToRequestVMInVBlank = 0;
1470 *DestinationLinesToRequestRowInVBlank = 0;
1471 *DestinationLinesForPrefetch = 0;
1472 LinesToRequestPrefetchPixelData = 0;
1473 *VRatioPrefetchY = 0;
1474 *VRatioPrefetchC = 0;
1475 *RequiredPrefetchPixDataBWLuma = 0;
1476 *RequiredPrefetchPixDataBWChroma = 0;
1477 }
1478
1479 return MyError;
1480 }
1481
/*
 * Snap a requested clock onto the DFS divider grid, rounding upwards.
 *
 * The reference value is VCOSpeed * 4. The divider is that reference divided
 * by Clock and passed through dml_floor() with a granularity of 1; the value
 * returned is the reference divided by that divider.
 *
 * NOTE(review): presumably dml_floor(x, 1) yields the next lower whole
 * number, which would make the result never smaller than Clock - confirm
 * against dml_inline_defs.h. Nothing here guards against Clock == 0 or a
 * divider that comes out as 0.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double dfs_reference = VCOSpeed * 4;
	const double dfs_divider = dml_floor(dfs_reference / Clock, 1);

	return dfs_reference / dfs_divider;
}
1486
/*
 * Snap a requested clock onto the DFS divider grid, rounding downwards.
 *
 * The reference value is VCOSpeed * 4. The divider is that reference divided
 * by Clock and passed through dml_ceil() with a granularity of 1; the value
 * returned is the reference divided by that divider.
 *
 * NOTE(review): presumably dml_ceil(x, 1) yields the next higher whole
 * number, which would make the result never larger than Clock - confirm
 * against dml_inline_defs.h. Nothing here guards against Clock == 0.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double dfs_reference = VCOSpeed * 4.0;
	const double dfs_divider = dml_ceil(dfs_reference / Clock, 1);

	return dfs_reference / dfs_divider;
}
1491
1492 static void CalculateDCCConfiguration(
1493 bool DCCEnabled,
1494 bool DCCProgrammingAssumesScanDirectionUnknown,
1495 enum source_format_class SourcePixelFormat,
1496 unsigned int SurfaceWidthLuma,
1497 unsigned int SurfaceWidthChroma,
1498 unsigned int SurfaceHeightLuma,
1499 unsigned int SurfaceHeightChroma,
1500 double DETBufferSize,
1501 unsigned int RequestHeight256ByteLuma,
1502 unsigned int RequestHeight256ByteChroma,
1503 enum dm_swizzle_mode TilingFormat,
1504 unsigned int BytePerPixelY,
1505 unsigned int BytePerPixelC,
1506 double BytePerPixelDETY,
1507 double BytePerPixelDETC,
1508 enum scan_direction_class ScanOrientation,
1509 unsigned int *MaxUncompressedBlockLuma,
1510 unsigned int *MaxUncompressedBlockChroma,
1511 unsigned int *MaxCompressedBlockLuma,
1512 unsigned int *MaxCompressedBlockChroma,
1513 unsigned int *IndependentBlockLuma,
1514 unsigned int *IndependentBlockChroma)
1515 {
1516 int yuv420;
1517 int horz_div_l;
1518 int horz_div_c;
1519 int vert_div_l;
1520 int vert_div_c;
1521
1522 int swath_buf_size;
1523 double detile_buf_vp_horz_limit;
1524 double detile_buf_vp_vert_limit;
1525
1526 int MAS_vp_horz_limit;
1527 int MAS_vp_vert_limit;
1528 int max_vp_horz_width;
1529 int max_vp_vert_height;
1530 int eff_surf_width_l;
1531 int eff_surf_width_c;
1532 int eff_surf_height_l;
1533 int eff_surf_height_c;
1534
1535 int full_swath_bytes_horz_wc_l;
1536 int full_swath_bytes_horz_wc_c;
1537 int full_swath_bytes_vert_wc_l;
1538 int full_swath_bytes_vert_wc_c;
1539 int req128_horz_wc_l;
1540 int req128_horz_wc_c;
1541 int req128_vert_wc_l;
1542 int req128_vert_wc_c;
1543 int segment_order_horz_contiguous_luma;
1544 int segment_order_horz_contiguous_chroma;
1545 int segment_order_vert_contiguous_luma;
1546 int segment_order_vert_contiguous_chroma;
1547
1548 typedef enum {
1549 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1550 } RequestType;
1551 RequestType RequestLuma;
1552 RequestType RequestChroma;
1553
1554 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1555 horz_div_l = 1;
1556 horz_div_c = 1;
1557 vert_div_l = 1;
1558 vert_div_c = 1;
1559
1560 if (BytePerPixelY == 1)
1561 vert_div_l = 0;
1562 if (BytePerPixelC == 1)
1563 vert_div_c = 0;
1564 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1565 horz_div_l = 0;
1566 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1567 horz_div_c = 0;
1568
1569 if (BytePerPixelC == 0) {
1570 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1571 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1572 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1573 } else {
1574 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1575 detile_buf_vp_horz_limit = (double) swath_buf_size
1576 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1577 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1578 detile_buf_vp_vert_limit = (double) swath_buf_size
1579 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1580 }
1581
1582 if (SourcePixelFormat == dm_420_10) {
1583 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1584 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1585 }
1586
1587 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1588 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1589
1590 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1591 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1592 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1593 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1594 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1595 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1596 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1597 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1598
1599 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1600 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1601 if (BytePerPixelC > 0) {
1602 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1603 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1604 } else {
1605 full_swath_bytes_horz_wc_c = 0;
1606 full_swath_bytes_vert_wc_c = 0;
1607 }
1608
1609 if (SourcePixelFormat == dm_420_10) {
1610 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1611 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1612 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1613 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1614 }
1615
1616 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1617 req128_horz_wc_l = 0;
1618 req128_horz_wc_c = 0;
1619 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1620 req128_horz_wc_l = 0;
1621 req128_horz_wc_c = 1;
1622 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1623 req128_horz_wc_l = 1;
1624 req128_horz_wc_c = 0;
1625 } else {
1626 req128_horz_wc_l = 1;
1627 req128_horz_wc_c = 1;
1628 }
1629
1630 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1631 req128_vert_wc_l = 0;
1632 req128_vert_wc_c = 0;
1633 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1634 req128_vert_wc_l = 0;
1635 req128_vert_wc_c = 1;
1636 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1637 req128_vert_wc_l = 1;
1638 req128_vert_wc_c = 0;
1639 } else {
1640 req128_vert_wc_l = 1;
1641 req128_vert_wc_c = 1;
1642 }
1643
1644 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1645 segment_order_horz_contiguous_luma = 0;
1646 } else {
1647 segment_order_horz_contiguous_luma = 1;
1648 }
1649 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1650 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1651 segment_order_vert_contiguous_luma = 0;
1652 } else {
1653 segment_order_vert_contiguous_luma = 1;
1654 }
1655 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1656 segment_order_horz_contiguous_chroma = 0;
1657 } else {
1658 segment_order_horz_contiguous_chroma = 1;
1659 }
1660 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1661 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1662 segment_order_vert_contiguous_chroma = 0;
1663 } else {
1664 segment_order_vert_contiguous_chroma = 1;
1665 }
1666
1667 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1668 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1669 RequestLuma = REQ_256Bytes;
1670 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1671 RequestLuma = REQ_128BytesNonContiguous;
1672 } else {
1673 RequestLuma = REQ_128BytesContiguous;
1674 }
1675 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1676 RequestChroma = REQ_256Bytes;
1677 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1678 RequestChroma = REQ_128BytesNonContiguous;
1679 } else {
1680 RequestChroma = REQ_128BytesContiguous;
1681 }
1682 } else if (ScanOrientation != dm_vert) {
1683 if (req128_horz_wc_l == 0) {
1684 RequestLuma = REQ_256Bytes;
1685 } else if (segment_order_horz_contiguous_luma == 0) {
1686 RequestLuma = REQ_128BytesNonContiguous;
1687 } else {
1688 RequestLuma = REQ_128BytesContiguous;
1689 }
1690 if (req128_horz_wc_c == 0) {
1691 RequestChroma = REQ_256Bytes;
1692 } else if (segment_order_horz_contiguous_chroma == 0) {
1693 RequestChroma = REQ_128BytesNonContiguous;
1694 } else {
1695 RequestChroma = REQ_128BytesContiguous;
1696 }
1697 } else {
1698 if (req128_vert_wc_l == 0) {
1699 RequestLuma = REQ_256Bytes;
1700 } else if (segment_order_vert_contiguous_luma == 0) {
1701 RequestLuma = REQ_128BytesNonContiguous;
1702 } else {
1703 RequestLuma = REQ_128BytesContiguous;
1704 }
1705 if (req128_vert_wc_c == 0) {
1706 RequestChroma = REQ_256Bytes;
1707 } else if (segment_order_vert_contiguous_chroma == 0) {
1708 RequestChroma = REQ_128BytesNonContiguous;
1709 } else {
1710 RequestChroma = REQ_128BytesContiguous;
1711 }
1712 }
1713
1714 if (RequestLuma == REQ_256Bytes) {
1715 *MaxUncompressedBlockLuma = 256;
1716 *MaxCompressedBlockLuma = 256;
1717 *IndependentBlockLuma = 0;
1718 } else if (RequestLuma == REQ_128BytesContiguous) {
1719 *MaxUncompressedBlockLuma = 256;
1720 *MaxCompressedBlockLuma = 128;
1721 *IndependentBlockLuma = 128;
1722 } else {
1723 *MaxUncompressedBlockLuma = 256;
1724 *MaxCompressedBlockLuma = 64;
1725 *IndependentBlockLuma = 64;
1726 }
1727
1728 if (RequestChroma == REQ_256Bytes) {
1729 *MaxUncompressedBlockChroma = 256;
1730 *MaxCompressedBlockChroma = 256;
1731 *IndependentBlockChroma = 0;
1732 } else if (RequestChroma == REQ_128BytesContiguous) {
1733 *MaxUncompressedBlockChroma = 256;
1734 *MaxCompressedBlockChroma = 128;
1735 *IndependentBlockChroma = 128;
1736 } else {
1737 *MaxUncompressedBlockChroma = 256;
1738 *MaxCompressedBlockChroma = 64;
1739 *IndependentBlockChroma = 64;
1740 }
1741
1742 if (DCCEnabled != true || BytePerPixelC == 0) {
1743 *MaxUncompressedBlockChroma = 0;
1744 *MaxCompressedBlockChroma = 0;
1745 *IndependentBlockChroma = 0;
1746 }
1747
1748 if (DCCEnabled != true) {
1749 *MaxUncompressedBlockLuma = 0;
1750 *MaxCompressedBlockLuma = 0;
1751 *IndependentBlockLuma = 0;
1752 }
1753 }
1754
1755 static double CalculatePrefetchSourceLines(
1756 struct display_mode_lib *mode_lib,
1757 double VRatio,
1758 double vtaps,
1759 bool Interlace,
1760 bool ProgressiveToInterlaceUnitInOPP,
1761 unsigned int SwathHeight,
1762 unsigned int ViewportYStart,
1763 double *VInitPreFill,
1764 unsigned int *MaxNumSwath)
1765 {
1766 struct vba_vars_st *v = &mode_lib->vba;
1767 unsigned int MaxPartialSwath;
1768
1769 if (ProgressiveToInterlaceUnitInOPP)
1770 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1771 else
1772 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1773
1774 if (!v->IgnoreViewportPositioning) {
1775
1776 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1777
1778 if (*VInitPreFill > 1.0)
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1780 else
1781 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1782 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1783
1784 } else {
1785
1786 if (ViewportYStart != 0)
1787 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1788
1789 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1790
1791 if (*VInitPreFill > 1.0)
1792 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1793 else
1794 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1795 }
1796
1797 #ifdef __DML_VBA_DEBUG__
1798 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1799 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1800 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1801 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1802 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1803 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1804 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1805 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1806 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1807 #endif
1808 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1809 }
1810
1811 static unsigned int CalculateVMAndRowBytes(
1812 struct display_mode_lib *mode_lib,
1813 bool DCCEnable,
1814 unsigned int BlockHeight256Bytes,
1815 unsigned int BlockWidth256Bytes,
1816 enum source_format_class SourcePixelFormat,
1817 unsigned int SurfaceTiling,
1818 unsigned int BytePerPixel,
1819 enum scan_direction_class ScanDirection,
1820 unsigned int SwathWidth,
1821 unsigned int ViewportHeight,
1822 bool GPUVMEnable,
1823 bool HostVMEnable,
1824 unsigned int HostVMMaxNonCachedPageTableLevels,
1825 unsigned int GPUVMMinPageSize,
1826 unsigned int HostVMMinPageSize,
1827 unsigned int PTEBufferSizeInRequests,
1828 unsigned int Pitch,
1829 unsigned int DCCMetaPitch,
1830 unsigned int *MacroTileWidth,
1831 unsigned int *MetaRowByte,
1832 unsigned int *PixelPTEBytesPerRow,
1833 bool *PTEBufferSizeNotExceeded,
1834 int *dpte_row_width_ub,
1835 unsigned int *dpte_row_height,
1836 unsigned int *MetaRequestWidth,
1837 unsigned int *MetaRequestHeight,
1838 unsigned int *meta_row_width,
1839 unsigned int *meta_row_height,
1840 int *vm_group_bytes,
1841 unsigned int *dpte_group_bytes,
1842 unsigned int *PixelPTEReqWidth,
1843 unsigned int *PixelPTEReqHeight,
1844 unsigned int *PTERequestSize,
1845 int *DPDE0BytesFrame,
1846 int *MetaPTEBytesFrame)
1847 {
1848 struct vba_vars_st *v = &mode_lib->vba;
1849 unsigned int MPDEBytesFrame;
1850 unsigned int DCCMetaSurfaceBytes;
1851 unsigned int MacroTileSizeBytes;
1852 unsigned int MacroTileHeight;
1853 unsigned int ExtraDPDEBytesFrame;
1854 unsigned int PDEAndMetaPTEBytesFrame;
1855 unsigned int PixelPTEReqHeightPTEs = 0;
1856 unsigned int HostVMDynamicLevels = 0;
1857 double FractionOfPTEReturnDrop;
1858
1859 if (GPUVMEnable == true && HostVMEnable == true) {
1860 if (HostVMMinPageSize < 2048) {
1861 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1862 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1864 } else {
1865 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1866 }
1867 }
1868
1869 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1870 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1871 if (ScanDirection != dm_vert) {
1872 *meta_row_height = *MetaRequestHeight;
1873 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1874 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1875 } else {
1876 *meta_row_height = *MetaRequestWidth;
1877 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1878 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1879 }
1880 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1881 if (GPUVMEnable == true) {
1882 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1883 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1884 } else {
1885 *MetaPTEBytesFrame = 0;
1886 MPDEBytesFrame = 0;
1887 }
1888
1889 if (DCCEnable != true) {
1890 *MetaPTEBytesFrame = 0;
1891 MPDEBytesFrame = 0;
1892 *MetaRowByte = 0;
1893 }
1894
1895 if (SurfaceTiling == dm_sw_linear) {
1896 MacroTileSizeBytes = 256;
1897 MacroTileHeight = BlockHeight256Bytes;
1898 } else {
1899 MacroTileSizeBytes = 65536;
1900 MacroTileHeight = 16 * BlockHeight256Bytes;
1901 }
1902 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1903
1904 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1905 if (ScanDirection != dm_vert) {
1906 *DPDE0BytesFrame = 64
1907 * (dml_ceil(
1908 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1909 / (8 * 2097152),
1910 1) + 1);
1911 } else {
1912 *DPDE0BytesFrame = 64
1913 * (dml_ceil(
1914 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1915 / (8 * 2097152),
1916 1) + 1);
1917 }
1918 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1919 } else {
1920 *DPDE0BytesFrame = 0;
1921 ExtraDPDEBytesFrame = 0;
1922 }
1923
1924 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1925
1926 #ifdef __DML_VBA_DEBUG__
1927 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1928 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1929 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1930 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1931 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1932 #endif
1933
1934 if (HostVMEnable == true) {
1935 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1936 }
1937 #ifdef __DML_VBA_DEBUG__
1938 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1939 #endif
1940
1941 if (SurfaceTiling == dm_sw_linear) {
1942 PixelPTEReqHeightPTEs = 1;
1943 *PixelPTEReqHeight = 1;
1944 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1945 *PTERequestSize = 64;
1946 FractionOfPTEReturnDrop = 0;
1947 } else if (MacroTileSizeBytes == 4096) {
1948 PixelPTEReqHeightPTEs = 1;
1949 *PixelPTEReqHeight = MacroTileHeight;
1950 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1951 *PTERequestSize = 64;
1952 if (ScanDirection != dm_vert)
1953 FractionOfPTEReturnDrop = 0;
1954 else
1955 FractionOfPTEReturnDrop = 7 / 8;
1956 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1957 PixelPTEReqHeightPTEs = 16;
1958 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1959 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1960 *PTERequestSize = 128;
1961 FractionOfPTEReturnDrop = 0;
1962 } else {
1963 PixelPTEReqHeightPTEs = 1;
1964 *PixelPTEReqHeight = MacroTileHeight;
1965 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1966 *PTERequestSize = 64;
1967 FractionOfPTEReturnDrop = 0;
1968 }
1969
1970 if (SurfaceTiling == dm_sw_linear) {
1971 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1972 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1973 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1974 } else if (ScanDirection != dm_vert) {
1975 *dpte_row_height = *PixelPTEReqHeight;
1976 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1977 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1978 } else {
1979 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1980 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1981 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1982 }
1983
1984 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1985 *PTEBufferSizeNotExceeded = true;
1986 } else {
1987 *PTEBufferSizeNotExceeded = false;
1988 }
1989
1990 if (GPUVMEnable != true) {
1991 *PixelPTEBytesPerRow = 0;
1992 *PTEBufferSizeNotExceeded = true;
1993 }
1994
1995 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1996
1997 if (HostVMEnable == true) {
1998 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1999 }
2000
2001 if (HostVMEnable == true) {
2002 *vm_group_bytes = 512;
2003 *dpte_group_bytes = 512;
2004 } else if (GPUVMEnable == true) {
2005 *vm_group_bytes = 2048;
2006 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2007 *dpte_group_bytes = 512;
2008 } else {
2009 *dpte_group_bytes = 2048;
2010 }
2011 } else {
2012 *vm_group_bytes = 0;
2013 *dpte_group_bytes = 0;
2014 }
2015 return PDEAndMetaPTEBytesFrame;
2016 }
2017
2018 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2019 {
2020 struct vba_vars_st *v = &mode_lib->vba;
2021 unsigned int j, k;
2022 double HostVMInefficiencyFactor = 1.0;
2023 bool NoChromaPlanes = true;
2024 int ReorderBytes;
2025 double VMDataOnlyReturnBW;
2026 double MaxTotalRDBandwidth = 0;
2027 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2028
2029 v->WritebackDISPCLK = 0.0;
2030 v->DISPCLKWithRamping = 0;
2031 v->DISPCLKWithoutRamping = 0;
2032 v->GlobalDPPCLK = 0.0;
2033 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2034 {
2035 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2036 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2037 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2039
2040 if (v->HostVMEnable != true) {
2041 v->ReturnBW = dml_min(
2042 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2043 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2044 } else {
2045 v->ReturnBW = dml_min(
2046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2048 }
2049 }
2050 /* End DAL custom code */
2051
2052 // DISPCLK and DPPCLK Calculation
2053 //
2054 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2055 if (v->WritebackEnable[k]) {
2056 v->WritebackDISPCLK = dml_max(
2057 v->WritebackDISPCLK,
2058 dml314_CalculateWriteBackDISPCLK(
2059 v->WritebackPixelFormat[k],
2060 v->PixelClock[k],
2061 v->WritebackHRatio[k],
2062 v->WritebackVRatio[k],
2063 v->WritebackHTaps[k],
2064 v->WritebackVTaps[k],
2065 v->WritebackSourceWidth[k],
2066 v->WritebackDestinationWidth[k],
2067 v->HTotal[k],
2068 v->WritebackLineBufferSize));
2069 }
2070 }
2071
2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2073 if (v->HRatio[k] > 1) {
2074 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2075 v->MaxDCHUBToPSCLThroughput,
2076 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2077 } else {
2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2079 }
2080
2081 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2082 * dml_max(
2083 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2084 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2085
2086 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2087 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2088 }
2089
2090 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2091 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2092 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2093 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2094 } else {
2095 if (v->HRatioChroma[k] > 1) {
2096 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2097 v->MaxDCHUBToPSCLThroughput,
2098 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2099 } else {
2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2101 }
2102 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2103 * dml_max3(
2104 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2105 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2106 1.0);
2107
2108 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2109 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2110 }
2111
2112 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2113 }
2114 }
2115
2116 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2117 if (v->BlendingAndTiming[k] != k)
2118 continue;
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2120 v->DISPCLKWithRamping = dml_max(
2121 v->DISPCLKWithRamping,
2122 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2123 * (1 + v->DISPCLKRampingMargin / 100));
2124 v->DISPCLKWithoutRamping = dml_max(
2125 v->DISPCLKWithoutRamping,
2126 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2127 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2128 v->DISPCLKWithRamping = dml_max(
2129 v->DISPCLKWithRamping,
2130 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2131 * (1 + v->DISPCLKRampingMargin / 100));
2132 v->DISPCLKWithoutRamping = dml_max(
2133 v->DISPCLKWithoutRamping,
2134 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2135 } else {
2136 v->DISPCLKWithRamping = dml_max(
2137 v->DISPCLKWithRamping,
2138 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2139 v->DISPCLKWithoutRamping = dml_max(
2140 v->DISPCLKWithoutRamping,
2141 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2142 }
2143 }
2144
2145 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2146 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2147
2148 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2149 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2150 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2151 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2152 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2153 v->DISPCLKDPPCLKVCOSpeed);
2154 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2155 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2156 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2157 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2158 } else {
2159 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2160 }
2161 v->DISPCLK = v->DISPCLK_calculated;
2162 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2163
2164 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2165 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2166 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2167 }
2168 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2169 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2170 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2171 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2172 }
2173
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2176 }
2177
2178 // Urgent and B P-State/DRAM Clock Change Watermark
2179 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2180 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2181
2182 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2183 CalculateBytePerPixelAnd256BBlockSizes(
2184 v->SourcePixelFormat[k],
2185 v->SurfaceTiling[k],
2186 &v->BytePerPixelY[k],
2187 &v->BytePerPixelC[k],
2188 &v->BytePerPixelDETY[k],
2189 &v->BytePerPixelDETC[k],
2190 &v->BlockHeight256BytesY[k],
2191 &v->BlockHeight256BytesC[k],
2192 &v->BlockWidth256BytesY[k],
2193 &v->BlockWidth256BytesC[k]);
2194 }
2195
2196 CalculateSwathWidth(
2197 false,
2198 v->NumberOfActivePlanes,
2199 v->SourcePixelFormat,
2200 v->SourceScan,
2201 v->ViewportWidth,
2202 v->ViewportHeight,
2203 v->SurfaceWidthY,
2204 v->SurfaceWidthC,
2205 v->SurfaceHeightY,
2206 v->SurfaceHeightC,
2207 v->ODMCombineEnabled,
2208 v->BytePerPixelY,
2209 v->BytePerPixelC,
2210 v->BlockHeight256BytesY,
2211 v->BlockHeight256BytesC,
2212 v->BlockWidth256BytesY,
2213 v->BlockWidth256BytesC,
2214 v->BlendingAndTiming,
2215 v->HActive,
2216 v->HRatio,
2217 v->DPPPerPlane,
2218 v->SwathWidthSingleDPPY,
2219 v->SwathWidthSingleDPPC,
2220 v->SwathWidthY,
2221 v->SwathWidthC,
2222 v->dummyinteger3,
2223 v->dummyinteger4,
2224 v->swath_width_luma_ub,
2225 v->swath_width_chroma_ub);
2226
2227 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2228 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2229 * v->VRatio[k];
2230 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2231 * v->VRatioChroma[k];
2232 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2233 }
2234
2235 // DCFCLK Deep Sleep
2236 CalculateDCFCLKDeepSleep(
2237 mode_lib,
2238 v->NumberOfActivePlanes,
2239 v->BytePerPixelY,
2240 v->BytePerPixelC,
2241 v->VRatio,
2242 v->VRatioChroma,
2243 v->SwathWidthY,
2244 v->SwathWidthC,
2245 v->DPPPerPlane,
2246 v->HRatio,
2247 v->HRatioChroma,
2248 v->PixelClock,
2249 v->PSCL_THROUGHPUT_LUMA,
2250 v->PSCL_THROUGHPUT_CHROMA,
2251 v->DPPCLK,
2252 v->ReadBandwidthPlaneLuma,
2253 v->ReadBandwidthPlaneChroma,
2254 v->ReturnBusWidth,
2255 &v->DCFCLKDeepSleep);
2256
2257 // DSCCLK
2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2259 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2260 v->DSCCLK_calculated[k] = 0.0;
2261 } else {
2262 if (v->OutputFormat[k] == dm_420)
2263 v->DSCFormatFactor = 2;
2264 else if (v->OutputFormat[k] == dm_444)
2265 v->DSCFormatFactor = 1;
2266 else if (v->OutputFormat[k] == dm_n422)
2267 v->DSCFormatFactor = 2;
2268 else
2269 v->DSCFormatFactor = 1;
2270 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2273 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2276 else
2277 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2278 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2279 }
2280 }
2281
2282 // DSC Delay
2283 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2284 double BPP = v->OutputBpp[k];
2285
2286 if (v->DSCEnabled[k] && BPP != 0) {
2287 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2288 v->DSCDelay[k] = dscceComputeDelay(
2289 v->DSCInputBitPerComponent[k],
2290 BPP,
2291 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2292 v->NumberOfDSCSlices[k],
2293 v->OutputFormat[k],
2294 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2295 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2296 v->DSCDelay[k] = 2
2297 * (dscceComputeDelay(
2298 v->DSCInputBitPerComponent[k],
2299 BPP,
2300 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2301 v->NumberOfDSCSlices[k] / 2.0,
2302 v->OutputFormat[k],
2303 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2304 } else {
2305 v->DSCDelay[k] = 4
2306 * (dscceComputeDelay(
2307 v->DSCInputBitPerComponent[k],
2308 BPP,
2309 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2310 v->NumberOfDSCSlices[k] / 4.0,
2311 v->OutputFormat[k],
2312 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2313 }
2314 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2315 } else {
2316 v->DSCDelay[k] = 0;
2317 }
2318 }
2319
2320 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2321 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2322 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2323 v->DSCDelay[k] = v->DSCDelay[j];
2324
2325 // Prefetch
2326 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2327 unsigned int PDEAndMetaPTEBytesFrameY;
2328 unsigned int PixelPTEBytesPerRowY;
2329 unsigned int MetaRowByteY;
2330 unsigned int MetaRowByteC;
2331 unsigned int PDEAndMetaPTEBytesFrameC;
2332 unsigned int PixelPTEBytesPerRowC;
2333 bool PTEBufferSizeNotExceededY;
2334 bool PTEBufferSizeNotExceededC;
2335
2336 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2337 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2338 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2339 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2340 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2341 } else {
2342 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2343 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2344 }
2345
2346 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2347 mode_lib,
2348 v->DCCEnable[k],
2349 v->BlockHeight256BytesC[k],
2350 v->BlockWidth256BytesC[k],
2351 v->SourcePixelFormat[k],
2352 v->SurfaceTiling[k],
2353 v->BytePerPixelC[k],
2354 v->SourceScan[k],
2355 v->SwathWidthC[k],
2356 v->ViewportHeightChroma[k],
2357 v->GPUVMEnable,
2358 v->HostVMEnable,
2359 v->HostVMMaxNonCachedPageTableLevels,
2360 v->GPUVMMinPageSize,
2361 v->HostVMMinPageSize,
2362 v->PTEBufferSizeInRequestsForChroma,
2363 v->PitchC[k],
2364 v->DCCMetaPitchC[k],
2365 &v->MacroTileWidthC[k],
2366 &MetaRowByteC,
2367 &PixelPTEBytesPerRowC,
2368 &PTEBufferSizeNotExceededC,
2369 &v->dpte_row_width_chroma_ub[k],
2370 &v->dpte_row_height_chroma[k],
2371 &v->meta_req_width_chroma[k],
2372 &v->meta_req_height_chroma[k],
2373 &v->meta_row_width_chroma[k],
2374 &v->meta_row_height_chroma[k],
2375 &v->dummyinteger1,
2376 &v->dummyinteger2,
2377 &v->PixelPTEReqWidthC[k],
2378 &v->PixelPTEReqHeightC[k],
2379 &v->PTERequestSizeC[k],
2380 &v->dpde0_bytes_per_frame_ub_c[k],
2381 &v->meta_pte_bytes_per_frame_ub_c[k]);
2382
2383 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2384 mode_lib,
2385 v->VRatioChroma[k],
2386 v->VTAPsChroma[k],
2387 v->Interlace[k],
2388 v->ProgressiveToInterlaceUnitInOPP,
2389 v->SwathHeightC[k],
2390 v->ViewportYStartC[k],
2391 &v->VInitPreFillC[k],
2392 &v->MaxNumSwathC[k]);
2393 } else {
2394 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2395 v->PTEBufferSizeInRequestsForChroma = 0;
2396 PixelPTEBytesPerRowC = 0;
2397 PDEAndMetaPTEBytesFrameC = 0;
2398 MetaRowByteC = 0;
2399 v->MaxNumSwathC[k] = 0;
2400 v->PrefetchSourceLinesC[k] = 0;
2401 }
2402
2403 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2404 mode_lib,
2405 v->DCCEnable[k],
2406 v->BlockHeight256BytesY[k],
2407 v->BlockWidth256BytesY[k],
2408 v->SourcePixelFormat[k],
2409 v->SurfaceTiling[k],
2410 v->BytePerPixelY[k],
2411 v->SourceScan[k],
2412 v->SwathWidthY[k],
2413 v->ViewportHeight[k],
2414 v->GPUVMEnable,
2415 v->HostVMEnable,
2416 v->HostVMMaxNonCachedPageTableLevels,
2417 v->GPUVMMinPageSize,
2418 v->HostVMMinPageSize,
2419 v->PTEBufferSizeInRequestsForLuma,
2420 v->PitchY[k],
2421 v->DCCMetaPitchY[k],
2422 &v->MacroTileWidthY[k],
2423 &MetaRowByteY,
2424 &PixelPTEBytesPerRowY,
2425 &PTEBufferSizeNotExceededY,
2426 &v->dpte_row_width_luma_ub[k],
2427 &v->dpte_row_height[k],
2428 &v->meta_req_width[k],
2429 &v->meta_req_height[k],
2430 &v->meta_row_width[k],
2431 &v->meta_row_height[k],
2432 &v->vm_group_bytes[k],
2433 &v->dpte_group_bytes[k],
2434 &v->PixelPTEReqWidthY[k],
2435 &v->PixelPTEReqHeightY[k],
2436 &v->PTERequestSizeY[k],
2437 &v->dpde0_bytes_per_frame_ub_l[k],
2438 &v->meta_pte_bytes_per_frame_ub_l[k]);
2439
2440 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2441 mode_lib,
2442 v->VRatio[k],
2443 v->vtaps[k],
2444 v->Interlace[k],
2445 v->ProgressiveToInterlaceUnitInOPP,
2446 v->SwathHeightY[k],
2447 v->ViewportYStartY[k],
2448 &v->VInitPreFillY[k],
2449 &v->MaxNumSwathY[k]);
2450 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2451 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2452 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2453
2454 CalculateRowBandwidth(
2455 v->GPUVMEnable,
2456 v->SourcePixelFormat[k],
2457 v->VRatio[k],
2458 v->VRatioChroma[k],
2459 v->DCCEnable[k],
2460 v->HTotal[k] / v->PixelClock[k],
2461 MetaRowByteY,
2462 MetaRowByteC,
2463 v->meta_row_height[k],
2464 v->meta_row_height_chroma[k],
2465 PixelPTEBytesPerRowY,
2466 PixelPTEBytesPerRowC,
2467 v->dpte_row_height[k],
2468 v->dpte_row_height_chroma[k],
2469 &v->meta_row_bw[k],
2470 &v->dpte_row_bw[k]);
2471 }
2472
2473 v->TotalDCCActiveDPP = 0;
2474 v->TotalActiveDPP = 0;
2475 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2476 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2477 if (v->DCCEnable[k])
2478 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2479 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2480 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2481 NoChromaPlanes = false;
2482 }
2483
2484 ReorderBytes = v->NumberOfChannels
2485 * dml_max3(
2486 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2487 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2488 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2489
2490 VMDataOnlyReturnBW = dml_min(
2491 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2492 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2493 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2494 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2495
2496 #ifdef __DML_VBA_DEBUG__
2497 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2498 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2499 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2500 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2501 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2502 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2503 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2504 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2505 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2506 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2507 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2508 #endif
2509
2510 if (v->GPUVMEnable && v->HostVMEnable)
2511 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2512
2513 v->UrgentExtraLatency = CalculateExtraLatency(
2514 v->RoundTripPingLatencyCycles,
2515 ReorderBytes,
2516 v->DCFCLK,
2517 v->TotalActiveDPP,
2518 v->PixelChunkSizeInKByte,
2519 v->TotalDCCActiveDPP,
2520 v->MetaChunkSize,
2521 v->ReturnBW,
2522 v->GPUVMEnable,
2523 v->HostVMEnable,
2524 v->NumberOfActivePlanes,
2525 v->DPPPerPlane,
2526 v->dpte_group_bytes,
2527 HostVMInefficiencyFactor,
2528 v->HostVMMinPageSize,
2529 v->HostVMMaxNonCachedPageTableLevels);
2530
2531 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2532
2533 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2534 if (v->BlendingAndTiming[k] == k) {
2535 if (v->WritebackEnable[k] == true) {
2536 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2537 + CalculateWriteBackDelay(
2538 v->WritebackPixelFormat[k],
2539 v->WritebackHRatio[k],
2540 v->WritebackVRatio[k],
2541 v->WritebackVTaps[k],
2542 v->WritebackDestinationWidth[k],
2543 v->WritebackDestinationHeight[k],
2544 v->WritebackSourceHeight[k],
2545 v->HTotal[k]) / v->DISPCLK;
2546 } else
2547 v->WritebackDelay[v->VoltageLevel][k] = 0;
2548 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2549 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2550 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2551 v->WritebackDelay[v->VoltageLevel][k],
2552 v->WritebackLatency
2553 + CalculateWriteBackDelay(
2554 v->WritebackPixelFormat[j],
2555 v->WritebackHRatio[j],
2556 v->WritebackVRatio[j],
2557 v->WritebackVTaps[j],
2558 v->WritebackDestinationWidth[j],
2559 v->WritebackDestinationHeight[j],
2560 v->WritebackSourceHeight[j],
2561 v->HTotal[k]) / v->DISPCLK);
2562 }
2563 }
2564 }
2565 }
2566
2567 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2568 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2569 if (v->BlendingAndTiming[k] == j)
2570 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2571
2572 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2573 v->MaxVStartupLines[k] =
2574 CalculateMaxVStartup(
2575 v->VTotal[k],
2576 v->VActive[k],
2577 v->VBlankNom[k],
2578 v->HTotal[k],
2579 v->PixelClock[k],
2580 v->ProgressiveToInterlaceUnitInOPP,
2581 v->Interlace[k],
2582 v->ip.VBlankNomDefaultUS,
2583 v->WritebackDelay[v->VoltageLevel][k]);
2584
2585 #ifdef __DML_VBA_DEBUG__
2586 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2587 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2588 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2589 #endif
2590 }
2591
2592 v->MaximumMaxVStartupLines = 0;
2593 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2594 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2595
2596 // VBA_DELTA
2597 // We don't really care to iterate between the various prefetch modes
2598 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2599
2600 v->UrgentLatency = CalculateUrgentLatency(
2601 v->UrgentLatencyPixelDataOnly,
2602 v->UrgentLatencyPixelMixedWithVMData,
2603 v->UrgentLatencyVMDataOnly,
2604 v->DoUrgentLatencyAdjustment,
2605 v->UrgentLatencyAdjustmentFabricClockComponent,
2606 v->UrgentLatencyAdjustmentFabricClockReference,
2607 v->FabricClock);
2608
2609 v->FractionOfUrgentBandwidth = 0.0;
2610 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2611
2612 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2613
2614 do {
2615 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2616 bool DestinationLineTimesForPrefetchLessThan2 = false;
2617 bool VRatioPrefetchMoreThan4 = false;
2618 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2619
2620 MaxTotalRDBandwidth = 0;
2621
2622 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2623
2624 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2625 Pipe myPipe;
2626
2627 myPipe.DPPCLK = v->DPPCLK[k];
2628 myPipe.DISPCLK = v->DISPCLK;
2629 myPipe.PixelClock = v->PixelClock[k];
2630 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2631 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2632 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2633 myPipe.VRatio = v->VRatio[k];
2634 myPipe.VRatioChroma = v->VRatioChroma[k];
2635 myPipe.SourceScan = v->SourceScan[k];
2636 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2637 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2638 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2639 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2640 myPipe.InterlaceEnable = v->Interlace[k];
2641 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2642 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2643 myPipe.HTotal = v->HTotal[k];
2644 myPipe.DCCEnable = v->DCCEnable[k];
2645 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2646 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2647 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2648 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2649 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2650 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2651 v->ErrorResult[k] = CalculatePrefetchSchedule(
2652 mode_lib,
2653 HostVMInefficiencyFactor,
2654 &myPipe,
2655 v->DSCDelay[k],
2656 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2657 v->DPPCLKDelaySCL,
2658 v->DPPCLKDelaySCLLBOnly,
2659 v->DPPCLKDelayCNVCCursor,
2660 v->DISPCLKDelaySubtotal,
2661 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2662 v->OutputFormat[k],
2663 v->MaxInterDCNTileRepeaters,
2664 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2665 v->MaxVStartupLines[k],
2666 v->GPUVMMaxPageTableLevels,
2667 v->GPUVMEnable,
2668 v->HostVMEnable,
2669 v->HostVMMaxNonCachedPageTableLevels,
2670 v->HostVMMinPageSize,
2671 v->DynamicMetadataEnable[k],
2672 v->DynamicMetadataVMEnabled,
2673 v->DynamicMetadataLinesBeforeActiveRequired[k],
2674 v->DynamicMetadataTransmittedBytes[k],
2675 v->UrgentLatency,
2676 v->UrgentExtraLatency,
2677 v->TCalc,
2678 v->PDEAndMetaPTEBytesFrame[k],
2679 v->MetaRowByte[k],
2680 v->PixelPTEBytesPerRow[k],
2681 v->PrefetchSourceLinesY[k],
2682 v->SwathWidthY[k],
2683 v->VInitPreFillY[k],
2684 v->MaxNumSwathY[k],
2685 v->PrefetchSourceLinesC[k],
2686 v->SwathWidthC[k],
2687 v->VInitPreFillC[k],
2688 v->MaxNumSwathC[k],
2689 v->swath_width_luma_ub[k],
2690 v->swath_width_chroma_ub[k],
2691 v->SwathHeightY[k],
2692 v->SwathHeightC[k],
2693 TWait,
2694 &v->DSTXAfterScaler[k],
2695 &v->DSTYAfterScaler[k],
2696 &v->DestinationLinesForPrefetch[k],
2697 &v->PrefetchBandwidth[k],
2698 &v->DestinationLinesToRequestVMInVBlank[k],
2699 &v->DestinationLinesToRequestRowInVBlank[k],
2700 &v->VRatioPrefetchY[k],
2701 &v->VRatioPrefetchC[k],
2702 &v->RequiredPrefetchPixDataBWLuma[k],
2703 &v->RequiredPrefetchPixDataBWChroma[k],
2704 &v->NotEnoughTimeForDynamicMetadata[k],
2705 &v->Tno_bw[k],
2706 &v->prefetch_vmrow_bw[k],
2707 &v->Tdmdl_vm[k],
2708 &v->Tdmdl[k],
2709 &v->TSetup[k],
2710 &v->VUpdateOffsetPix[k],
2711 &v->VUpdateWidthPix[k],
2712 &v->VReadyOffsetPix[k]);
2713
2714 #ifdef __DML_VBA_DEBUG__
2715 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2716 #endif
2717 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2718 }
2719
2720 v->NoEnoughUrgentLatencyHiding = false;
2721 v->NoEnoughUrgentLatencyHidingPre = false;
2722
2723 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2724 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2725 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2726 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2727 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2728
2729 CalculateUrgentBurstFactor(
2730 v->swath_width_luma_ub[k],
2731 v->swath_width_chroma_ub[k],
2732 v->SwathHeightY[k],
2733 v->SwathHeightC[k],
2734 v->HTotal[k] / v->PixelClock[k],
2735 v->UrgentLatency,
2736 v->CursorBufferSize,
2737 v->CursorWidth[k][0],
2738 v->CursorBPP[k][0],
2739 v->VRatio[k],
2740 v->VRatioChroma[k],
2741 v->BytePerPixelDETY[k],
2742 v->BytePerPixelDETC[k],
2743 v->DETBufferSizeY[k],
2744 v->DETBufferSizeC[k],
2745 &v->UrgBurstFactorCursor[k],
2746 &v->UrgBurstFactorLuma[k],
2747 &v->UrgBurstFactorChroma[k],
2748 &v->NoUrgentLatencyHiding[k]);
2749
2750 CalculateUrgentBurstFactor(
2751 v->swath_width_luma_ub[k],
2752 v->swath_width_chroma_ub[k],
2753 v->SwathHeightY[k],
2754 v->SwathHeightC[k],
2755 v->HTotal[k] / v->PixelClock[k],
2756 v->UrgentLatency,
2757 v->CursorBufferSize,
2758 v->CursorWidth[k][0],
2759 v->CursorBPP[k][0],
2760 v->VRatioPrefetchY[k],
2761 v->VRatioPrefetchC[k],
2762 v->BytePerPixelDETY[k],
2763 v->BytePerPixelDETC[k],
2764 v->DETBufferSizeY[k],
2765 v->DETBufferSizeC[k],
2766 &v->UrgBurstFactorCursorPre[k],
2767 &v->UrgBurstFactorLumaPre[k],
2768 &v->UrgBurstFactorChromaPre[k],
2769 &v->NoUrgentLatencyHidingPre[k]);
2770
2771 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2772 + dml_max3(
2773 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2774 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2775 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2776 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2777 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2778 v->DPPPerPlane[k]
2779 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2780 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2781 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2782
2783 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2784 + dml_max3(
2785 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2786 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2787 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2788 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2789 + v->cursor_bw_pre[k]);
2790
2791 #ifdef __DML_VBA_DEBUG__
2792 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2793 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2794 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2795 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2796 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2797
2798 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2799 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2800
2801 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2802 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2803 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2804 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2805 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2806 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2807 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2808 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2809 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2810 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2811 #endif
2812
2813 if (v->DestinationLinesForPrefetch[k] < 2)
2814 DestinationLineTimesForPrefetchLessThan2 = true;
2815
2816 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2817 VRatioPrefetchMoreThan4 = true;
2818
2819 if (v->NoUrgentLatencyHiding[k] == true)
2820 v->NoEnoughUrgentLatencyHiding = true;
2821
2822 if (v->NoUrgentLatencyHidingPre[k] == true)
2823 v->NoEnoughUrgentLatencyHidingPre = true;
2824 }
2825
2826 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2827
2828 #ifdef __DML_VBA_DEBUG__
2829 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2830 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2831 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2832 #endif
2833
2834 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2835 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2836 v->PrefetchModeSupported = true;
2837 else {
2838 v->PrefetchModeSupported = false;
2839 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2840 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2841 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2842 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2843 }
2844
2845 // PREVIOUS_ERROR
2846 // This error result check was done after the PrefetchModeSupported. So we will
2847 // still try to calculate flip schedule even prefetch mode not supported
2848 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2849 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2850 v->PrefetchModeSupported = false;
2851 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2852 }
2853 }
2854
2855 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2856 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2857 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2858 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2859 - dml_max(
2860 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2861 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2862 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2863 v->DPPPerPlane[k]
2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2867 }
2868
2869 v->TotImmediateFlipBytes = 0;
2870 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2871 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2872 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2873 }
2874 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2875 CalculateFlipSchedule(
2876 mode_lib,
2877 k,
2878 HostVMInefficiencyFactor,
2879 v->UrgentExtraLatency,
2880 v->UrgentLatency,
2881 v->PDEAndMetaPTEBytesFrame[k],
2882 v->MetaRowByte[k],
2883 v->PixelPTEBytesPerRow[k]);
2884 }
2885
2886 v->total_dcn_read_bw_with_flip = 0.0;
2887 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2889 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2890 + dml_max3(
2891 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2892 v->DPPPerPlane[k] * v->final_flip_bw[k]
2893 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2894 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2895 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2896 v->DPPPerPlane[k]
2897 * (v->final_flip_bw[k]
2898 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2899 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2900 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2901 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2902 + dml_max3(
2903 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2904 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2905 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2906 v->DPPPerPlane[k]
2907 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2908 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2909 }
2910 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2911
2912 v->ImmediateFlipSupported = true;
2913 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2914 #ifdef __DML_VBA_DEBUG__
2915 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2916 #endif
2917 v->ImmediateFlipSupported = false;
2918 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2919 }
2920 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2921 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2922 #ifdef __DML_VBA_DEBUG__
2923 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2924 #endif
2925 v->ImmediateFlipSupported = false;
2926 }
2927 }
2928 } else {
2929 v->ImmediateFlipSupported = false;
2930 }
2931
2932 v->PrefetchAndImmediateFlipSupported =
2933 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2934 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2935 v->ImmediateFlipSupported)) ? true : false;
2936 #ifdef __DML_VBA_DEBUG__
2937 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2938 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2939 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2940 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2941 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2942 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2943 #endif
2944 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2945
2946 v->VStartupLines = v->VStartupLines + 1;
2947 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2948 ASSERT(v->PrefetchAndImmediateFlipSupported);
2949
2950 // Unbounded Request Enabled
2951 CalculateUnboundedRequestAndCompressedBufferSize(
2952 v->DETBufferSizeInKByte[0],
2953 v->ConfigReturnBufferSizeInKByte,
2954 v->UseUnboundedRequesting,
2955 v->TotalActiveDPP,
2956 NoChromaPlanes,
2957 v->MaxNumDPP,
2958 v->CompressedBufferSegmentSizeInkByte,
2959 v->Output,
2960 &v->UnboundedRequestEnabled,
2961 &v->CompressedBufferSizeInkByte);
2962
2963 //Watermarks and NB P-State/DRAM Clock Change Support
2964 {
2965 enum clock_change_support DRAMClockChangeSupport; // dummy
2966
2967 CalculateWatermarksAndDRAMSpeedChangeSupport(
2968 mode_lib,
2969 PrefetchMode,
2970 v->DCFCLK,
2971 v->ReturnBW,
2972 v->UrgentLatency,
2973 v->UrgentExtraLatency,
2974 v->SOCCLK,
2975 v->DCFCLKDeepSleep,
2976 v->DETBufferSizeY,
2977 v->DETBufferSizeC,
2978 v->SwathHeightY,
2979 v->SwathHeightC,
2980 v->SwathWidthY,
2981 v->SwathWidthC,
2982 v->DPPPerPlane,
2983 v->BytePerPixelDETY,
2984 v->BytePerPixelDETC,
2985 v->UnboundedRequestEnabled,
2986 v->CompressedBufferSizeInkByte,
2987 &DRAMClockChangeSupport,
2988 &v->StutterExitWatermark,
2989 &v->StutterEnterPlusExitWatermark,
2990 &v->Z8StutterExitWatermark,
2991 &v->Z8StutterEnterPlusExitWatermark);
2992
2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2994 if (v->WritebackEnable[k] == true) {
2995 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2996 0,
2997 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2998 } else {
2999 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3000 }
3001 }
3002 }
3003
3004 //Display Pipeline Delivery Time in Prefetch, Groups
3005 CalculatePixelDeliveryTimes(
3006 v->NumberOfActivePlanes,
3007 v->VRatio,
3008 v->VRatioChroma,
3009 v->VRatioPrefetchY,
3010 v->VRatioPrefetchC,
3011 v->swath_width_luma_ub,
3012 v->swath_width_chroma_ub,
3013 v->DPPPerPlane,
3014 v->HRatio,
3015 v->HRatioChroma,
3016 v->PixelClock,
3017 v->PSCL_THROUGHPUT_LUMA,
3018 v->PSCL_THROUGHPUT_CHROMA,
3019 v->DPPCLK,
3020 v->BytePerPixelC,
3021 v->SourceScan,
3022 v->NumberOfCursors,
3023 v->CursorWidth,
3024 v->CursorBPP,
3025 v->BlockWidth256BytesY,
3026 v->BlockHeight256BytesY,
3027 v->BlockWidth256BytesC,
3028 v->BlockHeight256BytesC,
3029 v->DisplayPipeLineDeliveryTimeLuma,
3030 v->DisplayPipeLineDeliveryTimeChroma,
3031 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3032 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3033 v->DisplayPipeRequestDeliveryTimeLuma,
3034 v->DisplayPipeRequestDeliveryTimeChroma,
3035 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3036 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3037 v->CursorRequestDeliveryTime,
3038 v->CursorRequestDeliveryTimePrefetch);
3039
3040 CalculateMetaAndPTETimes(
3041 v->NumberOfActivePlanes,
3042 v->GPUVMEnable,
3043 v->MetaChunkSize,
3044 v->MinMetaChunkSizeBytes,
3045 v->HTotal,
3046 v->VRatio,
3047 v->VRatioChroma,
3048 v->DestinationLinesToRequestRowInVBlank,
3049 v->DestinationLinesToRequestRowInImmediateFlip,
3050 v->DCCEnable,
3051 v->PixelClock,
3052 v->BytePerPixelY,
3053 v->BytePerPixelC,
3054 v->SourceScan,
3055 v->dpte_row_height,
3056 v->dpte_row_height_chroma,
3057 v->meta_row_width,
3058 v->meta_row_width_chroma,
3059 v->meta_row_height,
3060 v->meta_row_height_chroma,
3061 v->meta_req_width,
3062 v->meta_req_width_chroma,
3063 v->meta_req_height,
3064 v->meta_req_height_chroma,
3065 v->dpte_group_bytes,
3066 v->PTERequestSizeY,
3067 v->PTERequestSizeC,
3068 v->PixelPTEReqWidthY,
3069 v->PixelPTEReqHeightY,
3070 v->PixelPTEReqWidthC,
3071 v->PixelPTEReqHeightC,
3072 v->dpte_row_width_luma_ub,
3073 v->dpte_row_width_chroma_ub,
3074 v->DST_Y_PER_PTE_ROW_NOM_L,
3075 v->DST_Y_PER_PTE_ROW_NOM_C,
3076 v->DST_Y_PER_META_ROW_NOM_L,
3077 v->DST_Y_PER_META_ROW_NOM_C,
3078 v->TimePerMetaChunkNominal,
3079 v->TimePerChromaMetaChunkNominal,
3080 v->TimePerMetaChunkVBlank,
3081 v->TimePerChromaMetaChunkVBlank,
3082 v->TimePerMetaChunkFlip,
3083 v->TimePerChromaMetaChunkFlip,
3084 v->time_per_pte_group_nom_luma,
3085 v->time_per_pte_group_vblank_luma,
3086 v->time_per_pte_group_flip_luma,
3087 v->time_per_pte_group_nom_chroma,
3088 v->time_per_pte_group_vblank_chroma,
3089 v->time_per_pte_group_flip_chroma);
3090
3091 CalculateVMGroupAndRequestTimes(
3092 v->NumberOfActivePlanes,
3093 v->GPUVMEnable,
3094 v->GPUVMMaxPageTableLevels,
3095 v->HTotal,
3096 v->BytePerPixelC,
3097 v->DestinationLinesToRequestVMInVBlank,
3098 v->DestinationLinesToRequestVMInImmediateFlip,
3099 v->DCCEnable,
3100 v->PixelClock,
3101 v->dpte_row_width_luma_ub,
3102 v->dpte_row_width_chroma_ub,
3103 v->vm_group_bytes,
3104 v->dpde0_bytes_per_frame_ub_l,
3105 v->dpde0_bytes_per_frame_ub_c,
3106 v->meta_pte_bytes_per_frame_ub_l,
3107 v->meta_pte_bytes_per_frame_ub_c,
3108 v->TimePerVMGroupVBlank,
3109 v->TimePerVMGroupFlip,
3110 v->TimePerVMRequestVBlank,
3111 v->TimePerVMRequestFlip);
3112
3113 // Min TTUVBlank
3114 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3115 if (PrefetchMode == 0) {
3116 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3117 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3118 v->MinTTUVBlank[k] = dml_max(
3119 v->DRAMClockChangeWatermark,
3120 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3121 } else if (PrefetchMode == 1) {
3122 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3123 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3124 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3125 } else {
3126 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3127 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3128 v->MinTTUVBlank[k] = v->UrgentWatermark;
3129 }
3130 if (!v->DynamicMetadataEnable[k])
3131 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3132 }
3133
3134 // DCC Configuration
3135 v->ActiveDPPs = 0;
3136 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3137 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3138 v->SourcePixelFormat[k],
3139 v->SurfaceWidthY[k],
3140 v->SurfaceWidthC[k],
3141 v->SurfaceHeightY[k],
3142 v->SurfaceHeightC[k],
3143 v->DETBufferSizeInKByte[0] * 1024,
3144 v->BlockHeight256BytesY[k],
3145 v->BlockHeight256BytesC[k],
3146 v->SurfaceTiling[k],
3147 v->BytePerPixelY[k],
3148 v->BytePerPixelC[k],
3149 v->BytePerPixelDETY[k],
3150 v->BytePerPixelDETC[k],
3151 v->SourceScan[k],
3152 &v->DCCYMaxUncompressedBlock[k],
3153 &v->DCCCMaxUncompressedBlock[k],
3154 &v->DCCYMaxCompressedBlock[k],
3155 &v->DCCCMaxCompressedBlock[k],
3156 &v->DCCYIndependentBlock[k],
3157 &v->DCCCIndependentBlock[k]);
3158 }
3159
3160 // VStartup Adjustment
3161 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3162 bool isInterlaceTiming;
3163 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3164 #ifdef __DML_VBA_DEBUG__
3165 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3166 #endif
3167
3168 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3169
3170 #ifdef __DML_VBA_DEBUG__
3171 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3172 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3173 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3174 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3175 #endif
3176
3177 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3178 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3179 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3180 }
3181
3182 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3183 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3184 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3185 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3186 } else {
3187 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3188 }
3189 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3190 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3191 <= (isInterlaceTiming ?
3192 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3193 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3194 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3195 } else {
3196 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3197 }
3198 #ifdef __DML_VBA_DEBUG__
3199 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3200 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3201 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3202 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3203 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3204 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3205 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3206 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3207 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3208 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3209 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3210 #endif
3211 }
3212
3213 {
3214 //Maximum Bandwidth Used
3215 double TotalWRBandwidth = 0;
3216 double MaxPerPlaneVActiveWRBandwidth = 0;
3217 double WRBandwidth = 0;
3218
3219 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3220 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3221 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3222 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3223 } else if (v->WritebackEnable[k] == true) {
3224 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3225 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3226 }
3227 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3228 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3229 }
3230
3231 v->TotalDataReadBandwidth = 0;
3232 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3233 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3234 }
3235 }
3236 // Stutter Efficiency
3237 CalculateStutterEfficiency(
3238 mode_lib,
3239 v->CompressedBufferSizeInkByte,
3240 v->UnboundedRequestEnabled,
3241 v->ConfigReturnBufferSizeInKByte,
3242 v->MetaFIFOSizeInKEntries,
3243 v->ZeroSizeBufferEntries,
3244 v->NumberOfActivePlanes,
3245 v->ROBBufferSizeInKByte,
3246 v->TotalDataReadBandwidth,
3247 v->DCFCLK,
3248 v->ReturnBW,
3249 v->COMPBUF_RESERVED_SPACE_64B,
3250 v->COMPBUF_RESERVED_SPACE_ZS,
3251 v->SRExitTime,
3252 v->SRExitZ8Time,
3253 v->SynchronizedVBlank,
3254 v->StutterEnterPlusExitWatermark,
3255 v->Z8StutterEnterPlusExitWatermark,
3256 v->ProgressiveToInterlaceUnitInOPP,
3257 v->Interlace,
3258 v->MinTTUVBlank,
3259 v->DPPPerPlane,
3260 v->DETBufferSizeY,
3261 v->BytePerPixelY,
3262 v->BytePerPixelDETY,
3263 v->SwathWidthY,
3264 v->SwathHeightY,
3265 v->SwathHeightC,
3266 v->DCCRateLuma,
3267 v->DCCRateChroma,
3268 v->DCCFractionOfZeroSizeRequestsLuma,
3269 v->DCCFractionOfZeroSizeRequestsChroma,
3270 v->HTotal,
3271 v->VTotal,
3272 v->PixelClock,
3273 v->VRatio,
3274 v->SourceScan,
3275 v->BlockHeight256BytesY,
3276 v->BlockWidth256BytesY,
3277 v->BlockHeight256BytesC,
3278 v->BlockWidth256BytesC,
3279 v->DCCYMaxUncompressedBlock,
3280 v->DCCCMaxUncompressedBlock,
3281 v->VActive,
3282 v->DCCEnable,
3283 v->WritebackEnable,
3284 v->ReadBandwidthPlaneLuma,
3285 v->ReadBandwidthPlaneChroma,
3286 v->meta_row_bw,
3287 v->dpte_row_bw,
3288 &v->StutterEfficiencyNotIncludingVBlank,
3289 &v->StutterEfficiency,
3290 &v->NumberOfStutterBurstsPerFrame,
3291 &v->Z8StutterEfficiencyNotIncludingVBlank,
3292 &v->Z8StutterEfficiency,
3293 &v->Z8NumberOfStutterBurstsPerFrame,
3294 &v->StutterPeriod);
3295 }
3296
3297 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3298 {
3299 struct vba_vars_st *v = &mode_lib->vba;
3300 // Display Pipe Configuration
3301 double BytePerPixDETY[DC__NUM_DPP__MAX];
3302 double BytePerPixDETC[DC__NUM_DPP__MAX];
3303 int BytePerPixY[DC__NUM_DPP__MAX];
3304 int BytePerPixC[DC__NUM_DPP__MAX];
3305 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3306 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3307 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3308 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3309 double dummy1[DC__NUM_DPP__MAX];
3310 double dummy2[DC__NUM_DPP__MAX];
3311 double dummy3[DC__NUM_DPP__MAX];
3312 double dummy4[DC__NUM_DPP__MAX];
3313 int dummy5[DC__NUM_DPP__MAX];
3314 int dummy6[DC__NUM_DPP__MAX];
3315 bool dummy7[DC__NUM_DPP__MAX];
3316 bool dummysinglestring;
3317
3318 unsigned int k;
3319
3320 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3321
3322 CalculateBytePerPixelAnd256BBlockSizes(
3323 v->SourcePixelFormat[k],
3324 v->SurfaceTiling[k],
3325 &BytePerPixY[k],
3326 &BytePerPixC[k],
3327 &BytePerPixDETY[k],
3328 &BytePerPixDETC[k],
3329 &Read256BytesBlockHeightY[k],
3330 &Read256BytesBlockHeightC[k],
3331 &Read256BytesBlockWidthY[k],
3332 &Read256BytesBlockWidthC[k]);
3333 }
3334
3335 CalculateSwathAndDETConfiguration(
3336 false,
3337 v->NumberOfActivePlanes,
3338 v->DETBufferSizeInKByte[0],
3339 dummy1,
3340 dummy2,
3341 v->SourceScan,
3342 v->SourcePixelFormat,
3343 v->SurfaceTiling,
3344 v->ViewportWidth,
3345 v->ViewportHeight,
3346 v->SurfaceWidthY,
3347 v->SurfaceWidthC,
3348 v->SurfaceHeightY,
3349 v->SurfaceHeightC,
3350 Read256BytesBlockHeightY,
3351 Read256BytesBlockHeightC,
3352 Read256BytesBlockWidthY,
3353 Read256BytesBlockWidthC,
3354 v->ODMCombineEnabled,
3355 v->BlendingAndTiming,
3356 BytePerPixY,
3357 BytePerPixC,
3358 BytePerPixDETY,
3359 BytePerPixDETC,
3360 v->HActive,
3361 v->HRatio,
3362 v->HRatioChroma,
3363 v->DPPPerPlane,
3364 dummy5,
3365 dummy6,
3366 dummy3,
3367 dummy4,
3368 v->SwathHeightY,
3369 v->SwathHeightC,
3370 v->DETBufferSizeY,
3371 v->DETBufferSizeC,
3372 dummy7,
3373 &dummysinglestring);
3374 }
3375
3376 static bool CalculateBytePerPixelAnd256BBlockSizes(
3377 enum source_format_class SourcePixelFormat,
3378 enum dm_swizzle_mode SurfaceTiling,
3379 unsigned int *BytePerPixelY,
3380 unsigned int *BytePerPixelC,
3381 double *BytePerPixelDETY,
3382 double *BytePerPixelDETC,
3383 unsigned int *BlockHeight256BytesY,
3384 unsigned int *BlockHeight256BytesC,
3385 unsigned int *BlockWidth256BytesY,
3386 unsigned int *BlockWidth256BytesC)
3387 {
3388 if (SourcePixelFormat == dm_444_64) {
3389 *BytePerPixelDETY = 8;
3390 *BytePerPixelDETC = 0;
3391 *BytePerPixelY = 8;
3392 *BytePerPixelC = 0;
3393 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3394 *BytePerPixelDETY = 4;
3395 *BytePerPixelDETC = 0;
3396 *BytePerPixelY = 4;
3397 *BytePerPixelC = 0;
3398 } else if (SourcePixelFormat == dm_444_16) {
3399 *BytePerPixelDETY = 2;
3400 *BytePerPixelDETC = 0;
3401 *BytePerPixelY = 2;
3402 *BytePerPixelC = 0;
3403 } else if (SourcePixelFormat == dm_444_8) {
3404 *BytePerPixelDETY = 1;
3405 *BytePerPixelDETC = 0;
3406 *BytePerPixelY = 1;
3407 *BytePerPixelC = 0;
3408 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3409 *BytePerPixelDETY = 4;
3410 *BytePerPixelDETC = 1;
3411 *BytePerPixelY = 4;
3412 *BytePerPixelC = 1;
3413 } else if (SourcePixelFormat == dm_420_8) {
3414 *BytePerPixelDETY = 1;
3415 *BytePerPixelDETC = 2;
3416 *BytePerPixelY = 1;
3417 *BytePerPixelC = 2;
3418 } else if (SourcePixelFormat == dm_420_12) {
3419 *BytePerPixelDETY = 2;
3420 *BytePerPixelDETC = 4;
3421 *BytePerPixelY = 2;
3422 *BytePerPixelC = 4;
3423 } else {
3424 *BytePerPixelDETY = 4.0 / 3;
3425 *BytePerPixelDETC = 8.0 / 3;
3426 *BytePerPixelY = 2;
3427 *BytePerPixelC = 4;
3428 }
3429
3430 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3431 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3432 if (SurfaceTiling == dm_sw_linear) {
3433 *BlockHeight256BytesY = 1;
3434 } else if (SourcePixelFormat == dm_444_64) {
3435 *BlockHeight256BytesY = 4;
3436 } else if (SourcePixelFormat == dm_444_8) {
3437 *BlockHeight256BytesY = 16;
3438 } else {
3439 *BlockHeight256BytesY = 8;
3440 }
3441 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3442 *BlockHeight256BytesC = 0;
3443 *BlockWidth256BytesC = 0;
3444 } else {
3445 if (SurfaceTiling == dm_sw_linear) {
3446 *BlockHeight256BytesY = 1;
3447 *BlockHeight256BytesC = 1;
3448 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3449 *BlockHeight256BytesY = 8;
3450 *BlockHeight256BytesC = 16;
3451 } else if (SourcePixelFormat == dm_420_8) {
3452 *BlockHeight256BytesY = 16;
3453 *BlockHeight256BytesC = 8;
3454 } else {
3455 *BlockHeight256BytesY = 8;
3456 *BlockHeight256BytesC = 8;
3457 }
3458 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3459 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3460 }
3461 return true;
3462 }
3463
/*
 * CalculateTWait - pick the wait time for the given prefetch mode.
 *
 * Mode 0 returns the largest of (DRAMClockChangeLatency + UrgentLatency),
 * SREnterPlusExitTime and UrgentLatency; mode 1 the larger of
 * SREnterPlusExitTime and UrgentLatency; any other mode UrgentLatency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3474
3475 double dml314_CalculateWriteBackDISPCLK(
3476 enum source_format_class WritebackPixelFormat,
3477 double PixelClock,
3478 double WritebackHRatio,
3479 double WritebackVRatio,
3480 unsigned int WritebackHTaps,
3481 unsigned int WritebackVTaps,
3482 long WritebackSourceWidth,
3483 long WritebackDestinationWidth,
3484 unsigned int HTotal,
3485 unsigned int WritebackLineBufferSize)
3486 {
3487 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3488
3489 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3490 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3491 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3492 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3493 }
3494
3495 static double CalculateWriteBackDelay(
3496 enum source_format_class WritebackPixelFormat,
3497 double WritebackHRatio,
3498 double WritebackVRatio,
3499 unsigned int WritebackVTaps,
3500 int WritebackDestinationWidth,
3501 int WritebackDestinationHeight,
3502 int WritebackSourceHeight,
3503 unsigned int HTotal)
3504 {
3505 double CalculateWriteBackDelay;
3506 double Line_length;
3507 double Output_lines_last_notclamped;
3508 double WritebackVInit;
3509
3510 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3511 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3512 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3513 if (Output_lines_last_notclamped < 0) {
3514 CalculateWriteBackDelay = 0;
3515 } else {
3516 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3517 }
3518 return CalculateWriteBackDelay;
3519 }
3520
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE/VREADY pixel offsets
 * and dynamic metadata timing terms for one pipe.
 *
 * Outputs:
 * @VUpdateWidthPix:  ceil((14 / DCFClkDeepSleep + 12 / DPPCLK + repeater delay) * PixelClock)
 * @VReadyOffsetPix:  ceil(max(150 / DPPCLK, repeater delay + 20 / DCFClkDeepSleep
 *                    + 10 / DPPCLK) * PixelClock)
 * @VUpdateOffsetPix: ceil(HTotal / 4)
 * @TSetup:           sum of the three values above divided by PixelClock
 * @Tdmbf:            DynamicMetadataTransmittedBytes / 4 / DISPCLK
 * @Tdmec:            HTotal / PixelClock
 * @Tdmsks:           half of the vertical blank (VBlank * HTotal / PixelClock / 2)
 *                    when DynamicMetadataLinesBeforeActiveRequired is 0, else
 *                    that many line times; halved once more when
 *                    InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *                    is false
 *
 * "repeater delay" is MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix are doubles, so they are printed
	 * with %f; only VUpdateOffsetPix is an int and keeps %d.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3564
3565 static void CalculateRowBandwidth(
3566 bool GPUVMEnable,
3567 enum source_format_class SourcePixelFormat,
3568 double VRatio,
3569 double VRatioChroma,
3570 bool DCCEnable,
3571 double LineTime,
3572 unsigned int MetaRowByteLuma,
3573 unsigned int MetaRowByteChroma,
3574 unsigned int meta_row_height_luma,
3575 unsigned int meta_row_height_chroma,
3576 unsigned int PixelPTEBytesPerRowLuma,
3577 unsigned int PixelPTEBytesPerRowChroma,
3578 unsigned int dpte_row_height_luma,
3579 unsigned int dpte_row_height_chroma,
3580 double *meta_row_bw,
3581 double *dpte_row_bw)
3582 {
3583 if (DCCEnable != true) {
3584 *meta_row_bw = 0;
3585 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3586 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3587 } else {
3588 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3589 }
3590
3591 if (GPUVMEnable != true) {
3592 *dpte_row_bw = 0;
3593 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3594 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3595 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3596 } else {
3597 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3598 }
3599 }
3600
3601 static void CalculateFlipSchedule(
3602 struct display_mode_lib *mode_lib,
3603 unsigned int k,
3604 double HostVMInefficiencyFactor,
3605 double UrgentExtraLatency,
3606 double UrgentLatency,
3607 double PDEAndMetaPTEBytesPerFrame,
3608 double MetaRowBytes,
3609 double DPTEBytesPerRow)
3610 {
3611 struct vba_vars_st *v = &mode_lib->vba;
3612 double min_row_time = 0.0;
3613 unsigned int HostVMDynamicLevelsTrips;
3614 double TimeForFetchingMetaPTEImmediateFlip;
3615 double TimeForFetchingRowInVBlankImmediateFlip;
3616 double ImmediateFlipBW = 1.0;
3617 double LineTime = v->HTotal[k] / v->PixelClock[k];
3618
3619 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3620 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3621 } else {
3622 HostVMDynamicLevelsTrips = 0;
3623 }
3624
3625 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3626 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3627 }
3628
3629 if (v->GPUVMEnable == true) {
3630 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3631 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3632 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3633 LineTime / 4.0);
3634 } else {
3635 TimeForFetchingMetaPTEImmediateFlip = 0;
3636 }
3637
3638 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3639 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3640 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3641 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3642 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3643 LineTime / 4);
3644 } else {
3645 TimeForFetchingRowInVBlankImmediateFlip = 0;
3646 }
3647
3648 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3649
3650 if (v->GPUVMEnable == true) {
3651 v->final_flip_bw[k] = dml_max(
3652 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3653 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3654 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3655 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3656 } else {
3657 v->final_flip_bw[k] = 0;
3658 }
3659
3660 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3661 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3662 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3663 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3664 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3665 } else {
3666 min_row_time = dml_min4(
3667 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3668 v->meta_row_height[k] * LineTime / v->VRatio[k],
3669 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3670 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3671 }
3672 } else {
3673 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3674 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3675 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3676 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3677 } else {
3678 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3679 }
3680 }
3681
3682 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3683 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3684 v->ImmediateFlipSupportedForPipe[k] = false;
3685 } else {
3686 v->ImmediateFlipSupportedForPipe[k] = true;
3687 }
3688
3689 #ifdef __DML_VBA_DEBUG__
3690 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3691 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3692 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3693 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3694 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3695 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3696 #endif
3697
3698 }
3699
3700 static double TruncToValidBPP(
3701 double LinkBitRate,
3702 int Lanes,
3703 int HTotal,
3704 int HActive,
3705 double PixelClock,
3706 double DesiredBPP,
3707 bool DSCEnable,
3708 enum output_encoder_class Output,
3709 enum output_format_class Format,
3710 unsigned int DSCInputBitPerComponent,
3711 int DSCSlices,
3712 int AudioRate,
3713 int AudioLayout,
3714 enum odm_combine_mode ODMCombine)
3715 {
3716 double MaxLinkBPP;
3717 int MinDSCBPP;
3718 double MaxDSCBPP;
3719 int NonDSCBPP0;
3720 int NonDSCBPP1;
3721 int NonDSCBPP2;
3722
3723 if (Format == dm_420) {
3724 NonDSCBPP0 = 12;
3725 NonDSCBPP1 = 15;
3726 NonDSCBPP2 = 18;
3727 MinDSCBPP = 6;
3728 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3729 } else if (Format == dm_444) {
3730 NonDSCBPP0 = 24;
3731 NonDSCBPP1 = 30;
3732 NonDSCBPP2 = 36;
3733 MinDSCBPP = 8;
3734 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3735 } else {
3736
3737 NonDSCBPP0 = 16;
3738 NonDSCBPP1 = 20;
3739 NonDSCBPP2 = 24;
3740
3741 if (Format == dm_n422) {
3742 MinDSCBPP = 7;
3743 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3744 } else {
3745 MinDSCBPP = 8;
3746 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3747 }
3748 }
3749
3750 if (DSCEnable && Output == dm_dp) {
3751 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3752 } else {
3753 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3754 }
3755
3756 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3757 MaxLinkBPP = 16;
3758 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3759 MaxLinkBPP = 32;
3760 }
3761
3762 if (DesiredBPP == 0) {
3763 if (DSCEnable) {
3764 if (MaxLinkBPP < MinDSCBPP) {
3765 return BPP_INVALID;
3766 } else if (MaxLinkBPP >= MaxDSCBPP) {
3767 return MaxDSCBPP;
3768 } else {
3769 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3770 }
3771 } else {
3772 if (MaxLinkBPP >= NonDSCBPP2) {
3773 return NonDSCBPP2;
3774 } else if (MaxLinkBPP >= NonDSCBPP1) {
3775 return NonDSCBPP1;
3776 } else if (MaxLinkBPP >= NonDSCBPP0) {
3777 return 16.0;
3778 } else {
3779 return BPP_INVALID;
3780 }
3781 }
3782 } else {
3783 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3784 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3785 return BPP_INVALID;
3786 } else {
3787 return DesiredBPP;
3788 }
3789 }
3790 return BPP_INVALID;
3791 }
3792
3793 static noinline void CalculatePrefetchSchedulePerPlane(
3794 struct display_mode_lib *mode_lib,
3795 double HostVMInefficiencyFactor,
3796 int i,
3797 unsigned int j,
3798 unsigned int k)
3799 {
3800 struct vba_vars_st *v = &mode_lib->vba;
3801 Pipe myPipe;
3802
3803 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3804 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3805 myPipe.PixelClock = v->PixelClock[k];
3806 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3807 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3808 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3809 myPipe.VRatio = mode_lib->vba.VRatio[k];
3810 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3811
3812 myPipe.SourceScan = v->SourceScan[k];
3813 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3814 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3815 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3816 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3817 myPipe.InterlaceEnable = v->Interlace[k];
3818 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3819 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3820 myPipe.HTotal = v->HTotal[k];
3821 myPipe.DCCEnable = v->DCCEnable[k];
3822 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3823 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3824 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3825 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3826 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3827 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3828 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3829 mode_lib,
3830 HostVMInefficiencyFactor,
3831 &myPipe,
3832 v->DSCDelayPerState[i][k],
3833 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3834 v->DPPCLKDelaySCL,
3835 v->DPPCLKDelaySCLLBOnly,
3836 v->DPPCLKDelayCNVCCursor,
3837 v->DISPCLKDelaySubtotal,
3838 v->SwathWidthYThisState[k] / v->HRatio[k],
3839 v->OutputFormat[k],
3840 v->MaxInterDCNTileRepeaters,
3841 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3842 v->MaximumVStartup[i][j][k],
3843 v->GPUVMMaxPageTableLevels,
3844 v->GPUVMEnable,
3845 v->HostVMEnable,
3846 v->HostVMMaxNonCachedPageTableLevels,
3847 v->HostVMMinPageSize,
3848 v->DynamicMetadataEnable[k],
3849 v->DynamicMetadataVMEnabled,
3850 v->DynamicMetadataLinesBeforeActiveRequired[k],
3851 v->DynamicMetadataTransmittedBytes[k],
3852 v->UrgLatency[i],
3853 v->ExtraLatency,
3854 v->TimeCalc,
3855 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3856 v->MetaRowBytes[i][j][k],
3857 v->DPTEBytesPerRow[i][j][k],
3858 v->PrefetchLinesY[i][j][k],
3859 v->SwathWidthYThisState[k],
3860 v->PrefillY[k],
3861 v->MaxNumSwY[k],
3862 v->PrefetchLinesC[i][j][k],
3863 v->SwathWidthCThisState[k],
3864 v->PrefillC[k],
3865 v->MaxNumSwC[k],
3866 v->swath_width_luma_ub_this_state[k],
3867 v->swath_width_chroma_ub_this_state[k],
3868 v->SwathHeightYThisState[k],
3869 v->SwathHeightCThisState[k],
3870 v->TWait,
3871 &v->DSTXAfterScaler[k],
3872 &v->DSTYAfterScaler[k],
3873 &v->LineTimesForPrefetch[k],
3874 &v->PrefetchBW[k],
3875 &v->LinesForMetaPTE[k],
3876 &v->LinesForMetaAndDPTERow[k],
3877 &v->VRatioPreY[i][j][k],
3878 &v->VRatioPreC[i][j][k],
3879 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3880 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3881 &v->NoTimeForDynamicMetadata[i][j][k],
3882 &v->Tno_bw[k],
3883 &v->prefetch_vmrow_bw[k],
3884 &v->dummy7[k],
3885 &v->dummy8[k],
3886 &v->dummy13[k],
3887 &v->VUpdateOffsetPix[k],
3888 &v->VUpdateWidthPix[k],
3889 &v->VReadyOffsetPix[k]);
3890 }
3891
3892 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3893 {
3894 struct vba_vars_st *v = &mode_lib->vba;
3895
3896 int i, j;
3897 unsigned int k, m;
3898 int ReorderingBytes;
3899 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3900 bool NoChroma = true;
3901 bool EnoughWritebackUnits = true;
3902 bool P2IWith420 = false;
3903 bool DSCOnlyIfNecessaryWithBPP = false;
3904 bool DSC422NativeNotSupported = false;
3905 double MaxTotalVActiveRDBandwidth;
3906 bool ViewportExceedsSurface = false;
3907 bool FMTBufferExceeded = false;
3908
3909 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3910
3911 CalculateMinAndMaxPrefetchMode(
3912 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3913 &MinPrefetchMode, &MaxPrefetchMode);
3914
3915 /*Scale Ratio, taps Support Check*/
3916
3917 v->ScaleRatioAndTapsSupport = true;
3918 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3919 if (v->ScalerEnabled[k] == false
3920 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3921 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3922 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3923 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3924 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3925 v->ScaleRatioAndTapsSupport = false;
3926 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3927 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3928 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3929 || v->VRatio[k] > v->vtaps[k]
3930 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3931 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3932 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3933 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3934 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3935 || v->HRatioChroma[k] > v->MaxHSCLRatio
3936 || v->VRatioChroma[k] > v->MaxVSCLRatio
3937 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3938 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3939 v->ScaleRatioAndTapsSupport = false;
3940 }
3941 }
3942 /*Source Format, Pixel Format and Scan Support Check*/
3943
3944 v->SourceFormatPixelAndScanSupport = true;
3945 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3946 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3947 v->SourceFormatPixelAndScanSupport = false;
3948 }
3949 }
3950 /*Bandwidth Support Check*/
3951
3952 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3953 CalculateBytePerPixelAnd256BBlockSizes(
3954 v->SourcePixelFormat[k],
3955 v->SurfaceTiling[k],
3956 &v->BytePerPixelY[k],
3957 &v->BytePerPixelC[k],
3958 &v->BytePerPixelInDETY[k],
3959 &v->BytePerPixelInDETC[k],
3960 &v->Read256BlockHeightY[k],
3961 &v->Read256BlockHeightC[k],
3962 &v->Read256BlockWidthY[k],
3963 &v->Read256BlockWidthC[k]);
3964 }
3965 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3966 if (v->SourceScan[k] != dm_vert) {
3967 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3968 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3969 } else {
3970 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3971 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3972 }
3973 }
3974 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3975 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3976 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3977 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3978 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3979 }
3980 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3981 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3982 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3983 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3984 } else if (v->WritebackEnable[k] == true) {
3985 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3986 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3987 } else {
3988 v->WriteBandwidth[k] = 0.0;
3989 }
3990 }
3991
3992 /*Writeback Latency support check*/
3993
3994 v->WritebackLatencySupport = true;
3995 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3996 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3997 v->WritebackLatencySupport = false;
3998 }
3999 }
4000
4001 /*Writeback Mode Support Check*/
4002
4003 v->TotalNumberOfActiveWriteback = 0;
4004 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4005 if (v->WritebackEnable[k] == true) {
4006 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4007 }
4008 }
4009
4010 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4011 EnoughWritebackUnits = false;
4012 }
4013
4014 /*Writeback Scale Ratio and Taps Support Check*/
4015
4016 v->WritebackScaleRatioAndTapsSupport = true;
4017 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4018 if (v->WritebackEnable[k] == true) {
4019 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4020 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4021 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4022 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4023 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4024 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4025 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4026 v->WritebackScaleRatioAndTapsSupport = false;
4027 }
4028 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4029 v->WritebackScaleRatioAndTapsSupport = false;
4030 }
4031 }
4032 }
4033 /*Maximum DISPCLK/DPPCLK Support check*/
4034
4035 v->WritebackRequiredDISPCLK = 0.0;
4036 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4037 if (v->WritebackEnable[k] == true) {
4038 v->WritebackRequiredDISPCLK = dml_max(
4039 v->WritebackRequiredDISPCLK,
4040 dml314_CalculateWriteBackDISPCLK(
4041 v->WritebackPixelFormat[k],
4042 v->PixelClock[k],
4043 v->WritebackHRatio[k],
4044 v->WritebackVRatio[k],
4045 v->WritebackHTaps[k],
4046 v->WritebackVTaps[k],
4047 v->WritebackSourceWidth[k],
4048 v->WritebackDestinationWidth[k],
4049 v->HTotal[k],
4050 v->WritebackLineBufferSize));
4051 }
4052 }
4053 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4054 if (v->HRatio[k] > 1.0) {
4055 v->PSCL_FACTOR[k] = dml_min(
4056 v->MaxDCHUBToPSCLThroughput,
4057 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4058 } else {
4059 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4060 }
4061 if (v->BytePerPixelC[k] == 0.0) {
4062 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4063 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4064 * dml_max3(
4065 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4066 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4067 1.0);
4068 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4069 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4070 }
4071 } else {
4072 if (v->HRatioChroma[k] > 1.0) {
4073 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4074 v->MaxDCHUBToPSCLThroughput,
4075 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4076 } else {
4077 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4078 }
4079 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4080 * dml_max5(
4081 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4082 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4083 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4084 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4085 1.0);
4086 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4087 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4088 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4089 }
4090 }
4091 }
4092 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4093 int MaximumSwathWidthSupportLuma;
4094 int MaximumSwathWidthSupportChroma;
4095
4096 if (v->SurfaceTiling[k] == dm_sw_linear) {
4097 MaximumSwathWidthSupportLuma = 8192.0;
4098 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4099 MaximumSwathWidthSupportLuma = 2880.0;
4100 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4101 MaximumSwathWidthSupportLuma = 3840.0;
4102 } else {
4103 MaximumSwathWidthSupportLuma = 5760.0;
4104 }
4105
4106 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4107 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4108 } else {
4109 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4110 }
4111 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4112 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4113 if (v->BytePerPixelC[k] == 0.0) {
4114 v->MaximumSwathWidthInLineBufferChroma = 0;
4115 } else {
4116 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4117 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4118 }
4119 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4120 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4121 }
4122
4123 CalculateSwathAndDETConfiguration(
4124 true,
4125 v->NumberOfActivePlanes,
4126 v->DETBufferSizeInKByte[0],
4127 v->MaximumSwathWidthLuma,
4128 v->MaximumSwathWidthChroma,
4129 v->SourceScan,
4130 v->SourcePixelFormat,
4131 v->SurfaceTiling,
4132 v->ViewportWidth,
4133 v->ViewportHeight,
4134 v->SurfaceWidthY,
4135 v->SurfaceWidthC,
4136 v->SurfaceHeightY,
4137 v->SurfaceHeightC,
4138 v->Read256BlockHeightY,
4139 v->Read256BlockHeightC,
4140 v->Read256BlockWidthY,
4141 v->Read256BlockWidthC,
4142 v->odm_combine_dummy,
4143 v->BlendingAndTiming,
4144 v->BytePerPixelY,
4145 v->BytePerPixelC,
4146 v->BytePerPixelInDETY,
4147 v->BytePerPixelInDETC,
4148 v->HActive,
4149 v->HRatio,
4150 v->HRatioChroma,
4151 v->NoOfDPPThisState,
4152 v->swath_width_luma_ub_this_state,
4153 v->swath_width_chroma_ub_this_state,
4154 v->SwathWidthYThisState,
4155 v->SwathWidthCThisState,
4156 v->SwathHeightYThisState,
4157 v->SwathHeightCThisState,
4158 v->DETBufferSizeYThisState,
4159 v->DETBufferSizeCThisState,
4160 v->SingleDPPViewportSizeSupportPerPlane,
4161 &v->ViewportSizeSupport[0][0]);
4162
4163 for (i = 0; i < v->soc.num_states; i++) {
4164 for (j = 0; j < 2; j++) {
4165 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4166 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4167 v->RequiredDISPCLK[i][j] = 0.0;
4168 v->DISPCLK_DPPCLK_Support[i][j] = true;
4169 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4170 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4171 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4172 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4173 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4174 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4175 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4176 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4177 }
4178 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4179 * (1 + v->DISPCLKRampingMargin / 100.0);
4180 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4181 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4182 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4183 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4184 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4185 }
4186 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4187 * (1 + v->DISPCLKRampingMargin / 100.0);
4188 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4189 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4190 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4191 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4192 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4193 }
4194
4195 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4196 || !(v->Output[k] == dm_dp ||
4197 v->Output[k] == dm_dp2p0 ||
4198 v->Output[k] == dm_edp)) {
4199 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4200 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4201
4202 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4203 FMTBufferExceeded = true;
4204 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4205 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4206 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4207 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4208 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4211 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4212 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4213 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4214 } else {
4215 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4216 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4217 }
4218 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4219 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4220 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4221 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4222 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4223 } else {
4224 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4225 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4226 }
4227 }
4228 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4229 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4230 if (v->Output[k] == dm_hdmi) {
4231 FMTBufferExceeded = true;
4232 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4233 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4234 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4235
4236 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4237 FMTBufferExceeded = true;
4238 } else {
4239 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4240 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4241 }
4242 }
4243 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4244 v->MPCCombine[i][j][k] = false;
4245 v->NoOfDPP[i][j][k] = 4;
4246 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4247 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4248 v->MPCCombine[i][j][k] = false;
4249 v->NoOfDPP[i][j][k] = 2;
4250 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4251 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4252 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4253 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4254 v->MPCCombine[i][j][k] = false;
4255 v->NoOfDPP[i][j][k] = 1;
4256 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4257 } else {
4258 v->MPCCombine[i][j][k] = true;
4259 v->NoOfDPP[i][j][k] = 2;
4260 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4261 }
4262 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4263 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4264 > v->MaxDppclkRoundedDownToDFSGranularity)
4265 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4266 v->DISPCLK_DPPCLK_Support[i][j] = false;
4267 }
4268 }
4269 v->TotalNumberOfActiveDPP[i][j] = 0;
4270 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4271 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4272 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4273 if (v->NoOfDPP[i][j][k] == 1)
4274 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4275 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4276 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4277 NoChroma = false;
4278 }
4279
4280 // UPTO
4281 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4282 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4283 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4284 double BWOfNonSplitPlaneOfMaximumBandwidth;
4285 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4286
4287 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4288 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4289 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4290 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4291 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4292 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4293 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4294 }
4295 }
4296 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4297 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4298 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4299 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4300 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4301 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4302 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4303 }
4304 }
4305 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4306 v->RequiredDISPCLK[i][j] = 0.0;
4307 v->DISPCLK_DPPCLK_Support[i][j] = true;
4308 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4309 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4310 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4311 v->MPCCombine[i][j][k] = true;
4312 v->NoOfDPP[i][j][k] = 2;
4313 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4314 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4315 } else {
4316 v->MPCCombine[i][j][k] = false;
4317 v->NoOfDPP[i][j][k] = 1;
4318 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4319 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4320 }
4321 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4322 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4323 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4324 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4325 } else {
4326 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4327 }
4328 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4329 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4330 > v->MaxDppclkRoundedDownToDFSGranularity)
4331 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4332 v->DISPCLK_DPPCLK_Support[i][j] = false;
4333 }
4334 }
4335 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4336 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4337 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4338 }
4339 }
4340 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4341 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4342 v->DISPCLK_DPPCLK_Support[i][j] = false;
4343 }
4344 }
4345 }
4346
4347 /*Total Available Pipes Support Check*/
4348
4349 for (i = 0; i < v->soc.num_states; i++) {
4350 for (j = 0; j < 2; j++) {
4351 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4352 v->TotalAvailablePipesSupport[i][j] = true;
4353 } else {
4354 v->TotalAvailablePipesSupport[i][j] = false;
4355 }
4356 }
4357 }
4358 /*Display IO and DSC Support Check*/
4359
4360 v->NonsupportedDSCInputBPC = false;
4361 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4362 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4363 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4364 v->NonsupportedDSCInputBPC = true;
4365 }
4366 }
4367
4368 /*Number Of DSC Slices*/
4369 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4370 if (v->BlendingAndTiming[k] == k) {
4371 if (v->PixelClockBackEnd[k] > 3200) {
4372 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4373 } else if (v->PixelClockBackEnd[k] > 1360) {
4374 v->NumberOfDSCSlices[k] = 8;
4375 } else if (v->PixelClockBackEnd[k] > 680) {
4376 v->NumberOfDSCSlices[k] = 4;
4377 } else if (v->PixelClockBackEnd[k] > 340) {
4378 v->NumberOfDSCSlices[k] = 2;
4379 } else {
4380 v->NumberOfDSCSlices[k] = 1;
4381 }
4382 } else {
4383 v->NumberOfDSCSlices[k] = 0;
4384 }
4385 }
4386
4387 for (i = 0; i < v->soc.num_states; i++) {
4388 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4389 v->RequiresDSC[i][k] = false;
4390 v->RequiresFEC[i][k] = false;
4391 if (v->BlendingAndTiming[k] == k) {
4392 if (v->Output[k] == dm_hdmi) {
4393 v->RequiresDSC[i][k] = false;
4394 v->RequiresFEC[i][k] = false;
4395 v->OutputBppPerState[i][k] = TruncToValidBPP(
4396 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4397 3,
4398 v->HTotal[k],
4399 v->HActive[k],
4400 v->PixelClockBackEnd[k],
4401 v->ForcedOutputLinkBPP[k],
4402 false,
4403 v->Output[k],
4404 v->OutputFormat[k],
4405 v->DSCInputBitPerComponent[k],
4406 v->NumberOfDSCSlices[k],
4407 v->AudioSampleRate[k],
4408 v->AudioSampleLayout[k],
4409 v->ODMCombineEnablePerState[i][k]);
4410 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4411 if (v->DSCEnable[k] == true) {
4412 v->RequiresDSC[i][k] = true;
4413 v->LinkDSCEnable = true;
4414 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4415 v->RequiresFEC[i][k] = true;
4416 } else {
4417 v->RequiresFEC[i][k] = false;
4418 }
4419 } else {
4420 v->RequiresDSC[i][k] = false;
4421 v->LinkDSCEnable = false;
4422 if (v->Output[k] == dm_dp2p0) {
4423 v->RequiresFEC[i][k] = true;
4424 } else {
4425 v->RequiresFEC[i][k] = false;
4426 }
4427 }
4428 if (v->Output[k] == dm_dp2p0) {
4429 v->Outbpp = BPP_INVALID;
4430 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4431 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4432 v->Outbpp = TruncToValidBPP(
4433 (1.0 - v->Downspreading / 100.0) * 10000,
4434 v->OutputLinkDPLanes[k],
4435 v->HTotal[k],
4436 v->HActive[k],
4437 v->PixelClockBackEnd[k],
4438 v->ForcedOutputLinkBPP[k],
4439 v->LinkDSCEnable,
4440 v->Output[k],
4441 v->OutputFormat[k],
4442 v->DSCInputBitPerComponent[k],
4443 v->NumberOfDSCSlices[k],
4444 v->AudioSampleRate[k],
4445 v->AudioSampleLayout[k],
4446 v->ODMCombineEnablePerState[i][k]);
4447 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4448 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4449 v->RequiresDSC[i][k] = true;
4450 v->LinkDSCEnable = true;
4451 v->Outbpp = TruncToValidBPP(
4452 (1.0 - v->Downspreading / 100.0) * 10000,
4453 v->OutputLinkDPLanes[k],
4454 v->HTotal[k],
4455 v->HActive[k],
4456 v->PixelClockBackEnd[k],
4457 v->ForcedOutputLinkBPP[k],
4458 v->LinkDSCEnable,
4459 v->Output[k],
4460 v->OutputFormat[k],
4461 v->DSCInputBitPerComponent[k],
4462 v->NumberOfDSCSlices[k],
4463 v->AudioSampleRate[k],
4464 v->AudioSampleLayout[k],
4465 v->ODMCombineEnablePerState[i][k]);
4466 }
4467 v->OutputBppPerState[i][k] = v->Outbpp;
4468 // TODO: Need some other way to handle this nonsense
4469 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4470 }
4471 if (v->Outbpp == BPP_INVALID &&
4472 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4473 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4474 v->Outbpp = TruncToValidBPP(
4475 (1.0 - v->Downspreading / 100.0) * 13500,
4476 v->OutputLinkDPLanes[k],
4477 v->HTotal[k],
4478 v->HActive[k],
4479 v->PixelClockBackEnd[k],
4480 v->ForcedOutputLinkBPP[k],
4481 v->LinkDSCEnable,
4482 v->Output[k],
4483 v->OutputFormat[k],
4484 v->DSCInputBitPerComponent[k],
4485 v->NumberOfDSCSlices[k],
4486 v->AudioSampleRate[k],
4487 v->AudioSampleLayout[k],
4488 v->ODMCombineEnablePerState[i][k]);
4489 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4490 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4491 v->RequiresDSC[i][k] = true;
4492 v->LinkDSCEnable = true;
4493 v->Outbpp = TruncToValidBPP(
4494 (1.0 - v->Downspreading / 100.0) * 13500,
4495 v->OutputLinkDPLanes[k],
4496 v->HTotal[k],
4497 v->HActive[k],
4498 v->PixelClockBackEnd[k],
4499 v->ForcedOutputLinkBPP[k],
4500 v->LinkDSCEnable,
4501 v->Output[k],
4502 v->OutputFormat[k],
4503 v->DSCInputBitPerComponent[k],
4504 v->NumberOfDSCSlices[k],
4505 v->AudioSampleRate[k],
4506 v->AudioSampleLayout[k],
4507 v->ODMCombineEnablePerState[i][k]);
4508 }
4509 v->OutputBppPerState[i][k] = v->Outbpp;
4510 // TODO: Need some other way to handle this nonsense
4511 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4512 }
4513 if (v->Outbpp == BPP_INVALID &&
4514 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4515 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4516 v->Outbpp = TruncToValidBPP(
4517 (1.0 - v->Downspreading / 100.0) * 20000,
4518 v->OutputLinkDPLanes[k],
4519 v->HTotal[k],
4520 v->HActive[k],
4521 v->PixelClockBackEnd[k],
4522 v->ForcedOutputLinkBPP[k],
4523 v->LinkDSCEnable,
4524 v->Output[k],
4525 v->OutputFormat[k],
4526 v->DSCInputBitPerComponent[k],
4527 v->NumberOfDSCSlices[k],
4528 v->AudioSampleRate[k],
4529 v->AudioSampleLayout[k],
4530 v->ODMCombineEnablePerState[i][k]);
4531 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4532 v->ForcedOutputLinkBPP[k] == 0) {
4533 v->RequiresDSC[i][k] = true;
4534 v->LinkDSCEnable = true;
4535 v->Outbpp = TruncToValidBPP(
4536 (1.0 - v->Downspreading / 100.0) * 20000,
4537 v->OutputLinkDPLanes[k],
4538 v->HTotal[k],
4539 v->HActive[k],
4540 v->PixelClockBackEnd[k],
4541 v->ForcedOutputLinkBPP[k],
4542 v->LinkDSCEnable,
4543 v->Output[k],
4544 v->OutputFormat[k],
4545 v->DSCInputBitPerComponent[k],
4546 v->NumberOfDSCSlices[k],
4547 v->AudioSampleRate[k],
4548 v->AudioSampleLayout[k],
4549 v->ODMCombineEnablePerState[i][k]);
4550 }
4551 v->OutputBppPerState[i][k] = v->Outbpp;
4552 // TODO: Need some other way to handle this nonsense
4553 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4554 }
4555 } else {
4556 v->Outbpp = BPP_INVALID;
4557 if (v->PHYCLKPerState[i] >= 270.0) {
4558 v->Outbpp = TruncToValidBPP(
4559 (1.0 - v->Downspreading / 100.0) * 2700,
4560 v->OutputLinkDPLanes[k],
4561 v->HTotal[k],
4562 v->HActive[k],
4563 v->PixelClockBackEnd[k],
4564 v->ForcedOutputLinkBPP[k],
4565 v->LinkDSCEnable,
4566 v->Output[k],
4567 v->OutputFormat[k],
4568 v->DSCInputBitPerComponent[k],
4569 v->NumberOfDSCSlices[k],
4570 v->AudioSampleRate[k],
4571 v->AudioSampleLayout[k],
4572 v->ODMCombineEnablePerState[i][k]);
4573 v->OutputBppPerState[i][k] = v->Outbpp;
4574 // TODO: Need some other way to handle this nonsense
4575 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4576 }
4577 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4578 v->Outbpp = TruncToValidBPP(
4579 (1.0 - v->Downspreading / 100.0) * 5400,
4580 v->OutputLinkDPLanes[k],
4581 v->HTotal[k],
4582 v->HActive[k],
4583 v->PixelClockBackEnd[k],
4584 v->ForcedOutputLinkBPP[k],
4585 v->LinkDSCEnable,
4586 v->Output[k],
4587 v->OutputFormat[k],
4588 v->DSCInputBitPerComponent[k],
4589 v->NumberOfDSCSlices[k],
4590 v->AudioSampleRate[k],
4591 v->AudioSampleLayout[k],
4592 v->ODMCombineEnablePerState[i][k]);
4593 v->OutputBppPerState[i][k] = v->Outbpp;
4594 // TODO: Need some other way to handle this nonsense
4595 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4596 }
4597 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4598 v->Outbpp = TruncToValidBPP(
4599 (1.0 - v->Downspreading / 100.0) * 8100,
4600 v->OutputLinkDPLanes[k],
4601 v->HTotal[k],
4602 v->HActive[k],
4603 v->PixelClockBackEnd[k],
4604 v->ForcedOutputLinkBPP[k],
4605 v->LinkDSCEnable,
4606 v->Output[k],
4607 v->OutputFormat[k],
4608 v->DSCInputBitPerComponent[k],
4609 v->NumberOfDSCSlices[k],
4610 v->AudioSampleRate[k],
4611 v->AudioSampleLayout[k],
4612 v->ODMCombineEnablePerState[i][k]);
4613 v->OutputBppPerState[i][k] = v->Outbpp;
4614 // TODO: Need some other way to handle this nonsense
4615 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4616 }
4617 }
4618 }
4619 } else {
4620 v->OutputBppPerState[i][k] = 0;
4621 }
4622 }
4623 }
4624
4625 for (i = 0; i < v->soc.num_states; i++) {
4626 v->LinkCapacitySupport[i] = true;
4627 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4628 if (v->BlendingAndTiming[k] == k
4629 && (v->Output[k] == dm_dp ||
4630 v->Output[k] == dm_edp ||
4631 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4632 v->LinkCapacitySupport[i] = false;
4633 }
4634 }
4635 }
4636
4637 // UPTO 2172
4638 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4639 if (v->BlendingAndTiming[k] == k
4640 && (v->Output[k] == dm_dp ||
4641 v->Output[k] == dm_edp ||
4642 v->Output[k] == dm_hdmi)) {
4643 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4644 P2IWith420 = true;
4645 }
4646 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4647 && !v->DSC422NativeSupport) {
4648 DSC422NativeNotSupported = true;
4649 }
4650 }
4651 }
4652
4653
4654 for (i = 0; i < v->soc.num_states; ++i) {
4655 v->ODMCombine4To1SupportCheckOK[i] = true;
4656 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4657 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4658 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4659 || v->Output[k] == dm_hdmi)) {
4660 v->ODMCombine4To1SupportCheckOK[i] = false;
4661 }
4662 }
4663 }
4664
4665 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4666
4667 for (i = 0; i < v->soc.num_states; i++) {
4668 v->NotEnoughDSCUnits[i] = false;
4669 v->TotalDSCUnitsRequired = 0.0;
4670 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4671 if (v->RequiresDSC[i][k] == true) {
4672 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4673 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4674 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4675 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4676 } else {
4677 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4678 }
4679 }
4680 }
4681 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4682 v->NotEnoughDSCUnits[i] = true;
4683 }
4684 }
4685 /*DSC Delay per state*/
4686
4687 for (i = 0; i < v->soc.num_states; i++) {
4688 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4689 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4690 v->BPP = 0.0;
4691 } else {
4692 v->BPP = v->OutputBppPerState[i][k];
4693 }
4694 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4695 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4696 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4697 v->DSCInputBitPerComponent[k],
4698 v->BPP,
4699 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4700 v->NumberOfDSCSlices[k],
4701 v->OutputFormat[k],
4702 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4703 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4704 v->DSCDelayPerState[i][k] = 2.0
4705 * (dscceComputeDelay(
4706 v->DSCInputBitPerComponent[k],
4707 v->BPP,
4708 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4709 v->NumberOfDSCSlices[k] / 2,
4710 v->OutputFormat[k],
4711 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4712 } else {
4713 v->DSCDelayPerState[i][k] = 4.0
4714 * (dscceComputeDelay(
4715 v->DSCInputBitPerComponent[k],
4716 v->BPP,
4717 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4718 v->NumberOfDSCSlices[k] / 4,
4719 v->OutputFormat[k],
4720 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4721 }
4722 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4723 } else {
4724 v->DSCDelayPerState[i][k] = 0.0;
4725 }
4726 }
4727 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4728 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4729 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4730 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4731 }
4732 }
4733 }
4734 }
4735
4736 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4737 //
4738 for (i = 0; i < v->soc.num_states; ++i) {
4739 for (j = 0; j <= 1; ++j) {
4740 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4741 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4742 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4743 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4744 }
4745
4746 CalculateSwathAndDETConfiguration(
4747 false,
4748 v->NumberOfActivePlanes,
4749 v->DETBufferSizeInKByte[0],
4750 v->MaximumSwathWidthLuma,
4751 v->MaximumSwathWidthChroma,
4752 v->SourceScan,
4753 v->SourcePixelFormat,
4754 v->SurfaceTiling,
4755 v->ViewportWidth,
4756 v->ViewportHeight,
4757 v->SurfaceWidthY,
4758 v->SurfaceWidthC,
4759 v->SurfaceHeightY,
4760 v->SurfaceHeightC,
4761 v->Read256BlockHeightY,
4762 v->Read256BlockHeightC,
4763 v->Read256BlockWidthY,
4764 v->Read256BlockWidthC,
4765 v->ODMCombineEnableThisState,
4766 v->BlendingAndTiming,
4767 v->BytePerPixelY,
4768 v->BytePerPixelC,
4769 v->BytePerPixelInDETY,
4770 v->BytePerPixelInDETC,
4771 v->HActive,
4772 v->HRatio,
4773 v->HRatioChroma,
4774 v->NoOfDPPThisState,
4775 v->swath_width_luma_ub_this_state,
4776 v->swath_width_chroma_ub_this_state,
4777 v->SwathWidthYThisState,
4778 v->SwathWidthCThisState,
4779 v->SwathHeightYThisState,
4780 v->SwathHeightCThisState,
4781 v->DETBufferSizeYThisState,
4782 v->DETBufferSizeCThisState,
4783 v->dummystring,
4784 &v->ViewportSizeSupport[i][j]);
4785
4786 CalculateDCFCLKDeepSleep(
4787 mode_lib,
4788 v->NumberOfActivePlanes,
4789 v->BytePerPixelY,
4790 v->BytePerPixelC,
4791 v->VRatio,
4792 v->VRatioChroma,
4793 v->SwathWidthYThisState,
4794 v->SwathWidthCThisState,
4795 v->NoOfDPPThisState,
4796 v->HRatio,
4797 v->HRatioChroma,
4798 v->PixelClock,
4799 v->PSCL_FACTOR,
4800 v->PSCL_FACTOR_CHROMA,
4801 v->RequiredDPPCLKThisState,
4802 v->ReadBandwidthLuma,
4803 v->ReadBandwidthChroma,
4804 v->ReturnBusWidth,
4805 &v->ProjectedDCFCLKDeepSleep[i][j]);
4806
4807 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4808 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4809 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4810 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4811 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4812 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4813 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4814 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4815 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4816 }
4817 }
4818 }
4819
4820 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4821 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4822 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4823 }
4824
4825 for (i = 0; i < v->soc.num_states; i++) {
4826 for (j = 0; j < 2; j++) {
4827 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4828
4829 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4830 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4831 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4832 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4833 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4834 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4835 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4836 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4837 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4838 }
4839
4840 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4841 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4842 if (v->DCCEnable[k] == true) {
4843 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4844 }
4845 }
4846
4847 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4848 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4849 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4850
4851 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4852 && v->SourceScan[k] != dm_vert) {
4853 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4854 / 2;
4855 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4856 } else {
4857 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4858 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4859 }
4860
4861 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4862 mode_lib,
4863 v->DCCEnable[k],
4864 v->Read256BlockHeightC[k],
4865 v->Read256BlockWidthC[k],
4866 v->SourcePixelFormat[k],
4867 v->SurfaceTiling[k],
4868 v->BytePerPixelC[k],
4869 v->SourceScan[k],
4870 v->SwathWidthCThisState[k],
4871 v->ViewportHeightChroma[k],
4872 v->GPUVMEnable,
4873 v->HostVMEnable,
4874 v->HostVMMaxNonCachedPageTableLevels,
4875 v->GPUVMMinPageSize,
4876 v->HostVMMinPageSize,
4877 v->PTEBufferSizeInRequestsForChroma,
4878 v->PitchC[k],
4879 0.0,
4880 &v->MacroTileWidthC[k],
4881 &v->MetaRowBytesC,
4882 &v->DPTEBytesPerRowC,
4883 &v->PTEBufferSizeNotExceededC[i][j][k],
4884 &v->dummyinteger7,
4885 &v->dpte_row_height_chroma[k],
4886 &v->dummyinteger28,
4887 &v->dummyinteger26,
4888 &v->dummyinteger23,
4889 &v->meta_row_height_chroma[k],
4890 &v->dummyinteger8,
4891 &v->dummyinteger9,
4892 &v->dummyinteger19,
4893 &v->dummyinteger20,
4894 &v->dummyinteger17,
4895 &v->dummyinteger10,
4896 &v->dummyinteger11);
4897
4898 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4899 mode_lib,
4900 v->VRatioChroma[k],
4901 v->VTAPsChroma[k],
4902 v->Interlace[k],
4903 v->ProgressiveToInterlaceUnitInOPP,
4904 v->SwathHeightCThisState[k],
4905 v->ViewportYStartC[k],
4906 &v->PrefillC[k],
4907 &v->MaxNumSwC[k]);
4908 } else {
4909 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4910 v->PTEBufferSizeInRequestsForChroma = 0;
4911 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4912 v->MetaRowBytesC = 0.0;
4913 v->DPTEBytesPerRowC = 0.0;
4914 v->PrefetchLinesC[i][j][k] = 0.0;
4915 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4916 }
4917 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4918 mode_lib,
4919 v->DCCEnable[k],
4920 v->Read256BlockHeightY[k],
4921 v->Read256BlockWidthY[k],
4922 v->SourcePixelFormat[k],
4923 v->SurfaceTiling[k],
4924 v->BytePerPixelY[k],
4925 v->SourceScan[k],
4926 v->SwathWidthYThisState[k],
4927 v->ViewportHeight[k],
4928 v->GPUVMEnable,
4929 v->HostVMEnable,
4930 v->HostVMMaxNonCachedPageTableLevels,
4931 v->GPUVMMinPageSize,
4932 v->HostVMMinPageSize,
4933 v->PTEBufferSizeInRequestsForLuma,
4934 v->PitchY[k],
4935 v->DCCMetaPitchY[k],
4936 &v->MacroTileWidthY[k],
4937 &v->MetaRowBytesY,
4938 &v->DPTEBytesPerRowY,
4939 &v->PTEBufferSizeNotExceededY[i][j][k],
4940 &v->dummyinteger7,
4941 &v->dpte_row_height[k],
4942 &v->dummyinteger29,
4943 &v->dummyinteger27,
4944 &v->dummyinteger24,
4945 &v->meta_row_height[k],
4946 &v->dummyinteger25,
4947 &v->dpte_group_bytes[k],
4948 &v->dummyinteger21,
4949 &v->dummyinteger22,
4950 &v->dummyinteger18,
4951 &v->dummyinteger5,
4952 &v->dummyinteger6);
4953 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4954 mode_lib,
4955 v->VRatio[k],
4956 v->vtaps[k],
4957 v->Interlace[k],
4958 v->ProgressiveToInterlaceUnitInOPP,
4959 v->SwathHeightYThisState[k],
4960 v->ViewportYStartY[k],
4961 &v->PrefillY[k],
4962 &v->MaxNumSwY[k]);
4963 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4964 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4965 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4966
4967 CalculateRowBandwidth(
4968 v->GPUVMEnable,
4969 v->SourcePixelFormat[k],
4970 v->VRatio[k],
4971 v->VRatioChroma[k],
4972 v->DCCEnable[k],
4973 v->HTotal[k] / v->PixelClock[k],
4974 v->MetaRowBytesY,
4975 v->MetaRowBytesC,
4976 v->meta_row_height[k],
4977 v->meta_row_height_chroma[k],
4978 v->DPTEBytesPerRowY,
4979 v->DPTEBytesPerRowC,
4980 v->dpte_row_height[k],
4981 v->dpte_row_height_chroma[k],
4982 &v->meta_row_bandwidth[i][j][k],
4983 &v->dpte_row_bandwidth[i][j][k]);
4984 }
4985 /*
4986 * DCCMetaBufferSizeSupport(i, j) = True
4987 * For k = 0 To NumberOfActivePlanes - 1
4988 * If MetaRowBytes(i, j, k) > 24064 Then
4989 * DCCMetaBufferSizeSupport(i, j) = False
4990 * End If
4991 * Next k
4992 */
4993 v->DCCMetaBufferSizeSupport[i][j] = true;
4994 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4995 if (v->MetaRowBytes[i][j][k] > 24064)
4996 v->DCCMetaBufferSizeSupport[i][j] = false;
4997 }
4998 v->UrgLatency[i] = CalculateUrgentLatency(
4999 v->UrgentLatencyPixelDataOnly,
5000 v->UrgentLatencyPixelMixedWithVMData,
5001 v->UrgentLatencyVMDataOnly,
5002 v->DoUrgentLatencyAdjustment,
5003 v->UrgentLatencyAdjustmentFabricClockComponent,
5004 v->UrgentLatencyAdjustmentFabricClockReference,
5005 v->FabricClockPerState[i]);
5006
5007 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5008 CalculateUrgentBurstFactor(
5009 v->swath_width_luma_ub_this_state[k],
5010 v->swath_width_chroma_ub_this_state[k],
5011 v->SwathHeightYThisState[k],
5012 v->SwathHeightCThisState[k],
5013 v->HTotal[k] / v->PixelClock[k],
5014 v->UrgLatency[i],
5015 v->CursorBufferSize,
5016 v->CursorWidth[k][0],
5017 v->CursorBPP[k][0],
5018 v->VRatio[k],
5019 v->VRatioChroma[k],
5020 v->BytePerPixelInDETY[k],
5021 v->BytePerPixelInDETC[k],
5022 v->DETBufferSizeYThisState[k],
5023 v->DETBufferSizeCThisState[k],
5024 &v->UrgentBurstFactorCursor[k],
5025 &v->UrgentBurstFactorLuma[k],
5026 &v->UrgentBurstFactorChroma[k],
5027 &NotUrgentLatencyHiding[k]);
5028 }
5029
5030 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5031 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5032 if (NotUrgentLatencyHiding[k]) {
5033 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5034 }
5035 }
5036
5037 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5038 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5039 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5040 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5041 }
5042
5043 v->TotalVActivePixelBandwidth[i][j] = 0;
5044 v->TotalVActiveCursorBandwidth[i][j] = 0;
5045 v->TotalMetaRowBandwidth[i][j] = 0;
5046 v->TotalDPTERowBandwidth[i][j] = 0;
5047 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5048 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5049 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5050 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5051 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5052 }
5053 }
5054 }
5055
5056 //Calculate writeback delay and maximum VStartup for each state; the return BW per state is calculated further below
5057 for (i = 0; i < v->soc.num_states; ++i) {
5058 for (j = 0; j <= 1; ++j) {
5059 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5060 if (v->BlendingAndTiming[k] == k) {
5061 if (v->WritebackEnable[k] == true) {
5062 v->WritebackDelayTime[k] = v->WritebackLatency
5063 + CalculateWriteBackDelay(
5064 v->WritebackPixelFormat[k],
5065 v->WritebackHRatio[k],
5066 v->WritebackVRatio[k],
5067 v->WritebackVTaps[k],
5068 v->WritebackDestinationWidth[k],
5069 v->WritebackDestinationHeight[k],
5070 v->WritebackSourceHeight[k],
5071 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5072 } else {
5073 v->WritebackDelayTime[k] = 0.0;
5074 }
5075 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5076 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5077 v->WritebackDelayTime[k] = dml_max(
5078 v->WritebackDelayTime[k],
5079 v->WritebackLatency
5080 + CalculateWriteBackDelay(
5081 v->WritebackPixelFormat[m],
5082 v->WritebackHRatio[m],
5083 v->WritebackVRatio[m],
5084 v->WritebackVTaps[m],
5085 v->WritebackDestinationWidth[m],
5086 v->WritebackDestinationHeight[m],
5087 v->WritebackSourceHeight[m],
5088 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5089 }
5090 }
5091 }
5092 }
5093 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5094 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5095 if (v->BlendingAndTiming[k] == m) {
5096 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5097 }
5098 }
5099 }
5100 v->MaxMaxVStartup[i][j] = 0;
5101 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5102 v->MaximumVStartup[i][j][k] =
5103 CalculateMaxVStartup(
5104 v->VTotal[k],
5105 v->VActive[k],
5106 v->VBlankNom[k],
5107 v->HTotal[k],
5108 v->PixelClock[k],
5109 v->ProgressiveToInterlaceUnitInOPP,
5110 v->Interlace[k],
5111 v->ip.VBlankNomDefaultUS,
5112 v->WritebackDelayTime[k]);
5113 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5114 }
5115 }
5116 }
5117
5118 ReorderingBytes = v->NumberOfChannels
5119 * dml_max3(
5120 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5121 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5122 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5123
5124 for (i = 0; i < v->soc.num_states; ++i) {
5125 for (j = 0; j <= 1; ++j) {
5126 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5127 }
5128 }
5129
5130 if (v->UseMinimumRequiredDCFCLK == true)
5131 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5132
5133 for (i = 0; i < v->soc.num_states; ++i) {
5134 for (j = 0; j <= 1; ++j) {
5135 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5136 v->ReturnBusWidth * v->DCFCLKState[i][j],
5137 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5138 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5139 double PixelDataOnlyReturnBWPerState = dml_min(
5140 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5141 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5142 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5143 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5144 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5145
5146 if (v->HostVMEnable != true) {
5147 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5148 } else {
5149 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5150 }
5151 }
5152 }
5153
5154 //Re-ordering Buffer Support Check
5155 for (i = 0; i < v->soc.num_states; ++i) {
5156 for (j = 0; j <= 1; ++j) {
5157 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5158 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5159 v->ROBSupport[i][j] = true;
5160 } else {
5161 v->ROBSupport[i][j] = false;
5162 }
5163 }
5164 }
5165
5166 //Vertical Active BW support check
5167
5168 MaxTotalVActiveRDBandwidth = 0;
5169 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5170 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5171 }
5172
5173 for (i = 0; i < v->soc.num_states; ++i) {
5174 for (j = 0; j <= 1; ++j) {
5175 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5176 dml_min(
5177 v->ReturnBusWidth * v->DCFCLKState[i][j],
5178 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5179 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5180 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5181 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5182
5183 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5184 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5185 } else {
5186 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5187 }
5188 }
5189 }
5190
5191 v->UrgentLatency = CalculateUrgentLatency(
5192 v->UrgentLatencyPixelDataOnly,
5193 v->UrgentLatencyPixelMixedWithVMData,
5194 v->UrgentLatencyVMDataOnly,
5195 v->DoUrgentLatencyAdjustment,
5196 v->UrgentLatencyAdjustmentFabricClockComponent,
5197 v->UrgentLatencyAdjustmentFabricClockReference,
5198 v->FabricClock);
5199 //Prefetch Check
5200 for (i = 0; i < v->soc.num_states; ++i) {
5201 for (j = 0; j <= 1; ++j) {
5202 double VMDataOnlyReturnBWPerState;
5203 double HostVMInefficiencyFactor = 1;
5204 int NextPrefetchModeState = MinPrefetchMode;
5205 bool UnboundedRequestEnabledThisState = false;
5206 int CompressedBufferSizeInkByteThisState = 0;
5207 double dummy;
5208
5209 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5210
5211 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5212 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5213 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5214 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5215 }
5216
5217 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5218 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5219 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5220 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5221 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5222 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5223 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5224 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5225 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5226 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5227 }
5228
5229 VMDataOnlyReturnBWPerState = dml_min(
5230 dml_min(
5231 v->ReturnBusWidth * v->DCFCLKState[i][j],
5232 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5233 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5234 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5235 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5236 if (v->GPUVMEnable && v->HostVMEnable)
5237 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5238
5239 v->ExtraLatency = CalculateExtraLatency(
5240 v->RoundTripPingLatencyCycles,
5241 ReorderingBytes,
5242 v->DCFCLKState[i][j],
5243 v->TotalNumberOfActiveDPP[i][j],
5244 v->PixelChunkSizeInKByte,
5245 v->TotalNumberOfDCCActiveDPP[i][j],
5246 v->MetaChunkSize,
5247 v->ReturnBWPerState[i][j],
5248 v->GPUVMEnable,
5249 v->HostVMEnable,
5250 v->NumberOfActivePlanes,
5251 v->NoOfDPPThisState,
5252 v->dpte_group_bytes,
5253 HostVMInefficiencyFactor,
5254 v->HostVMMinPageSize,
5255 v->HostVMMaxNonCachedPageTableLevels);
5256
5257 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5258 do {
5259 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5260 v->MaxVStartup = v->NextMaxVStartup;
5261
5262 v->TWait = CalculateTWait(
5263 v->PrefetchModePerState[i][j],
5264 v->DRAMClockChangeLatency,
5265 v->UrgLatency[i],
5266 v->SREnterPlusExitTime);
5267
5268 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5269 CalculatePrefetchSchedulePerPlane(mode_lib,
5270 HostVMInefficiencyFactor,
5271 i, j, k);
5272 }
5273
5274 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5275 CalculateUrgentBurstFactor(
5276 v->swath_width_luma_ub_this_state[k],
5277 v->swath_width_chroma_ub_this_state[k],
5278 v->SwathHeightYThisState[k],
5279 v->SwathHeightCThisState[k],
5280 v->HTotal[k] / v->PixelClock[k],
5281 v->UrgLatency[i],
5282 v->CursorBufferSize,
5283 v->CursorWidth[k][0],
5284 v->CursorBPP[k][0],
5285 v->VRatioPreY[i][j][k],
5286 v->VRatioPreC[i][j][k],
5287 v->BytePerPixelInDETY[k],
5288 v->BytePerPixelInDETC[k],
5289 v->DETBufferSizeYThisState[k],
5290 v->DETBufferSizeCThisState[k],
5291 &v->UrgentBurstFactorCursorPre[k],
5292 &v->UrgentBurstFactorLumaPre[k],
5293 &v->UrgentBurstFactorChromaPre[k],
5294 &v->NotUrgentLatencyHidingPre[k]);
5295 }
5296
5297 v->MaximumReadBandwidthWithPrefetch = 0.0;
5298 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5299 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5300 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5301
5302 v->MaximumReadBandwidthWithPrefetch =
5303 v->MaximumReadBandwidthWithPrefetch
5304 + dml_max3(
5305 v->VActivePixelBandwidth[i][j][k]
5306 + v->VActiveCursorBandwidth[i][j][k]
5307 + v->NoOfDPP[i][j][k]
5308 * (v->meta_row_bandwidth[i][j][k]
5309 + v->dpte_row_bandwidth[i][j][k]),
5310 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5311 v->NoOfDPP[i][j][k]
5312 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5313 * v->UrgentBurstFactorLumaPre[k]
5314 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5315 * v->UrgentBurstFactorChromaPre[k])
5316 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5317 }
5318
5319 v->NotEnoughUrgentLatencyHidingPre = false;
5320 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5321 if (v->NotUrgentLatencyHidingPre[k] == true) {
5322 v->NotEnoughUrgentLatencyHidingPre = true;
5323 }
5324 }
5325
5326 v->PrefetchSupported[i][j] = true;
5327 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5328 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5329 v->PrefetchSupported[i][j] = false;
5330 }
5331 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5332 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5333 || v->NoTimeForPrefetch[i][j][k] == true) {
5334 v->PrefetchSupported[i][j] = false;
5335 }
5336 }
5337
5338 v->DynamicMetadataSupported[i][j] = true;
5339 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5340 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5341 v->DynamicMetadataSupported[i][j] = false;
5342 }
5343 }
5344
5345 v->VRatioInPrefetchSupported[i][j] = true;
5346 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5347 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5348 v->VRatioInPrefetchSupported[i][j] = false;
5349 }
5350 }
5351 v->AnyLinesForVMOrRowTooLarge = false;
5352 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5353 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5354 v->AnyLinesForVMOrRowTooLarge = true;
5355 }
5356 }
5357
5358 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5359
5360 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5361 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5362 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5363 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5364 - dml_max(
5365 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5366 v->NoOfDPP[i][j][k]
5367 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5368 * v->UrgentBurstFactorLumaPre[k]
5369 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5370 * v->UrgentBurstFactorChromaPre[k])
5371 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5372 }
5373 v->TotImmediateFlipBytes = 0.0;
5374 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5375 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5376 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5377 + v->DPTEBytesPerRow[i][j][k]);
5378 }
5379
5380 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5381 CalculateFlipSchedule(
5382 mode_lib,
5383 k,
5384 HostVMInefficiencyFactor,
5385 v->ExtraLatency,
5386 v->UrgLatency[i],
5387 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5388 v->MetaRowBytes[i][j][k],
5389 v->DPTEBytesPerRow[i][j][k]);
5390 }
5391 v->total_dcn_read_bw_with_flip = 0.0;
5392 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5393 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5394 + dml_max3(
5395 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5396 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5397 + v->VActiveCursorBandwidth[i][j][k],
5398 v->NoOfDPP[i][j][k]
5399 * (v->final_flip_bw[k]
5400 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5401 * v->UrgentBurstFactorLumaPre[k]
5402 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5403 * v->UrgentBurstFactorChromaPre[k])
5404 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5405 }
5406 v->ImmediateFlipSupportedForState[i][j] = true;
5407 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5408 v->ImmediateFlipSupportedForState[i][j] = false;
5409 }
5410 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5411 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5412 v->ImmediateFlipSupportedForState[i][j] = false;
5413 }
5414 }
5415 } else {
5416 v->ImmediateFlipSupportedForState[i][j] = false;
5417 }
5418
5419 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5420 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5421 NextPrefetchModeState = NextPrefetchModeState + 1;
5422 } else {
5423 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5424 }
5425 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5426 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5427 && ((v->HostVMEnable == false &&
5428 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5429 || v->ImmediateFlipSupportedForState[i][j] == true))
5430 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5431
5432 CalculateUnboundedRequestAndCompressedBufferSize(
5433 v->DETBufferSizeInKByte[0],
5434 v->ConfigReturnBufferSizeInKByte,
5435 v->UseUnboundedRequesting,
5436 v->TotalNumberOfActiveDPP[i][j],
5437 NoChroma,
5438 v->MaxNumDPP,
5439 v->CompressedBufferSegmentSizeInkByte,
5440 v->Output,
5441 &UnboundedRequestEnabledThisState,
5442 &CompressedBufferSizeInkByteThisState);
5443
5444 CalculateWatermarksAndDRAMSpeedChangeSupport(
5445 mode_lib,
5446 v->PrefetchModePerState[i][j],
5447 v->DCFCLKState[i][j],
5448 v->ReturnBWPerState[i][j],
5449 v->UrgLatency[i],
5450 v->ExtraLatency,
5451 v->SOCCLKPerState[i],
5452 v->ProjectedDCFCLKDeepSleep[i][j],
5453 v->DETBufferSizeYThisState,
5454 v->DETBufferSizeCThisState,
5455 v->SwathHeightYThisState,
5456 v->SwathHeightCThisState,
5457 v->SwathWidthYThisState,
5458 v->SwathWidthCThisState,
5459 v->NoOfDPPThisState,
5460 v->BytePerPixelInDETY,
5461 v->BytePerPixelInDETC,
5462 UnboundedRequestEnabledThisState,
5463 CompressedBufferSizeInkByteThisState,
5464 &v->DRAMClockChangeSupport[i][j],
5465 &dummy,
5466 &dummy,
5467 &dummy,
5468 &dummy);
5469 }
5470 }
5471
5472 /*PTE Buffer Size Check*/
5473 for (i = 0; i < v->soc.num_states; i++) {
5474 for (j = 0; j < 2; j++) {
5475 v->PTEBufferSizeNotExceeded[i][j] = true;
5476 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5477 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5478 v->PTEBufferSizeNotExceeded[i][j] = false;
5479 }
5480 }
5481 }
5482 }
5483
5484 /*Cursor Support Check*/
5485 v->CursorSupport = true;
5486 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5487 if (v->CursorWidth[k][0] > 0.0) {
5488 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5489 v->CursorSupport = false;
5490 }
5491 }
5492 }
5493
5494 /*Valid Pitch Check*/
5495 v->PitchSupport = true;
5496 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5497 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5498 if (v->DCCEnable[k] == true) {
5499 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5500 } else {
5501 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5502 }
5503 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5504 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5505 && v->SourcePixelFormat[k] != dm_mono_8) {
5506 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5507 if (v->DCCEnable[k] == true) {
5508 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5509 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5510 64.0 * v->Read256BlockWidthC[k]);
5511 } else {
5512 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5513 }
5514 } else {
5515 v->AlignedCPitch[k] = v->PitchC[k];
5516 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5517 }
5518 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5519 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5520 v->PitchSupport = false;
5521 }
5522 }
5523
5524 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5525 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5526 ViewportExceedsSurface = true;
5527 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5528 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5529 && v->SourcePixelFormat[k] != dm_rgbe) {
5530 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5531 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5532 ViewportExceedsSurface = true;
5533 }
5534 }
5535 }
5536 }
5537
5538 /*Mode Support, Voltage State and SOC Configuration*/
5539 for (i = v->soc.num_states - 1; i >= 0; i--) {
5540 for (j = 0; j < 2; j++) {
5541 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5542 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5543 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5544 && v->DTBCLKRequiredMoreThanSupported[i] == false
5545 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5546 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5547 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5548 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5549 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5550 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5551 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5552 && ((v->HostVMEnable == false
5553 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5554 || v->ImmediateFlipSupportedForState[i][j] == true)
5555 && FMTBufferExceeded == false) {
5556 v->ModeSupport[i][j] = true;
5557 } else {
5558 v->ModeSupport[i][j] = false;
5559 }
5560 }
5561 }
5562 for (i = v->soc.num_states; i >= 0; i--) {
5563 for (j = 0; j < 2; j++) {
5564 enum dm_validation_status status = DML_VALIDATION_OK;
5565
5566 if (!v->ScaleRatioAndTapsSupport) {
5567 status = DML_FAIL_SCALE_RATIO_TAP;
5568 } else if (!v->SourceFormatPixelAndScanSupport) {
5569 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5570 } else if (!v->ViewportSizeSupport[i][j]) {
5571 status = DML_FAIL_VIEWPORT_SIZE;
5572 } else if (P2IWith420) {
5573 status = DML_FAIL_P2I_WITH_420;
5574 } else if (DSCOnlyIfNecessaryWithBPP) {
5575 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5576 } else if (DSC422NativeNotSupported) {
5577 status = DML_FAIL_NOT_DSC422_NATIVE;
5578 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5579 status = DML_FAIL_ODM_COMBINE4TO1;
5580 } else if (v->NotEnoughDSCUnits[i]) {
5581 status = DML_FAIL_NOT_ENOUGH_DSC;
5582 } else if (!v->ROBSupport[i][j]) {
5583 status = DML_FAIL_REORDERING_BUFFER;
5584 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5585 status = DML_FAIL_DISPCLK_DPPCLK;
5586 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5587 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5588 } else if (!EnoughWritebackUnits) {
5589 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5590 } else if (!v->WritebackLatencySupport) {
5591 status = DML_FAIL_WRITEBACK_LATENCY;
5592 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5593 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5594 } else if (!v->CursorSupport) {
5595 status = DML_FAIL_CURSOR_SUPPORT;
5596 } else if (!v->PitchSupport) {
5597 status = DML_FAIL_PITCH_SUPPORT;
5598 } else if (ViewportExceedsSurface) {
5599 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5600 } else if (!v->PrefetchSupported[i][j]) {
5601 status = DML_FAIL_PREFETCH_SUPPORT;
5602 } else if (!v->DynamicMetadataSupported[i][j]) {
5603 status = DML_FAIL_DYNAMIC_METADATA;
5604 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5605 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5606 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5607 status = DML_FAIL_V_RATIO_PREFETCH;
5608 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5609 status = DML_FAIL_PTE_BUFFER_SIZE;
5610 } else if (v->NonsupportedDSCInputBPC) {
5611 status = DML_FAIL_DSC_INPUT_BPC;
5612 } else if ((v->HostVMEnable
5613 && !v->ImmediateFlipSupportedForState[i][j])) {
5614 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5615 } else if (FMTBufferExceeded) {
5616 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5617 }
5618 mode_lib->vba.ValidationStatus[i] = status;
5619 }
5620 }
5621
5622 {
5623 unsigned int MaximumMPCCombine = 0;
5624
5625 for (i = v->soc.num_states; i >= 0; i--) {
5626 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5627 v->VoltageLevel = i;
5628 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5629 if (v->ModeSupport[i][0] == true) {
5630 MaximumMPCCombine = 0;
5631 } else {
5632 MaximumMPCCombine = 1;
5633 }
5634 }
5635 }
5636 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5637 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5638 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5639 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5640 }
5641 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5642 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5643 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5644 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5645 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5646 v->maxMpcComb = MaximumMPCCombine;
5647 }
5648 }
5649
5650 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5651 struct display_mode_lib *mode_lib,
5652 unsigned int PrefetchMode,
5653 double DCFCLK,
5654 double ReturnBW,
5655 double UrgentLatency,
5656 double ExtraLatency,
5657 double SOCCLK,
5658 double DCFCLKDeepSleep,
5659 unsigned int DETBufferSizeY[],
5660 unsigned int DETBufferSizeC[],
5661 unsigned int SwathHeightY[],
5662 unsigned int SwathHeightC[],
5663 double SwathWidthY[],
5664 double SwathWidthC[],
5665 unsigned int DPPPerPlane[],
5666 double BytePerPixelDETY[],
5667 double BytePerPixelDETC[],
5668 bool UnboundedRequestEnabled,
5669 unsigned int CompressedBufferSizeInkByte,
5670 enum clock_change_support *DRAMClockChangeSupport,
5671 double *StutterExitWatermark,
5672 double *StutterEnterPlusExitWatermark,
5673 double *Z8StutterExitWatermark,
5674 double *Z8StutterEnterPlusExitWatermark)
5675 {
5676 struct vba_vars_st *v = &mode_lib->vba;
5677 double EffectiveLBLatencyHidingY;
5678 double EffectiveLBLatencyHidingC;
5679 double LinesInDETY[DC__NUM_DPP__MAX];
5680 double LinesInDETC;
5681 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5682 unsigned int LinesInDETCRoundedDownToSwath;
5683 double FullDETBufferingTimeY;
5684 double FullDETBufferingTimeC;
5685 double ActiveDRAMClockChangeLatencyMarginY;
5686 double ActiveDRAMClockChangeLatencyMarginC;
5687 double WritebackDRAMClockChangeLatencyMargin;
5688 double PlaneWithMinActiveDRAMClockChangeMargin;
5689 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5690 double WritebackDRAMClockChangeLatencyHiding;
5691 double TotalPixelBW = 0.0;
5692 int k, j;
5693
5694 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5695
5696 #ifdef __DML_VBA_DEBUG__
5697 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5698 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5699 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5700 #endif
5701
5702 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5703
5704 #ifdef __DML_VBA_DEBUG__
5705 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5706 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5707 #endif
5708
5709 v->TotalActiveWriteback = 0;
5710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5711 if (v->WritebackEnable[k] == true) {
5712 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5713 }
5714 }
5715
5716 if (v->TotalActiveWriteback <= 1) {
5717 v->WritebackUrgentWatermark = v->WritebackLatency;
5718 } else {
5719 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5720 }
5721
5722 if (v->TotalActiveWriteback <= 1) {
5723 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5724 } else {
5725 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5726 }
5727
5728 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5729 TotalPixelBW = TotalPixelBW
5730 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5731 / (v->HTotal[k] / v->PixelClock[k]);
5732 }
5733
5734 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5735 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5736
5737 v->LBLatencyHidingSourceLinesY = dml_min(
5738 (double) v->MaxLineBufferLines,
5739 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5740
5741 v->LBLatencyHidingSourceLinesC = dml_min(
5742 (double) v->MaxLineBufferLines,
5743 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5744
5745 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5746
5747 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5748
5749 if (UnboundedRequestEnabled) {
5750 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5751 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5752 }
5753
5754 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5755 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5756 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5757 if (BytePerPixelDETC[k] > 0) {
5758 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5759 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5760 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5761 } else {
5762 LinesInDETC = 0;
5763 FullDETBufferingTimeC = 999999;
5764 }
5765
5766 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5767 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5768
5769 if (v->NumberOfActivePlanes > 1) {
5770 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5771 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5772 }
5773
5774 if (BytePerPixelDETC[k] > 0) {
5775 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5776 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5777
5778 if (v->NumberOfActivePlanes > 1) {
5779 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5780 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5781 }
5782 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5783 } else {
5784 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5785 }
5786
5787 if (v->WritebackEnable[k] == true) {
5788 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5789 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5790 if (v->WritebackPixelFormat[k] == dm_444_64) {
5791 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5792 }
5793 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5794 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5795 }
5796 }
5797
5798 v->MinActiveDRAMClockChangeMargin = 999999;
5799 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5800 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5801 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5802 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5803 if (v->BlendingAndTiming[k] == k) {
5804 PlaneWithMinActiveDRAMClockChangeMargin = k;
5805 } else {
5806 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5807 if (v->BlendingAndTiming[k] == j) {
5808 PlaneWithMinActiveDRAMClockChangeMargin = j;
5809 }
5810 }
5811 }
5812 }
5813 }
5814
5815 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5816
5817 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5818 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5819 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5820 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5821 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5822 }
5823 }
5824
5825 v->TotalNumberOfActiveOTG = 0;
5826
5827 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5828 if (v->BlendingAndTiming[k] == k) {
5829 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5830 }
5831 }
5832
5833 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5834 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5835 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5836 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5837 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5838 } else {
5839 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5840 }
5841
5842 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5843 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5844 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5845 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5846
5847 #ifdef __DML_VBA_DEBUG__
5848 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5849 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5850 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5851 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5852 #endif
5853 }
5854
5855 static void CalculateDCFCLKDeepSleep(
5856 struct display_mode_lib *mode_lib,
5857 unsigned int NumberOfActivePlanes,
5858 int BytePerPixelY[],
5859 int BytePerPixelC[],
5860 double VRatio[],
5861 double VRatioChroma[],
5862 double SwathWidthY[],
5863 double SwathWidthC[],
5864 unsigned int DPPPerPlane[],
5865 double HRatio[],
5866 double HRatioChroma[],
5867 double PixelClock[],
5868 double PSCL_THROUGHPUT[],
5869 double PSCL_THROUGHPUT_CHROMA[],
5870 double DPPCLK[],
5871 double ReadBandwidthLuma[],
5872 double ReadBandwidthChroma[],
5873 int ReturnBusWidth,
5874 double *DCFCLKDeepSleep)
5875 {
5876 struct vba_vars_st *v = &mode_lib->vba;
5877 double DisplayPipeLineDeliveryTimeLuma;
5878 double DisplayPipeLineDeliveryTimeChroma;
5879 double ReadBandwidth = 0.0;
5880 int k;
5881
5882 for (k = 0; k < NumberOfActivePlanes; ++k) {
5883
5884 if (VRatio[k] <= 1) {
5885 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5886 } else {
5887 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5888 }
5889 if (BytePerPixelC[k] == 0) {
5890 DisplayPipeLineDeliveryTimeChroma = 0;
5891 } else {
5892 if (VRatioChroma[k] <= 1) {
5893 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5894 } else {
5895 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5896 }
5897 }
5898
5899 if (BytePerPixelC[k] > 0) {
5900 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5901 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5902 } else {
5903 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5904 }
5905 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5906
5907 }
5908
5909 for (k = 0; k < NumberOfActivePlanes; ++k) {
5910 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5911 }
5912
5913 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5914
5915 for (k = 0; k < NumberOfActivePlanes; ++k) {
5916 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5917 }
5918 }
5919
/*
 * CalculateUrgentBurstFactor - urgent burst factors for cursor, luma and chroma.
 *
 * For each buffer (cursor buffer, luma DET, chroma DET) the time covered by the
 * buffered lines is computed and the burst factor is
 *     BufferTime / (BufferTime - UrgentLatency).
 * If the buffer time does not exceed UrgentLatency the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set.  A vertical ratio that is not positive
 * yields a factor of 1.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding - always written (cleared on entry).
 *   *UrgentBurstFactorLuma        - always written.
 *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
 *
 * The chroma DET time is derived from VRatioC.  The previous code used the
 * luma VRatio there and left the VRatioC parameter unused.
 * NOTE(review): this changes results for formats where VRatioC != VRatio;
 * confirm against the HW formula before merging.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines that fit in the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the buffered time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/* Chroma lines drain at the chroma vertical ratio, not the luma one. */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5993
5994 static void CalculatePixelDeliveryTimes(
5995 unsigned int NumberOfActivePlanes,
5996 double VRatio[],
5997 double VRatioChroma[],
5998 double VRatioPrefetchY[],
5999 double VRatioPrefetchC[],
6000 unsigned int swath_width_luma_ub[],
6001 unsigned int swath_width_chroma_ub[],
6002 unsigned int DPPPerPlane[],
6003 double HRatio[],
6004 double HRatioChroma[],
6005 double PixelClock[],
6006 double PSCL_THROUGHPUT[],
6007 double PSCL_THROUGHPUT_CHROMA[],
6008 double DPPCLK[],
6009 int BytePerPixelC[],
6010 enum scan_direction_class SourceScan[],
6011 unsigned int NumberOfCursors[],
6012 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6013 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6014 unsigned int BlockWidth256BytesY[],
6015 unsigned int BlockHeight256BytesY[],
6016 unsigned int BlockWidth256BytesC[],
6017 unsigned int BlockHeight256BytesC[],
6018 double DisplayPipeLineDeliveryTimeLuma[],
6019 double DisplayPipeLineDeliveryTimeChroma[],
6020 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6021 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6022 double DisplayPipeRequestDeliveryTimeLuma[],
6023 double DisplayPipeRequestDeliveryTimeChroma[],
6024 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6025 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6026 double CursorRequestDeliveryTime[],
6027 double CursorRequestDeliveryTimePrefetch[])
6028 {
6029 double req_per_swath_ub;
6030 int k;
6031
6032 for (k = 0; k < NumberOfActivePlanes; ++k) {
6033 if (VRatio[k] <= 1) {
6034 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6035 } else {
6036 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6037 }
6038
6039 if (BytePerPixelC[k] == 0) {
6040 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6041 } else {
6042 if (VRatioChroma[k] <= 1) {
6043 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6044 } else {
6045 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6046 }
6047 }
6048
6049 if (VRatioPrefetchY[k] <= 1) {
6050 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6051 } else {
6052 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6053 }
6054
6055 if (BytePerPixelC[k] == 0) {
6056 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6057 } else {
6058 if (VRatioPrefetchC[k] <= 1) {
6059 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6060 } else {
6061 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6062 }
6063 }
6064 }
6065
6066 for (k = 0; k < NumberOfActivePlanes; ++k) {
6067 if (SourceScan[k] != dm_vert) {
6068 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6069 } else {
6070 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6071 }
6072 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6073 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6074 if (BytePerPixelC[k] == 0) {
6075 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6076 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6077 } else {
6078 if (SourceScan[k] != dm_vert) {
6079 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6080 } else {
6081 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6082 }
6083 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6084 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6085 }
6086 #ifdef __DML_VBA_DEBUG__
6087 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6088 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6089 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6090 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6091 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6092 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6093 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6094 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6095 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6096 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6097 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6098 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6099 #endif
6100 }
6101
6102 for (k = 0; k < NumberOfActivePlanes; ++k) {
6103 int cursor_req_per_width;
6104
6105 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6106 if (NumberOfCursors[k] > 0) {
6107 if (VRatio[k] <= 1) {
6108 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6109 } else {
6110 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6111 }
6112 if (VRatioPrefetchY[k] <= 1) {
6113 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6114 } else {
6115 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6116 }
6117 } else {
6118 CursorRequestDeliveryTime[k] = 0;
6119 CursorRequestDeliveryTimePrefetch[k] = 0;
6120 }
6121 #ifdef __DML_VBA_DEBUG__
6122 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6123 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6124 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6125 #endif
6126 }
6127 }
6128
6129 static void CalculateMetaAndPTETimes(
6130 int NumberOfActivePlanes,
6131 bool GPUVMEnable,
6132 int MetaChunkSize,
6133 int MinMetaChunkSizeBytes,
6134 int HTotal[],
6135 double VRatio[],
6136 double VRatioChroma[],
6137 double DestinationLinesToRequestRowInVBlank[],
6138 double DestinationLinesToRequestRowInImmediateFlip[],
6139 bool DCCEnable[],
6140 double PixelClock[],
6141 int BytePerPixelY[],
6142 int BytePerPixelC[],
6143 enum scan_direction_class SourceScan[],
6144 int dpte_row_height[],
6145 int dpte_row_height_chroma[],
6146 int meta_row_width[],
6147 int meta_row_width_chroma[],
6148 int meta_row_height[],
6149 int meta_row_height_chroma[],
6150 int meta_req_width[],
6151 int meta_req_width_chroma[],
6152 int meta_req_height[],
6153 int meta_req_height_chroma[],
6154 int dpte_group_bytes[],
6155 int PTERequestSizeY[],
6156 int PTERequestSizeC[],
6157 int PixelPTEReqWidthY[],
6158 int PixelPTEReqHeightY[],
6159 int PixelPTEReqWidthC[],
6160 int PixelPTEReqHeightC[],
6161 int dpte_row_width_luma_ub[],
6162 int dpte_row_width_chroma_ub[],
6163 double DST_Y_PER_PTE_ROW_NOM_L[],
6164 double DST_Y_PER_PTE_ROW_NOM_C[],
6165 double DST_Y_PER_META_ROW_NOM_L[],
6166 double DST_Y_PER_META_ROW_NOM_C[],
6167 double TimePerMetaChunkNominal[],
6168 double TimePerChromaMetaChunkNominal[],
6169 double TimePerMetaChunkVBlank[],
6170 double TimePerChromaMetaChunkVBlank[],
6171 double TimePerMetaChunkFlip[],
6172 double TimePerChromaMetaChunkFlip[],
6173 double time_per_pte_group_nom_luma[],
6174 double time_per_pte_group_vblank_luma[],
6175 double time_per_pte_group_flip_luma[],
6176 double time_per_pte_group_nom_chroma[],
6177 double time_per_pte_group_vblank_chroma[],
6178 double time_per_pte_group_flip_chroma[])
6179 {
6180 unsigned int meta_chunk_width;
6181 unsigned int min_meta_chunk_width;
6182 unsigned int meta_chunk_per_row_int;
6183 unsigned int meta_row_remainder;
6184 unsigned int meta_chunk_threshold;
6185 unsigned int meta_chunks_per_row_ub;
6186 unsigned int meta_chunk_width_chroma;
6187 unsigned int min_meta_chunk_width_chroma;
6188 unsigned int meta_chunk_per_row_int_chroma;
6189 unsigned int meta_row_remainder_chroma;
6190 unsigned int meta_chunk_threshold_chroma;
6191 unsigned int meta_chunks_per_row_ub_chroma;
6192 unsigned int dpte_group_width_luma;
6193 unsigned int dpte_groups_per_row_luma_ub;
6194 unsigned int dpte_group_width_chroma;
6195 unsigned int dpte_groups_per_row_chroma_ub;
6196 int k;
6197
6198 for (k = 0; k < NumberOfActivePlanes; ++k) {
6199 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6200 if (BytePerPixelC[k] == 0) {
6201 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6202 } else {
6203 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6204 }
6205 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6206 if (BytePerPixelC[k] == 0) {
6207 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6208 } else {
6209 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6210 }
6211 }
6212
6213 for (k = 0; k < NumberOfActivePlanes; ++k) {
6214 if (DCCEnable[k] == true) {
6215 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6216 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6217 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6218 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6219 if (SourceScan[k] != dm_vert) {
6220 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6221 } else {
6222 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6223 }
6224 if (meta_row_remainder <= meta_chunk_threshold) {
6225 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6226 } else {
6227 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6228 }
6229 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6230 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6231 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6232 if (BytePerPixelC[k] == 0) {
6233 TimePerChromaMetaChunkNominal[k] = 0;
6234 TimePerChromaMetaChunkVBlank[k] = 0;
6235 TimePerChromaMetaChunkFlip[k] = 0;
6236 } else {
6237 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6238 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6239 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6240 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6241 if (SourceScan[k] != dm_vert) {
6242 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6243 } else {
6244 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6245 }
6246 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6247 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6248 } else {
6249 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6250 }
6251 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6252 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6253 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6254 }
6255 } else {
6256 TimePerMetaChunkNominal[k] = 0;
6257 TimePerMetaChunkVBlank[k] = 0;
6258 TimePerMetaChunkFlip[k] = 0;
6259 TimePerChromaMetaChunkNominal[k] = 0;
6260 TimePerChromaMetaChunkVBlank[k] = 0;
6261 TimePerChromaMetaChunkFlip[k] = 0;
6262 }
6263 }
6264
6265 for (k = 0; k < NumberOfActivePlanes; ++k) {
6266 if (GPUVMEnable == true) {
6267 if (SourceScan[k] != dm_vert) {
6268 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6269 } else {
6270 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6271 }
6272 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6273 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6274 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6275 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6276 if (BytePerPixelC[k] == 0) {
6277 time_per_pte_group_nom_chroma[k] = 0;
6278 time_per_pte_group_vblank_chroma[k] = 0;
6279 time_per_pte_group_flip_chroma[k] = 0;
6280 } else {
6281 if (SourceScan[k] != dm_vert) {
6282 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6283 } else {
6284 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6285 }
6286 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6287 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6288 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6289 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6290 }
6291 } else {
6292 time_per_pte_group_nom_luma[k] = 0;
6293 time_per_pte_group_vblank_luma[k] = 0;
6294 time_per_pte_group_flip_luma[k] = 0;
6295 time_per_pte_group_nom_chroma[k] = 0;
6296 time_per_pte_group_vblank_chroma[k] = 0;
6297 time_per_pte_group_flip_chroma[k] = 0;
6298 }
6299 }
6300 }
6301
/*
 * CalculateVMGroupAndRequestTimes - time budget per VM group and per VM request.
 *
 * For each active plane the number of VM groups (bytes / vm_group_bytes,
 * rounded up) and of 64-byte VM requests is counted from:
 *   - the PDE0 bytes only, when DCC is disabled;
 *   - the meta PTE bytes only, when DCC is enabled with one page table level;
 *   - both (plus 1 extra group, or 2 with chroma), otherwise.
 * The VBlank / immediate-flip request windows are then divided by these
 * counts, and halved when GPUVMMaxPageTableLevels > 2.
 *
 * All four outputs are 0 for a plane unless GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int group_count;
		int request_count;
		bool has_chroma;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;

		if (!DCCEnable[k]) {
			/* No DCC: only the PDE0 fetches count. */
			if (has_chroma) {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* DCC with a single page table level: only the meta PTE fetches count. */
			if (has_chroma) {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* DCC with multiple levels: PDE0 and meta PTE fetches, plus a fixed extra group count. */
			if (has_chroma) {
				group_count = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / group_count;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / group_count;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / request_count;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / request_count;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6401
6402 static void CalculateStutterEfficiency(
6403 struct display_mode_lib *mode_lib,
6404 int CompressedBufferSizeInkByte,
6405 bool UnboundedRequestEnabled,
6406 int ConfigReturnBufferSizeInKByte,
6407 int MetaFIFOSizeInKEntries,
6408 int ZeroSizeBufferEntries,
6409 int NumberOfActivePlanes,
6410 int ROBBufferSizeInKByte,
6411 double TotalDataReadBandwidth,
6412 double DCFCLK,
6413 double ReturnBW,
6414 double COMPBUF_RESERVED_SPACE_64B,
6415 double COMPBUF_RESERVED_SPACE_ZS,
6416 double SRExitTime,
6417 double SRExitZ8Time,
6418 bool SynchronizedVBlank,
6419 double Z8StutterEnterPlusExitWatermark,
6420 double StutterEnterPlusExitWatermark,
6421 bool ProgressiveToInterlaceUnitInOPP,
6422 bool Interlace[],
6423 double MinTTUVBlank[],
6424 int DPPPerPlane[],
6425 unsigned int DETBufferSizeY[],
6426 int BytePerPixelY[],
6427 double BytePerPixelDETY[],
6428 double SwathWidthY[],
6429 int SwathHeightY[],
6430 int SwathHeightC[],
6431 double NetDCCRateLuma[],
6432 double NetDCCRateChroma[],
6433 double DCCFractionOfZeroSizeRequestsLuma[],
6434 double DCCFractionOfZeroSizeRequestsChroma[],
6435 int HTotal[],
6436 int VTotal[],
6437 double PixelClock[],
6438 double VRatio[],
6439 enum scan_direction_class SourceScan[],
6440 int BlockHeight256BytesY[],
6441 int BlockWidth256BytesY[],
6442 int BlockHeight256BytesC[],
6443 int BlockWidth256BytesC[],
6444 int DCCYMaxUncompressedBlock[],
6445 int DCCCMaxUncompressedBlock[],
6446 int VActive[],
6447 bool DCCEnable[],
6448 bool WritebackEnable[],
6449 double ReadBandwidthPlaneLuma[],
6450 double ReadBandwidthPlaneChroma[],
6451 double meta_row_bw[],
6452 double dpte_row_bw[],
6453 double *StutterEfficiencyNotIncludingVBlank,
6454 double *StutterEfficiency,
6455 int *NumberOfStutterBurstsPerFrame,
6456 double *Z8StutterEfficiencyNotIncludingVBlank,
6457 double *Z8StutterEfficiency,
6458 int *Z8NumberOfStutterBurstsPerFrame,
6459 double *StutterPeriod)
6460 {
6461 struct vba_vars_st *v = &mode_lib->vba;
6462
6463 double DETBufferingTimeY;
6464 double SwathWidthYCriticalPlane = 0;
6465 double VActiveTimeCriticalPlane = 0;
6466 double FrameTimeCriticalPlane = 0;
6467 int BytePerPixelYCriticalPlane = 0;
6468 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6469 double MinTTUVBlankCriticalPlane = 0;
6470 double TotalCompressedReadBandwidth;
6471 double TotalRowReadBandwidth;
6472 double AverageDCCCompressionRate;
6473 double EffectiveCompressedBufferSize;
6474 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6475 double StutterBurstTime;
6476 int TotalActiveWriteback;
6477 double LinesInDETY;
6478 double LinesInDETYRoundedDownToSwath;
6479 double MaximumEffectiveCompressionLuma;
6480 double MaximumEffectiveCompressionChroma;
6481 double TotalZeroSizeRequestReadBandwidth;
6482 double TotalZeroSizeCompressedReadBandwidth;
6483 double AverageDCCZeroSizeFraction;
6484 double AverageZeroSizeCompressionRate;
6485 int TotalNumberOfActiveOTG = 0;
6486 double LastStutterPeriod = 0.0;
6487 double LastZ8StutterPeriod = 0.0;
6488 int k;
6489
6490 TotalZeroSizeRequestReadBandwidth = 0;
6491 TotalZeroSizeCompressedReadBandwidth = 0;
6492 TotalRowReadBandwidth = 0;
6493 TotalCompressedReadBandwidth = 0;
6494
6495 for (k = 0; k < NumberOfActivePlanes; ++k) {
6496 if (DCCEnable[k] == true) {
6497 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6498 || DCCYMaxUncompressedBlock[k] < 256) {
6499 MaximumEffectiveCompressionLuma = 2;
6500 } else {
6501 MaximumEffectiveCompressionLuma = 4;
6502 }
6503 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6504 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6505 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6506 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6507 if (ReadBandwidthPlaneChroma[k] > 0) {
6508 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6509 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6510 MaximumEffectiveCompressionChroma = 2;
6511 } else {
6512 MaximumEffectiveCompressionChroma = 4;
6513 }
6514 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6515 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6516 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6517 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6518 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6519 }
6520 } else {
6521 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6522 }
6523 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6524 }
6525
6526 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6527 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6528
6529 #ifdef __DML_VBA_DEBUG__
6530 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6531 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6532 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6533 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6534 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6535 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6536 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6537 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6538 #endif
6539
6540 if (AverageDCCZeroSizeFraction == 1) {
6541 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6542 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6543 } else if (AverageDCCZeroSizeFraction > 0) {
6544 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6545 EffectiveCompressedBufferSize = dml_min(
6546 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6547 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6548 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6549 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6550 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6551 dml_print(
6552 "DML::%s: min 2 = %f\n",
6553 __func__,
6554 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6555 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6556 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6557 } else {
6558 EffectiveCompressedBufferSize = dml_min(
6559 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6560 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6561 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6562 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6563 }
6564
6565 #ifdef __DML_VBA_DEBUG__
6566 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6567 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6568 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6569 #endif
6570
6571 *StutterPeriod = 0;
6572 for (k = 0; k < NumberOfActivePlanes; ++k) {
6573 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6574 / BytePerPixelDETY[k] / SwathWidthY[k];
6575 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6576 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6577 #ifdef __DML_VBA_DEBUG__
6578 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6579 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6580 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6581 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6582 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6583 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6584 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6585 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6586 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6587 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6588 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6589 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6590 #endif
6591
6592 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6593 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6594
6595 *StutterPeriod = DETBufferingTimeY;
6596 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6597 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6598 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6599 SwathWidthYCriticalPlane = SwathWidthY[k];
6600 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6601 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6602
6603 #ifdef __DML_VBA_DEBUG__
6604 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6605 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6606 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6607 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6608 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6609 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6610 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6611 #endif
6612 }
6613 }
6614
6615 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6616 #ifdef __DML_VBA_DEBUG__
6617 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6618 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6619 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6620 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6621 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6622 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6623 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6624 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6625 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6626 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6627 #endif
6628
6629 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6630 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6631 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6632 #ifdef __DML_VBA_DEBUG__
6633 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6634 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6635 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6636 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6637 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6638 #endif
6639 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6640
6641 dml_print(
6642 "DML::%s: Time to finish residue swath=%f\n",
6643 __func__,
6644 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6645
6646 TotalActiveWriteback = 0;
6647 for (k = 0; k < NumberOfActivePlanes; ++k) {
6648 if (WritebackEnable[k]) {
6649 TotalActiveWriteback = TotalActiveWriteback + 1;
6650 }
6651 }
6652
6653 if (TotalActiveWriteback == 0) {
6654 #ifdef __DML_VBA_DEBUG__
6655 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6656 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6657 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6658 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6659 #endif
6660 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6661 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6662 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6663 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6664 } else {
6665 *StutterEfficiencyNotIncludingVBlank = 0.;
6666 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6667 *NumberOfStutterBurstsPerFrame = 0;
6668 *Z8NumberOfStutterBurstsPerFrame = 0;
6669 }
6670 #ifdef __DML_VBA_DEBUG__
6671 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6672 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6673 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6674 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6675 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6676 #endif
6677
6678 for (k = 0; k < NumberOfActivePlanes; ++k) {
6679 if (v->BlendingAndTiming[k] == k) {
6680 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6681 }
6682 }
6683
6684 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6685 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6686
6687 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6688 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6689 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6690 } else {
6691 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6692 }
6693 } else {
6694 *StutterEfficiency = 0;
6695 }
6696
6697 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6698 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6699 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6700 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6701 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6702 } else {
6703 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6704 }
6705 } else {
6706 *Z8StutterEfficiency = 0.;
6707 }
6708
6709 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6710 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6711 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6712 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6713 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6714 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6715 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6716 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6717 }
6718
6719 static void CalculateSwathAndDETConfiguration(
6720 bool ForceSingleDPP,
6721 int NumberOfActivePlanes,
6722 unsigned int DETBufferSizeInKByte,
6723 double MaximumSwathWidthLuma[],
6724 double MaximumSwathWidthChroma[],
6725 enum scan_direction_class SourceScan[],
6726 enum source_format_class SourcePixelFormat[],
6727 enum dm_swizzle_mode SurfaceTiling[],
6728 int ViewportWidth[],
6729 int ViewportHeight[],
6730 int SurfaceWidthY[],
6731 int SurfaceWidthC[],
6732 int SurfaceHeightY[],
6733 int SurfaceHeightC[],
6734 int Read256BytesBlockHeightY[],
6735 int Read256BytesBlockHeightC[],
6736 int Read256BytesBlockWidthY[],
6737 int Read256BytesBlockWidthC[],
6738 enum odm_combine_mode ODMCombineEnabled[],
6739 int BlendingAndTiming[],
6740 int BytePerPixY[],
6741 int BytePerPixC[],
6742 double BytePerPixDETY[],
6743 double BytePerPixDETC[],
6744 int HActive[],
6745 double HRatio[],
6746 double HRatioChroma[],
6747 int DPPPerPlane[],
6748 int swath_width_luma_ub[],
6749 int swath_width_chroma_ub[],
6750 double SwathWidth[],
6751 double SwathWidthChroma[],
6752 int SwathHeightY[],
6753 int SwathHeightC[],
6754 unsigned int DETBufferSizeY[],
6755 unsigned int DETBufferSizeC[],
6756 bool ViewportSizeSupportPerPlane[],
6757 bool *ViewportSizeSupport)
6758 {
6759 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6760 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6761 int MinimumSwathHeightY;
6762 int MinimumSwathHeightC;
6763 int RoundedUpMaxSwathSizeBytesY;
6764 int RoundedUpMaxSwathSizeBytesC;
6765 int RoundedUpMinSwathSizeBytesY;
6766 int RoundedUpMinSwathSizeBytesC;
6767 int RoundedUpSwathSizeBytesY;
6768 int RoundedUpSwathSizeBytesC;
6769 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6770 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6771 int k;
6772
6773 CalculateSwathWidth(
6774 ForceSingleDPP,
6775 NumberOfActivePlanes,
6776 SourcePixelFormat,
6777 SourceScan,
6778 ViewportWidth,
6779 ViewportHeight,
6780 SurfaceWidthY,
6781 SurfaceWidthC,
6782 SurfaceHeightY,
6783 SurfaceHeightC,
6784 ODMCombineEnabled,
6785 BytePerPixY,
6786 BytePerPixC,
6787 Read256BytesBlockHeightY,
6788 Read256BytesBlockHeightC,
6789 Read256BytesBlockWidthY,
6790 Read256BytesBlockWidthC,
6791 BlendingAndTiming,
6792 HActive,
6793 HRatio,
6794 DPPPerPlane,
6795 SwathWidthSingleDPP,
6796 SwathWidthSingleDPPChroma,
6797 SwathWidth,
6798 SwathWidthChroma,
6799 MaximumSwathHeightY,
6800 MaximumSwathHeightC,
6801 swath_width_luma_ub,
6802 swath_width_chroma_ub);
6803
6804 *ViewportSizeSupport = true;
6805 for (k = 0; k < NumberOfActivePlanes; ++k) {
6806 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6807 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6808 if (SurfaceTiling[k] == dm_sw_linear
6809 || (SourcePixelFormat[k] == dm_444_64
6810 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6811 && SourceScan[k] != dm_vert)) {
6812 MinimumSwathHeightY = MaximumSwathHeightY[k];
6813 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6814 MinimumSwathHeightY = MaximumSwathHeightY[k];
6815 } else {
6816 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6817 }
6818 MinimumSwathHeightC = MaximumSwathHeightC[k];
6819 } else {
6820 if (SurfaceTiling[k] == dm_sw_linear) {
6821 MinimumSwathHeightY = MaximumSwathHeightY[k];
6822 MinimumSwathHeightC = MaximumSwathHeightC[k];
6823 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6824 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6825 MinimumSwathHeightC = MaximumSwathHeightC[k];
6826 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6827 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6828 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6829 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6830 MinimumSwathHeightY = MaximumSwathHeightY[k];
6831 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6832 } else {
6833 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6834 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6835 }
6836 }
6837
6838 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6839 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6840 if (SourcePixelFormat[k] == dm_420_10) {
6841 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6842 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6843 }
6844 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6845 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6846 if (SourcePixelFormat[k] == dm_420_10) {
6847 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6848 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6849 }
6850
6851 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6852 SwathHeightY[k] = MaximumSwathHeightY[k];
6853 SwathHeightC[k] = MaximumSwathHeightC[k];
6854 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6855 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6856 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6857 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6858 SwathHeightY[k] = MinimumSwathHeightY;
6859 SwathHeightC[k] = MaximumSwathHeightC[k];
6860 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6861 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6862 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6863 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6864 SwathHeightY[k] = MaximumSwathHeightY[k];
6865 SwathHeightC[k] = MinimumSwathHeightC;
6866 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6867 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6868 } else {
6869 SwathHeightY[k] = MinimumSwathHeightY;
6870 SwathHeightC[k] = MinimumSwathHeightC;
6871 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6872 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6873 }
6874 {
6875 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6876
6877 if (SwathHeightC[k] == 0) {
6878 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6879 DETBufferSizeC[k] = 0;
6880 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6881 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6882 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6883 } else {
6884 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6885 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6886 }
6887
6888 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6889 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6890 *ViewportSizeSupport = false;
6891 ViewportSizeSupportPerPlane[k] = false;
6892 } else {
6893 ViewportSizeSupportPerPlane[k] = true;
6894 }
6895 }
6896 }
6897 }
6898
6899 static void CalculateSwathWidth(
6900 bool ForceSingleDPP,
6901 int NumberOfActivePlanes,
6902 enum source_format_class SourcePixelFormat[],
6903 enum scan_direction_class SourceScan[],
6904 int ViewportWidth[],
6905 int ViewportHeight[],
6906 int SurfaceWidthY[],
6907 int SurfaceWidthC[],
6908 int SurfaceHeightY[],
6909 int SurfaceHeightC[],
6910 enum odm_combine_mode ODMCombineEnabled[],
6911 int BytePerPixY[],
6912 int BytePerPixC[],
6913 int Read256BytesBlockHeightY[],
6914 int Read256BytesBlockHeightC[],
6915 int Read256BytesBlockWidthY[],
6916 int Read256BytesBlockWidthC[],
6917 int BlendingAndTiming[],
6918 int HActive[],
6919 double HRatio[],
6920 int DPPPerPlane[],
6921 double SwathWidthSingleDPPY[],
6922 double SwathWidthSingleDPPC[],
6923 double SwathWidthY[],
6924 double SwathWidthC[],
6925 int MaximumSwathHeightY[],
6926 int MaximumSwathHeightC[],
6927 int swath_width_luma_ub[],
6928 int swath_width_chroma_ub[])
6929 {
6930 enum odm_combine_mode MainPlaneODMCombine;
6931 int j, k;
6932
6933 #ifdef __DML_VBA_DEBUG__
6934 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6935 #endif
6936
6937 for (k = 0; k < NumberOfActivePlanes; ++k) {
6938 if (SourceScan[k] != dm_vert) {
6939 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6940 } else {
6941 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6942 }
6943
6944 #ifdef __DML_VBA_DEBUG__
6945 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6946 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6947 #endif
6948
6949 MainPlaneODMCombine = ODMCombineEnabled[k];
6950 for (j = 0; j < NumberOfActivePlanes; ++j) {
6951 if (BlendingAndTiming[k] == j) {
6952 MainPlaneODMCombine = ODMCombineEnabled[j];
6953 }
6954 }
6955
6956 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6957 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6958 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6959 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6960 else if (DPPPerPlane[k] == 2)
6961 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6962 else
6963 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6964
6965 #ifdef __DML_VBA_DEBUG__
6966 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6967 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6968 #endif
6969
6970 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6971 SwathWidthC[k] = SwathWidthY[k] / 2;
6972 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6973 } else {
6974 SwathWidthC[k] = SwathWidthY[k];
6975 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6976 }
6977
6978 if (ForceSingleDPP == true) {
6979 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6980 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6981 }
6982 {
6983 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6984 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6985
6986 #ifdef __DML_VBA_DEBUG__
6987 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6988 #endif
6989
6990 if (SourceScan[k] != dm_vert) {
6991 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6992 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6993 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6994 if (BytePerPixC[k] > 0) {
6995 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6996
6997 swath_width_chroma_ub[k] = dml_min(
6998 surface_width_ub_c,
6999 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7000 } else {
7001 swath_width_chroma_ub[k] = 0;
7002 }
7003 } else {
7004 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7005 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7006 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7007 if (BytePerPixC[k] > 0) {
7008 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7009
7010 swath_width_chroma_ub[k] = dml_min(
7011 surface_height_ub_c,
7012 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7013 } else {
7014 swath_width_chroma_ub[k] = 0;
7015 }
7016 }
7017 }
7018 }
7019 }
7020
7021 static double CalculateExtraLatency(
7022 int RoundTripPingLatencyCycles,
7023 int ReorderingBytes,
7024 double DCFCLK,
7025 int TotalNumberOfActiveDPP,
7026 int PixelChunkSizeInKByte,
7027 int TotalNumberOfDCCActiveDPP,
7028 int MetaChunkSize,
7029 double ReturnBW,
7030 bool GPUVMEnable,
7031 bool HostVMEnable,
7032 int NumberOfActivePlanes,
7033 int NumberOfDPP[],
7034 int dpte_group_bytes[],
7035 double HostVMInefficiencyFactor,
7036 double HostVMMinPageSize,
7037 int HostVMMaxNonCachedPageTableLevels)
7038 {
7039 double ExtraLatencyBytes;
7040 double ExtraLatency;
7041
7042 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7043 ReorderingBytes,
7044 TotalNumberOfActiveDPP,
7045 PixelChunkSizeInKByte,
7046 TotalNumberOfDCCActiveDPP,
7047 MetaChunkSize,
7048 GPUVMEnable,
7049 HostVMEnable,
7050 NumberOfActivePlanes,
7051 NumberOfDPP,
7052 dpte_group_bytes,
7053 HostVMInefficiencyFactor,
7054 HostVMMinPageSize,
7055 HostVMMaxNonCachedPageTableLevels);
7056
7057 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7058
7059 #ifdef __DML_VBA_DEBUG__
7060 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7061 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7062 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7063 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7064 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7065 #endif
7066
7067 return ExtraLatency;
7068 }
7069
/*
 * Return the number of extra bytes that contribute to latency.
 *
 * The base amount is ReorderingBytes plus one pixel chunk per active DPP and
 * one meta chunk per DCC-active DPP (chunk sizes are multiplied by 1024).
 *
 * When GPUVMEnable is set, each plane additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 * HostVMInefficiencyFactor, where "levels" is the number of dynamic host-VM
 * page-table levels. That number is 0 unless both GPUVM and HostVM are
 * enabled; otherwise it is HostVMMaxNonCachedPageTableLevels, reduced by one
 * for HostVMMinPageSize in [2048, 1048576) and by two above that, never going
 * below 0 in the reduced cases.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int dynamic_levels = 0;
	double total_bytes;
	int k;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048) {
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize < 1048576) {
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic only counts when GPUVM is in use. */
	if (!GPUVMEnable)
		return total_bytes;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		total_bytes = total_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * dynamic_levels) * HostVMInefficiencyFactor;
	}

	return total_bytes;
}
7107
/*
 * Return the urgent latency: the largest of the three supplied latencies
 * (pixel data only, pixel mixed with VM data, VM data only).
 *
 * When DoUrgentLatencyAdjustment is set, the result is additionally offset by
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return latency;

	latency = latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);

	return latency;
}
7124
7125 static noinline_for_stack void UseMinimumDCFCLK(
7126 struct display_mode_lib *mode_lib,
7127 int MaxPrefetchMode,
7128 int ReorderingBytes)
7129 {
7130 struct vba_vars_st *v = &mode_lib->vba;
7131 int dummy1, i, j, k;
7132 double NormalEfficiency, dummy2, dummy3;
7133 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7134
7135 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7136 for (i = 0; i < v->soc.num_states; ++i) {
7137 for (j = 0; j <= 1; ++j) {
7138 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7139 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7140 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7141 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7142 double MinimumTWait;
7143 double NonDPTEBandwidth;
7144 double DPTEBandwidth;
7145 double DCFCLKRequiredForAverageBandwidth;
7146 double ExtraLatencyBytes;
7147 double ExtraLatencyCycles;
7148 double DCFCLKRequiredForPeakBandwidth;
7149 int NoOfDPPState[DC__NUM_DPP__MAX];
7150 double MinimumTvmPlus2Tr0;
7151
7152 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7153 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7154 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7155 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7156 }
7157
7158 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7159 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7160
7161 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7162 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7163 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7164 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7165 DCFCLKRequiredForAverageBandwidth = dml_max3(
7166 v->ProjectedDCFCLKDeepSleep[i][j],
7167 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7168 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7169 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7170
7171 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7172 ReorderingBytes,
7173 v->TotalNumberOfActiveDPP[i][j],
7174 v->PixelChunkSizeInKByte,
7175 v->TotalNumberOfDCCActiveDPP[i][j],
7176 v->MetaChunkSize,
7177 v->GPUVMEnable,
7178 v->HostVMEnable,
7179 v->NumberOfActivePlanes,
7180 NoOfDPPState,
7181 v->dpte_group_bytes,
7182 1,
7183 v->HostVMMinPageSize,
7184 v->HostVMMaxNonCachedPageTableLevels);
7185 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7187 double DCFCLKCyclesRequiredInPrefetch;
7188 double ExpectedPrefetchBWAcceleration;
7189 double PrefetchTime;
7190
7191 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7192 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7193 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7194 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7195 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7196 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7197 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7198 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7199 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7200 DynamicMetadataVMExtraLatency[k] =
7201 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7202 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7203 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7204 - v->UrgLatency[i]
7205 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7206 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7207 - DynamicMetadataVMExtraLatency[k];
7208
7209 if (PrefetchTime > 0) {
7210 double ExpectedVRatioPrefetch;
7211
7212 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7213 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7214 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7215 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7216 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7217 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7218 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7219 }
7220 } else {
7221 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7222 }
7223 if (v->DynamicMetadataEnable[k] == true) {
7224 double TSetupPipe;
7225 double TdmbfPipe;
7226 double TdmsksPipe;
7227 double TdmecPipe;
7228 double AllowedTimeForUrgentExtraLatency;
7229
7230 CalculateVupdateAndDynamicMetadataParameters(
7231 v->MaxInterDCNTileRepeaters,
7232 v->RequiredDPPCLK[i][j][k],
7233 v->RequiredDISPCLK[i][j],
7234 v->ProjectedDCFCLKDeepSleep[i][j],
7235 v->PixelClock[k],
7236 v->HTotal[k],
7237 v->VTotal[k] - v->VActive[k],
7238 v->DynamicMetadataTransmittedBytes[k],
7239 v->DynamicMetadataLinesBeforeActiveRequired[k],
7240 v->Interlace[k],
7241 v->ProgressiveToInterlaceUnitInOPP,
7242 &TSetupPipe,
7243 &TdmbfPipe,
7244 &TdmecPipe,
7245 &TdmsksPipe,
7246 &dummy1,
7247 &dummy2,
7248 &dummy3);
7249 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7250 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7251 if (AllowedTimeForUrgentExtraLatency > 0) {
7252 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7253 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7254 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7255 } else {
7256 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7257 }
7258 }
7259 }
7260 DCFCLKRequiredForPeakBandwidth = 0;
7261 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7262 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7263
7264 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7265 * (v->GPUVMEnable == true ?
7266 (v->HostVMEnable == true ?
7267 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7268 0);
7269 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7270 double MaximumTvmPlus2Tr0PlusTsw;
7271
7272 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7273 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7274 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7275 } else {
7276 DCFCLKRequiredForPeakBandwidth = dml_max3(
7277 DCFCLKRequiredForPeakBandwidth,
7278 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7279 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7280 }
7281 }
7282 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7283 }
7284 }
7285 }
7286
7287 static void CalculateUnboundedRequestAndCompressedBufferSize(
7288 unsigned int DETBufferSizeInKByte,
7289 int ConfigReturnBufferSizeInKByte,
7290 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7291 int TotalActiveDPP,
7292 bool NoChromaPlanes,
7293 int MaxNumDPP,
7294 int CompressedBufferSegmentSizeInkByteFinal,
7295 enum output_encoder_class *Output,
7296 bool *UnboundedRequestEnabled,
7297 int *CompressedBufferSizeInkByte)
7298 {
7299 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7300
7301 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7302 *CompressedBufferSizeInkByte = (
7303 *UnboundedRequestEnabled == true ?
7304 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7305 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7306 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7307
7308 #ifdef __DML_VBA_DEBUG__
7309 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7310 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7311 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7312 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7313 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7314 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7315 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7316 #endif
7317 }
7318
7319 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7320 {
7321 bool ret_val = false;
7322
7323 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7324 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7325 ret_val = false;
7326 return ret_val;
7327 }
7328
/*
 * Return the maximum VStartup value, in lines, clamped to the range [0, 1023].
 *
 * The usable vblank is the smaller of the actual vblank (VTotal - VActive) and
 * the nominal vblank. The nominal vblank is VBlankNom when it is non-zero,
 * otherwise VBlankNomDefaultUS converted to whole lines using the line time
 * HTotal / PixelClock (presumably microseconds with PixelClock in MHz - the
 * units are not visible here).
 *
 *  - Interlaced timing that is not converted in the OPP: half of the usable
 *    vblank, rounded down.
 *  - Otherwise: the usable vblank minus the writeback delay expressed in whole
 *    lines (rounded up, at least 1 line).
 *
 * When the writeback delay (or the mandatory 1 line) is larger than the
 * usable vblank the intermediate result is negative. Converting a negative
 * double to unsigned int is undefined behaviour and in practice can wrap to a
 * huge value that the 1023 cap would then turn into the *maximum* VStartup.
 * The result is therefore clamped in floating point before the conversion, so
 * such timings yield 0 instead.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	double line_time_us = HTotal / PixelClock;
	unsigned int vblank_actual = VTotal - VActive;
	unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
	/* VBlankNom is used as given; it is not capped to the default-derived value. */
	unsigned int vblank_nom_input = VBlankNom;
	unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input;
	unsigned int vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);
	double max_vstartup;

	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
		max_vstartup = dml_floor(vblank_size / 2.0, 1.0);
	else
		max_vstartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));

	/* Clamp while still in double: negative (or NaN) must not reach the cast. */
	if (!(max_vstartup > 0))
		return 0;
	if (max_vstartup > 1023)
		return 1023;
	return (unsigned int) max_vstartup;
}
7357