1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26
27 #include "../display_mode_lib.h"
28 #include "../dml_inline_defs.h"
29 #include "../display_mode_vba.h"
30 #include "display_mode_vba_21.h"
31
32
33 /*
34 * NOTE:
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 *
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
40 */
41 typedef struct {
42 double DPPCLK;
43 double DISPCLK;
44 double PixelClock;
45 double DCFCLKDeepSleep;
46 unsigned int DPPPerPlane;
47 bool ScalerEnabled;
48 enum scan_direction_class SourceScan;
49 unsigned int BlockWidth256BytesY;
50 unsigned int BlockHeight256BytesY;
51 unsigned int BlockWidth256BytesC;
52 unsigned int BlockHeight256BytesC;
53 unsigned int InterlaceEnable;
54 unsigned int NumberOfCursors;
55 unsigned int VBlank;
56 unsigned int HTotal;
57 } Pipe;
58
/*
 * Host VM settings handed to CalculatePrefetchSchedule() as its myHostVM
 * argument. Member order is unchanged.
 */
typedef struct {
	/* Host VM terms are applied only when this and GPUVMEnable are both true. */
	bool Enable;

	/*
	 * MaxPageTableLevels - CachedPageTableLevels is what
	 * CalculatePrefetchSchedule() uses as HostVMDynamicLevels.
	 */
	unsigned int MaxPageTableLevels, CachedPageTableLevels;
} HostVM;
64
/*
 * Special bits-per-pixel values.
 * NOTE(review): their users are outside this part of the file; going by the
 * names, 0 marks an invalid bpp and all-ones marks a blended pipe - confirm.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
/*
 * Image width limits. The values match the 5184 and 4096 (420 mode) slice
 * width bounds quoted in the comments of dscceComputeDelay().
 */
#define DCN21_MAX_DSC_IMAGE_WIDTH 5184
#define DCN21_MAX_420_IMAGE_WIDTH 4096
69
70 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
71 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
72 struct display_mode_lib *mode_lib);
73 static unsigned int dscceComputeDelay(
74 unsigned int bpc,
75 double bpp,
76 unsigned int sliceWidth,
77 unsigned int numSlices,
78 enum output_format_class pixelFormat);
79 static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
// Super monster function with more than 60 parameters
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotal,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCFormater,
91 double DPPCLKDelayCNVCCursor,
92 double DISPCLKDelaySubtotal,
93 unsigned int ScalerRecoutWidth,
94 enum output_format_class OutputFormat,
95 unsigned int MaxInterDCNTileRepeaters,
96 unsigned int VStartup,
97 unsigned int MaxVStartup,
98 unsigned int GPUVMPageTableLevels,
99 bool GPUVMEnable,
100 HostVM *myHostVM,
101 bool DynamicMetadataEnable,
102 int DynamicMetadataLinesBeforeActiveRequired,
103 unsigned int DynamicMetadataTransmittedBytes,
104 bool DCCEnable,
105 double UrgentLatency,
106 double UrgentExtraLatency,
107 double TCalc,
108 unsigned int PDEAndMetaPTEBytesFrame,
109 unsigned int MetaRowByte,
110 unsigned int PixelPTEBytesPerRow,
111 double PrefetchSourceLinesY,
112 unsigned int SwathWidthY,
113 double BytePerPixelDETY,
114 double VInitPreFillY,
115 unsigned int MaxNumSwathY,
116 double PrefetchSourceLinesC,
117 double BytePerPixelDETC,
118 double VInitPreFillC,
119 unsigned int MaxNumSwathC,
120 unsigned int SwathHeightY,
121 unsigned int SwathHeightC,
122 double TWait,
123 bool XFCEnabled,
124 double XFCRemoteSurfaceFlipDelay,
125 bool ProgressiveToInterlaceUnitInOPP,
126 double *DSTXAfterScaler,
127 double *DSTYAfterScaler,
128 double *DestinationLinesForPrefetch,
129 double *PrefetchBandwidth,
130 double *DestinationLinesToRequestVMInVBlank,
131 double *DestinationLinesToRequestRowInVBlank,
132 double *VRatioPrefetchY,
133 double *VRatioPrefetchC,
134 double *RequiredPrefetchPixDataBWLuma,
135 double *RequiredPrefetchPixDataBWChroma,
136 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
137 double *Tno_bw,
138 double *prefetch_vmrow_bw,
139 unsigned int *swath_width_luma_ub,
140 unsigned int *swath_width_chroma_ub,
141 unsigned int *VUpdateOffsetPix,
142 double *VUpdateWidthPix,
143 double *VReadyOffsetPix);
144 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
145 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
146 static double CalculateDCCConfiguration(
147 bool DCCEnabled,
148 bool DCCProgrammingAssumesScanDirectionUnknown,
149 unsigned int ViewportWidth,
150 unsigned int ViewportHeight,
151 double DETBufferSize,
152 unsigned int RequestHeight256Byte,
153 unsigned int SwathHeight,
154 enum dm_swizzle_mode TilingFormat,
155 unsigned int BytePerPixel,
156 enum scan_direction_class ScanOrientation,
157 unsigned int *MaxUncompressedBlock,
158 unsigned int *MaxCompressedBlock,
159 unsigned int *Independent64ByteBlock);
160 static double CalculatePrefetchSourceLines(
161 struct display_mode_lib *mode_lib,
162 double VRatio,
163 double vtaps,
164 bool Interlace,
165 bool ProgressiveToInterlaceUnitInOPP,
166 unsigned int SwathHeight,
167 unsigned int ViewportYStart,
168 double *VInitPreFill,
169 unsigned int *MaxNumSwath);
170 static unsigned int CalculateVMAndRowBytes(
171 struct display_mode_lib *mode_lib,
172 bool DCCEnable,
173 unsigned int BlockHeight256Bytes,
174 unsigned int BlockWidth256Bytes,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceTiling,
177 unsigned int BytePerPixel,
178 enum scan_direction_class ScanDirection,
179 unsigned int ViewportWidth,
180 unsigned int ViewportHeight,
181 unsigned int SwathWidthY,
182 bool GPUVMEnable,
183 bool HostVMEnable,
184 unsigned int HostVMMaxPageTableLevels,
185 unsigned int HostVMCachedPageTableLevels,
186 unsigned int VMMPageSize,
187 unsigned int PTEBufferSizeInRequests,
188 unsigned int Pitch,
189 unsigned int DCCMetaPitch,
190 unsigned int *MacroTileWidth,
191 unsigned int *MetaRowByte,
192 unsigned int *PixelPTEBytesPerRow,
193 bool *PTEBufferSizeNotExceeded,
194 unsigned int *dpte_row_width_ub,
195 unsigned int *dpte_row_height,
196 unsigned int *MetaRequestWidth,
197 unsigned int *MetaRequestHeight,
198 unsigned int *meta_row_width,
199 unsigned int *meta_row_height,
200 unsigned int *vm_group_bytes,
201 unsigned int *dpte_group_bytes,
202 unsigned int *PixelPTEReqWidth,
203 unsigned int *PixelPTEReqHeight,
204 unsigned int *PTERequestSize,
205 unsigned int *DPDE0BytesFrame,
206 unsigned int *MetaPTEBytesFrame);
207
208 static double CalculateTWait(
209 unsigned int PrefetchMode,
210 double DRAMClockChangeLatency,
211 double UrgentLatency,
212 double SREnterPlusExitTime);
213 static double CalculateRemoteSurfaceFlipDelay(
214 struct display_mode_lib *mode_lib,
215 double VRatio,
216 double SwathWidth,
217 double Bpp,
218 double LineTime,
219 double XFCTSlvVupdateOffset,
220 double XFCTSlvVupdateWidth,
221 double XFCTSlvVreadyOffset,
222 double XFCXBUFLatencyTolerance,
223 double XFCFillBWOverhead,
224 double XFCSlvChunkSize,
225 double XFCBusTransportTime,
226 double TCalc,
227 double TWait,
228 double *SrcActiveDrainRate,
229 double *TInitXFill,
230 double *TslvChk);
231 static void CalculateActiveRowBandwidth(
232 bool GPUVMEnable,
233 enum source_format_class SourcePixelFormat,
234 double VRatio,
235 bool DCCEnable,
236 double LineTime,
237 unsigned int MetaRowByteLuma,
238 unsigned int MetaRowByteChroma,
239 unsigned int meta_row_height_luma,
240 unsigned int meta_row_height_chroma,
241 unsigned int PixelPTEBytesPerRowLuma,
242 unsigned int PixelPTEBytesPerRowChroma,
243 unsigned int dpte_row_height_luma,
244 unsigned int dpte_row_height_chroma,
245 double *meta_row_bw,
246 double *dpte_row_bw);
247 static void CalculateFlipSchedule(
248 struct display_mode_lib *mode_lib,
249 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
250 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
251 double UrgentExtraLatency,
252 double UrgentLatency,
253 unsigned int GPUVMMaxPageTableLevels,
254 bool HostVMEnable,
255 unsigned int HostVMMaxPageTableLevels,
256 unsigned int HostVMCachedPageTableLevels,
257 bool GPUVMEnable,
258 double PDEAndMetaPTEBytesPerFrame,
259 double MetaRowBytes,
260 double DPTEBytesPerRow,
261 double BandwidthAvailableForImmediateFlip,
262 unsigned int TotImmediateFlipBytes,
263 enum source_format_class SourcePixelFormat,
264 double LineTime,
265 double VRatio,
266 double Tno_bw,
267 bool DCCEnable,
268 unsigned int dpte_row_height,
269 unsigned int meta_row_height,
270 unsigned int dpte_row_height_chroma,
271 unsigned int meta_row_height_chroma,
272 double *DestinationLinesToRequestVMInImmediateFlip,
273 double *DestinationLinesToRequestRowInImmediateFlip,
274 double *final_flip_bw,
275 bool *ImmediateFlipSupportedForPipe);
276 static double CalculateWriteBackDelay(
277 enum source_format_class WritebackPixelFormat,
278 double WritebackHRatio,
279 double WritebackVRatio,
280 unsigned int WritebackLumaHTaps,
281 unsigned int WritebackLumaVTaps,
282 unsigned int WritebackChromaHTaps,
283 unsigned int WritebackChromaVTaps,
284 unsigned int WritebackDestinationWidth);
285 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
286 struct display_mode_lib *mode_lib,
287 unsigned int PrefetchMode,
288 unsigned int NumberOfActivePlanes,
289 unsigned int MaxLineBufferLines,
290 unsigned int LineBufferSize,
291 unsigned int DPPOutputBufferPixels,
292 double DETBufferSizeInKByte,
293 unsigned int WritebackInterfaceLumaBufferSize,
294 unsigned int WritebackInterfaceChromaBufferSize,
295 double DCFCLK,
296 double UrgentOutOfOrderReturn,
297 double ReturnBW,
298 bool GPUVMEnable,
299 int dpte_group_bytes[],
300 unsigned int MetaChunkSize,
301 double UrgentLatency,
302 double ExtraLatency,
303 double WritebackLatency,
304 double WritebackChunkSize,
305 double SOCCLK,
306 double DRAMClockChangeLatency,
307 double SRExitTime,
308 double SREnterPlusExitTime,
309 double DCFCLKDeepSleep,
310 int DPPPerPlane[],
311 bool DCCEnable[],
312 double DPPCLK[],
313 double SwathWidthSingleDPPY[],
314 unsigned int SwathHeightY[],
315 double ReadBandwidthPlaneLuma[],
316 unsigned int SwathHeightC[],
317 double ReadBandwidthPlaneChroma[],
318 unsigned int LBBitPerPixel[],
319 double SwathWidthY[],
320 double HRatio[],
321 unsigned int vtaps[],
322 unsigned int VTAPsChroma[],
323 double VRatio[],
324 unsigned int HTotal[],
325 double PixelClock[],
326 unsigned int BlendingAndTiming[],
327 double BytePerPixelDETY[],
328 double BytePerPixelDETC[],
329 bool WritebackEnable[],
330 enum source_format_class WritebackPixelFormat[],
331 double WritebackDestinationWidth[],
332 double WritebackDestinationHeight[],
333 double WritebackSourceHeight[],
334 enum clock_change_support *DRAMClockChangeSupport,
335 double *UrgentWatermark,
336 double *WritebackUrgentWatermark,
337 double *DRAMClockChangeWatermark,
338 double *WritebackDRAMClockChangeWatermark,
339 double *StutterExitWatermark,
340 double *StutterEnterPlusExitWatermark,
341 double *MinActiveDRAMClockChangeLatencySupported);
342 static void CalculateDCFCLKDeepSleep(
343 struct display_mode_lib *mode_lib,
344 unsigned int NumberOfActivePlanes,
345 double BytePerPixelDETY[],
346 double BytePerPixelDETC[],
347 double VRatio[],
348 double SwathWidthY[],
349 int DPPPerPlane[],
350 double HRatio[],
351 double PixelClock[],
352 double PSCL_THROUGHPUT[],
353 double PSCL_THROUGHPUT_CHROMA[],
354 double DPPCLK[],
355 double *DCFCLKDeepSleep);
356 static void CalculateDETBufferSize(
357 double DETBufferSizeInKByte,
358 unsigned int SwathHeightY,
359 unsigned int SwathHeightC,
360 double *DETBufferSizeY,
361 double *DETBufferSizeC);
362 static void CalculateUrgentBurstFactor(
363 unsigned int DETBufferSizeInKByte,
364 unsigned int SwathHeightY,
365 unsigned int SwathHeightC,
366 unsigned int SwathWidthY,
367 double LineTime,
368 double UrgentLatency,
369 double CursorBufferSize,
370 unsigned int CursorWidth,
371 unsigned int CursorBPP,
372 double VRatio,
373 double VRatioPreY,
374 double VRatioPreC,
375 double BytePerPixelInDETY,
376 double BytePerPixelInDETC,
377 double *UrgentBurstFactorCursor,
378 double *UrgentBurstFactorCursorPre,
379 double *UrgentBurstFactorLuma,
380 double *UrgentBurstFactorLumaPre,
381 double *UrgentBurstFactorChroma,
382 double *UrgentBurstFactorChromaPre,
383 unsigned int *NotEnoughUrgentLatencyHiding,
384 unsigned int *NotEnoughUrgentLatencyHidingPre);
385
386 static void CalculatePixelDeliveryTimes(
387 unsigned int NumberOfActivePlanes,
388 double VRatio[],
389 double VRatioPrefetchY[],
390 double VRatioPrefetchC[],
391 unsigned int swath_width_luma_ub[],
392 unsigned int swath_width_chroma_ub[],
393 int DPPPerPlane[],
394 double HRatio[],
395 double PixelClock[],
396 double PSCL_THROUGHPUT[],
397 double PSCL_THROUGHPUT_CHROMA[],
398 double DPPCLK[],
399 double BytePerPixelDETC[],
400 enum scan_direction_class SourceScan[],
401 unsigned int BlockWidth256BytesY[],
402 unsigned int BlockHeight256BytesY[],
403 unsigned int BlockWidth256BytesC[],
404 unsigned int BlockHeight256BytesC[],
405 double DisplayPipeLineDeliveryTimeLuma[],
406 double DisplayPipeLineDeliveryTimeChroma[],
407 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
408 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
409 double DisplayPipeRequestDeliveryTimeLuma[],
410 double DisplayPipeRequestDeliveryTimeChroma[],
411 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
412 double DisplayPipeRequestDeliveryTimeChromaPrefetch[]);
413
414 static void CalculateMetaAndPTETimes(
415 unsigned int NumberOfActivePlanes,
416 bool GPUVMEnable,
417 unsigned int MetaChunkSize,
418 unsigned int MinMetaChunkSizeBytes,
419 unsigned int GPUVMMaxPageTableLevels,
420 unsigned int HTotal[],
421 double VRatio[],
422 double VRatioPrefetchY[],
423 double VRatioPrefetchC[],
424 double DestinationLinesToRequestRowInVBlank[],
425 double DestinationLinesToRequestRowInImmediateFlip[],
426 double DestinationLinesToRequestVMInVBlank[],
427 double DestinationLinesToRequestVMInImmediateFlip[],
428 bool DCCEnable[],
429 double PixelClock[],
430 double BytePerPixelDETY[],
431 double BytePerPixelDETC[],
432 enum scan_direction_class SourceScan[],
433 unsigned int dpte_row_height[],
434 unsigned int dpte_row_height_chroma[],
435 unsigned int meta_row_width[],
436 unsigned int meta_row_height[],
437 unsigned int meta_req_width[],
438 unsigned int meta_req_height[],
439 int dpte_group_bytes[],
440 unsigned int PTERequestSizeY[],
441 unsigned int PTERequestSizeC[],
442 unsigned int PixelPTEReqWidthY[],
443 unsigned int PixelPTEReqHeightY[],
444 unsigned int PixelPTEReqWidthC[],
445 unsigned int PixelPTEReqHeightC[],
446 unsigned int dpte_row_width_luma_ub[],
447 unsigned int dpte_row_width_chroma_ub[],
448 unsigned int vm_group_bytes[],
449 unsigned int dpde0_bytes_per_frame_ub_l[],
450 unsigned int dpde0_bytes_per_frame_ub_c[],
451 unsigned int meta_pte_bytes_per_frame_ub_l[],
452 unsigned int meta_pte_bytes_per_frame_ub_c[],
453 double DST_Y_PER_PTE_ROW_NOM_L[],
454 double DST_Y_PER_PTE_ROW_NOM_C[],
455 double DST_Y_PER_META_ROW_NOM_L[],
456 double TimePerMetaChunkNominal[],
457 double TimePerMetaChunkVBlank[],
458 double TimePerMetaChunkFlip[],
459 double time_per_pte_group_nom_luma[],
460 double time_per_pte_group_vblank_luma[],
461 double time_per_pte_group_flip_luma[],
462 double time_per_pte_group_nom_chroma[],
463 double time_per_pte_group_vblank_chroma[],
464 double time_per_pte_group_flip_chroma[],
465 double TimePerVMGroupVBlank[],
466 double TimePerVMGroupFlip[],
467 double TimePerVMRequestVBlank[],
468 double TimePerVMRequestFlip[]);
469
470 static double CalculateExtraLatency(
471 double UrgentRoundTripAndOutOfOrderLatency,
472 int TotalNumberOfActiveDPP,
473 int PixelChunkSizeInKByte,
474 int TotalNumberOfDCCActiveDPP,
475 int MetaChunkSize,
476 double ReturnBW,
477 bool GPUVMEnable,
478 bool HostVMEnable,
479 int NumberOfActivePlanes,
480 int NumberOfDPP[],
481 int dpte_group_bytes[],
482 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
483 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
484 int HostVMMaxPageTableLevels,
485 int HostVMCachedPageTableLevels);
486
/*
 * Entry point of this file: runs the calculation stages on mode_lib, one
 * after the other.
 *
 * Every stage takes only mode_lib, so all inputs and results travel through
 * that structure. The last two stages are static functions of this file;
 * the first two are declared elsewhere (presumably in the included DML
 * headers - confirm).
 *
 * NOTE(review): the call order is assumed to matter because each stage can
 * read what the previous one left in mode_lib; do not reorder without
 * checking the stage implementations.
 */
void dml21_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
494
dscceComputeDelay(unsigned int bpc,double bpp,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat)495 static unsigned int dscceComputeDelay(
496 unsigned int bpc,
497 double bpp,
498 unsigned int sliceWidth,
499 unsigned int numSlices,
500 enum output_format_class pixelFormat)
501 {
502 // valid bpc = source bits per component in the set of {8, 10, 12}
503 // valid bpp = increments of 1/16 of a bit
504 // min = 6/7/8 in N420/N422/444, respectively
505 // max = such that compression is 1:1
506 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
507 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
508 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
509
510 // fixed value
511 unsigned int rcModelSize = 8192;
512
513 // N422/N420 operate at 2 pixels per clock
514 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l,
515 Delay, pixels;
516
517 if (pixelFormat == dm_n422 || pixelFormat == dm_420)
518 pixelsPerClock = 2;
519 // #all other modes operate at 1 pixel per clock
520 else
521 pixelsPerClock = 1;
522
523 //initial transmit delay as per PPS
524 initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock);
525
526 //compute ssm delay
527 if (bpc == 8)
528 D = 81;
529 else if (bpc == 10)
530 D = 89;
531 else
532 D = 113;
533
534 //divide by pixel per cycle to compute slice width as seen by DSC
535 w = sliceWidth / pixelsPerClock;
536
537 //422 mode has an additional cycle of delay
538 if (pixelFormat == dm_s422)
539 S = 1;
540 else
541 S = 0;
542
543 //main calculation for the dscce
544 ix = initalXmitDelay + 45;
545 wx = (w + 2) / 3;
546 p = 3 * wx - w;
547 l0 = ix / w;
548 a = ix + p * l0;
549 ax = (a + 2) / 3 + D + 6 + 1;
550 l = (ax + wx - 1) / wx;
551 if ((ix % w) == 0 && p != 0)
552 lstall = 1;
553 else
554 lstall = 0;
555 Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22;
556
557 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
558 pixels = Delay * 3 * pixelsPerClock;
559 return pixels;
560 }
561
dscComputeDelay(enum output_format_class pixelFormat)562 static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
563 {
564 unsigned int Delay = 0;
565
566 if (pixelFormat == dm_420) {
567 // sfr
568 Delay = Delay + 2;
569 // dsccif
570 Delay = Delay + 0;
571 // dscc - input deserializer
572 Delay = Delay + 3;
573 // dscc gets pixels every other cycle
574 Delay = Delay + 2;
575 // dscc - input cdc fifo
576 Delay = Delay + 12;
577 // dscc gets pixels every other cycle
578 Delay = Delay + 13;
579 // dscc - cdc uncertainty
580 Delay = Delay + 2;
581 // dscc - output cdc fifo
582 Delay = Delay + 7;
583 // dscc gets pixels every other cycle
584 Delay = Delay + 3;
585 // dscc - cdc uncertainty
586 Delay = Delay + 2;
587 // dscc - output serializer
588 Delay = Delay + 1;
589 // sft
590 Delay = Delay + 1;
591 } else if (pixelFormat == dm_n422) {
592 // sfr
593 Delay = Delay + 2;
594 // dsccif
595 Delay = Delay + 1;
596 // dscc - input deserializer
597 Delay = Delay + 5;
598 // dscc - input cdc fifo
599 Delay = Delay + 25;
600 // dscc - cdc uncertainty
601 Delay = Delay + 2;
602 // dscc - output cdc fifo
603 Delay = Delay + 10;
604 // dscc - cdc uncertainty
605 Delay = Delay + 2;
606 // dscc - output serializer
607 Delay = Delay + 1;
608 // sft
609 Delay = Delay + 1;
610 } else {
611 // sfr
612 Delay = Delay + 2;
613 // dsccif
614 Delay = Delay + 0;
615 // dscc - input deserializer
616 Delay = Delay + 3;
617 // dscc - input cdc fifo
618 Delay = Delay + 12;
619 // dscc - cdc uncertainty
620 Delay = Delay + 2;
621 // dscc - output cdc fifo
622 Delay = Delay + 7;
623 // dscc - output serializer
624 Delay = Delay + 1;
625 // dscc - cdc uncertainty
626 Delay = Delay + 2;
627 // sft
628 Delay = Delay + 1;
629 }
630
631 return Delay;
632 }
633
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotal,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCFormater,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int ScalerRecoutWidth,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,HostVM * myHostVM,bool DynamicMetadataEnable,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,bool DCCEnable,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double BytePerPixelDETY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,double BytePerPixelDETC,double VInitPreFillC,unsigned int MaxNumSwathC,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool XFCEnabled,double XFCRemoteSurfaceFlipDelay,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,unsigned int * VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,unsigned int * swath_width_luma_ub,unsigned int * swath_width_chroma_ub,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)634 static bool CalculatePrefetchSchedule(
635 struct display_mode_lib *mode_lib,
636 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
637 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
638 Pipe *myPipe,
639 unsigned int DSCDelay,
640 double DPPCLKDelaySubtotal,
641 double DPPCLKDelaySCL,
642 double DPPCLKDelaySCLLBOnly,
643 double DPPCLKDelayCNVCFormater,
644 double DPPCLKDelayCNVCCursor,
645 double DISPCLKDelaySubtotal,
646 unsigned int ScalerRecoutWidth,
647 enum output_format_class OutputFormat,
648 unsigned int MaxInterDCNTileRepeaters,
649 unsigned int VStartup,
650 unsigned int MaxVStartup,
651 unsigned int GPUVMPageTableLevels,
652 bool GPUVMEnable,
653 HostVM *myHostVM,
654 bool DynamicMetadataEnable,
655 int DynamicMetadataLinesBeforeActiveRequired,
656 unsigned int DynamicMetadataTransmittedBytes,
657 bool DCCEnable,
658 double UrgentLatency,
659 double UrgentExtraLatency,
660 double TCalc,
661 unsigned int PDEAndMetaPTEBytesFrame,
662 unsigned int MetaRowByte,
663 unsigned int PixelPTEBytesPerRow,
664 double PrefetchSourceLinesY,
665 unsigned int SwathWidthY,
666 double BytePerPixelDETY,
667 double VInitPreFillY,
668 unsigned int MaxNumSwathY,
669 double PrefetchSourceLinesC,
670 double BytePerPixelDETC,
671 double VInitPreFillC,
672 unsigned int MaxNumSwathC,
673 unsigned int SwathHeightY,
674 unsigned int SwathHeightC,
675 double TWait,
676 bool XFCEnabled,
677 double XFCRemoteSurfaceFlipDelay,
678 bool ProgressiveToInterlaceUnitInOPP,
679 double *DSTXAfterScaler,
680 double *DSTYAfterScaler,
681 double *DestinationLinesForPrefetch,
682 double *PrefetchBandwidth,
683 double *DestinationLinesToRequestVMInVBlank,
684 double *DestinationLinesToRequestRowInVBlank,
685 double *VRatioPrefetchY,
686 double *VRatioPrefetchC,
687 double *RequiredPrefetchPixDataBWLuma,
688 double *RequiredPrefetchPixDataBWChroma,
689 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
690 double *Tno_bw,
691 double *prefetch_vmrow_bw,
692 unsigned int *swath_width_luma_ub,
693 unsigned int *swath_width_chroma_ub,
694 unsigned int *VUpdateOffsetPix,
695 double *VUpdateWidthPix,
696 double *VReadyOffsetPix)
697 {
698 bool MyError = false;
699 unsigned int DPPCycles, DISPCLKCycles;
700 double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime;
701 double Tdm, LineTime, Tsetup;
702 double dst_y_prefetch_equ;
703 double Tsw_oto;
704 double prefetch_bw_oto;
705 double Tvm_oto;
706 double Tr0_oto;
707 double Tvm_oto_lines;
708 double Tr0_oto_lines;
709 double Tsw_oto_lines;
710 double dst_y_prefetch_oto;
711 double TimeForFetchingMetaPTE = 0;
712 double TimeForFetchingRowInVBlank = 0;
713 double LinesToRequestPrefetchPixelData = 0;
714 double HostVMInefficiencyFactor;
715 unsigned int HostVMDynamicLevels;
716
717 if (GPUVMEnable == true && myHostVM->Enable == true) {
718 HostVMInefficiencyFactor =
719 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
720 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
721 HostVMDynamicLevels = myHostVM->MaxPageTableLevels
722 - myHostVM->CachedPageTableLevels;
723 } else {
724 HostVMInefficiencyFactor = 1;
725 HostVMDynamicLevels = 0;
726 }
727
728 if (myPipe->ScalerEnabled)
729 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
730 else
731 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
732
733 DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
734
735 DISPCLKCycles = DISPCLKDelaySubtotal;
736
737 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
738 return true;
739
740 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK
741 + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
742
743 if (myPipe->DPPPerPlane > 1)
744 *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
745
746 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
747 *DSTYAfterScaler = 1;
748 else
749 *DSTYAfterScaler = 0;
750
751 DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler;
752 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
753 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
754
755 *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1);
756 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK);
757 *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime)
758 * myPipe->PixelClock;
759
760 *VReadyOffsetPix = dml_max(
761 150.0 / myPipe->DPPCLK,
762 TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK)
763 * myPipe->PixelClock;
764
765 Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock;
766
767 LineTime = (double) myPipe->HTotal / myPipe->PixelClock;
768
769 if (DynamicMetadataEnable) {
770 double Tdmbf, Tdmec, Tdmsks;
771
772 Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
773 Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK;
774 Tdmec = LineTime;
775 if (DynamicMetadataLinesBeforeActiveRequired == -1)
776 Tdmsks = myPipe->VBlank * LineTime / 2.0;
777 else
778 Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
779 if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
780 Tdmsks = Tdmsks / 2;
781 if (VStartup * LineTime
782 < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
783 MyError = true;
784 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait
785 + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime;
786 } else
787 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0;
788 } else
789 Tdm = 0;
790
791 if (GPUVMEnable) {
792 if (GPUVMPageTableLevels >= 3)
793 *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1);
794 else
795 *Tno_bw = 0;
796 } else if (!DCCEnable)
797 *Tno_bw = LineTime;
798 else
799 *Tno_bw = LineTime / 4;
800
801 dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
802 - (Tsetup + Tdm) / LineTime
803 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
804
805 Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
806
807 if (myPipe->SourceScan == dm_horz) {
808 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
809 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
810 } else {
811 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
812 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
813 }
814
815 prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
816
817
818 if (GPUVMEnable == true) {
819 Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
820 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1),
821 LineTime / 4.0));
822 } else
823 Tvm_oto = LineTime / 4.0;
824
825 if ((GPUVMEnable == true || DCCEnable == true)) {
826 Tr0_oto = dml_max(
827 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
828 dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4)));
829 } else
830 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
831
832 Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0;
833 Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0;
834 Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0;
835 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75;
836
837 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
838
839 if (dst_y_prefetch_oto < dst_y_prefetch_equ)
840 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
841 else
842 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
843
844 dml_print("DML: VStartup: %d\n", VStartup);
845 dml_print("DML: TCalc: %f\n", TCalc);
846 dml_print("DML: TWait: %f\n", TWait);
847 dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
848 dml_print("DML: LineTime: %f\n", LineTime);
849 dml_print("DML: Tsetup: %f\n", Tsetup);
850 dml_print("DML: Tdm: %f\n", Tdm);
851 dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler);
852 dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler);
853 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
854
855 *PrefetchBandwidth = 0;
856 *DestinationLinesToRequestVMInVBlank = 0;
857 *DestinationLinesToRequestRowInVBlank = 0;
858 *VRatioPrefetchY = 0;
859 *VRatioPrefetchC = 0;
860 *RequiredPrefetchPixDataBWLuma = 0;
861 if (*DestinationLinesForPrefetch > 1) {
862 double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
863 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
864 + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1)
865 + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2))
866 / (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
867
868 double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
869 HostVMInefficiencyFactor + PrefetchSourceLinesY *
870 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
871 PrefetchSourceLinesC * *swath_width_chroma_ub *
872 dml_ceil(BytePerPixelDETC, 2)) /
873 (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 *
874 UrgentLatency * (1 + HostVMDynamicLevels));
875
876 double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow
877 * HostVMInefficiencyFactor + PrefetchSourceLinesY *
878 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
879 PrefetchSourceLinesC * *swath_width_chroma_ub *
880 dml_ceil(BytePerPixelDETC, 2)) /
881 (*DestinationLinesForPrefetch * LineTime -
882 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
883 * (HostVMDynamicLevels + 1) - 1));
884
885 double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub *
886 dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC *
887 *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) /
888 (*DestinationLinesForPrefetch * LineTime -
889 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
890 * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency *
891 (1 + HostVMDynamicLevels));
892
893 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) {
894 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw);
895 }
896 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
897 *PrefetchBandwidth = PrefetchBandwidth1;
898 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) {
899 *PrefetchBandwidth = PrefetchBandwidth2;
900 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
901 *PrefetchBandwidth = PrefetchBandwidth3;
902 } else {
903 *PrefetchBandwidth = PrefetchBandwidth4;
904 }
905
906 if (GPUVMEnable) {
907 TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth,
908 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4));
909 } else {
910 // 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor
911 // so if this needs to be reinstated, then it should be officially done in the VBA code as well.
912 // if (mode_lib->NumberOfCursors > 0 || XFCEnabled)
913 TimeForFetchingMetaPTE = LineTime / 4;
914 // else
915 // TimeForFetchingMetaPTE = 0.0;
916 }
917
918 if ((GPUVMEnable == true || DCCEnable == true)) {
919 TimeForFetchingRowInVBlank =
920 dml_max(
921 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
922 / *PrefetchBandwidth,
923 dml_max(
924 UrgentLatency * (1 + HostVMDynamicLevels),
925 dml_max(
926 (LineTime
927 - TimeForFetchingMetaPTE) / 2.0,
928 LineTime
929 / 4.0)));
930 } else {
931 // See note above dated 5/30/2018
932 // if (NumberOfCursors > 0 || XFCEnabled)
933 TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0;
934 // else // TODO: Did someone else add this??
935 // TimeForFetchingRowInVBlank = 0.0;
936 }
937
938 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
939
940 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
941
942 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
943 // See note above dated 5/30/2018
944 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
945 - ((GPUVMEnable || DCCEnable) ?
946 (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) :
947 0.0); // TODO: Did someone else add this??
948
949 if (LinesToRequestPrefetchPixelData > 0) {
950
951 *VRatioPrefetchY = (double) PrefetchSourceLinesY
952 / LinesToRequestPrefetchPixelData;
953 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
954 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
955 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
956 *VRatioPrefetchY =
957 dml_max(
958 (double) PrefetchSourceLinesY
959 / LinesToRequestPrefetchPixelData,
960 (double) MaxNumSwathY
961 * SwathHeightY
962 / (LinesToRequestPrefetchPixelData
963 - (VInitPreFillY
964 - 3.0)
965 / 2.0));
966 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
967 } else {
968 MyError = true;
969 *VRatioPrefetchY = 0;
970 }
971 }
972
973 *VRatioPrefetchC = (double) PrefetchSourceLinesC
974 / LinesToRequestPrefetchPixelData;
975 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
976
977 if ((SwathHeightC > 4)) {
978 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
979 *VRatioPrefetchC =
980 dml_max(
981 *VRatioPrefetchC,
982 (double) MaxNumSwathC
983 * SwathHeightC
984 / (LinesToRequestPrefetchPixelData
985 - (VInitPreFillC
986 - 3.0)
987 / 2.0));
988 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
989 } else {
990 MyError = true;
991 *VRatioPrefetchC = 0;
992 }
993 }
994
995 *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane
996 * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData
997 * dml_ceil(BytePerPixelDETY, 1)
998 * *swath_width_luma_ub / LineTime;
999 *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane
1000 * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData
1001 * dml_ceil(BytePerPixelDETC, 2)
1002 * *swath_width_chroma_ub / LineTime;
1003 } else {
1004 MyError = true;
1005 *VRatioPrefetchY = 0;
1006 *VRatioPrefetchC = 0;
1007 *RequiredPrefetchPixDataBWLuma = 0;
1008 *RequiredPrefetchPixDataBWChroma = 0;
1009 }
1010
1011 dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE);
1012 dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank);
1013 dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank);
1014 dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime);
1015 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1016
1017 } else {
1018 MyError = true;
1019 }
1020
1021 {
1022 double prefetch_vm_bw;
1023 double prefetch_row_bw;
1024
1025 if (PDEAndMetaPTEBytesFrame == 0) {
1026 prefetch_vm_bw = 0;
1027 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1028 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1029 } else {
1030 prefetch_vm_bw = 0;
1031 MyError = true;
1032 }
1033 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1034 prefetch_row_bw = 0;
1035 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1036 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1037 } else {
1038 prefetch_row_bw = 0;
1039 MyError = true;
1040 }
1041
1042 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1043 }
1044
1045 if (MyError) {
1046 *PrefetchBandwidth = 0;
1047 TimeForFetchingMetaPTE = 0;
1048 TimeForFetchingRowInVBlank = 0;
1049 *DestinationLinesToRequestVMInVBlank = 0;
1050 *DestinationLinesToRequestRowInVBlank = 0;
1051 *DestinationLinesForPrefetch = 0;
1052 LinesToRequestPrefetchPixelData = 0;
1053 *VRatioPrefetchY = 0;
1054 *VRatioPrefetchC = 0;
1055 *RequiredPrefetchPixDataBWLuma = 0;
1056 *RequiredPrefetchPixDataBWChroma = 0;
1057 }
1058
1059 return MyError;
1060 }
1061
/*
 * Snap Clock to a frequency of the form (4 * VCOSpeed) / N.
 *
 * The divider N is obtained by rounding (4 * VCOSpeed) / Clock down with
 * dml_floor(..., 1), so for a positive divider the result is not lower than
 * the requested Clock.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double ScaledVCOSpeed = VCOSpeed * 4;
	const double Divider = dml_floor(ScaledVCOSpeed / Clock, 1);

	return ScaledVCOSpeed / Divider;
}
1066
/*
 * Snap Clock to a frequency of the form (4 * VCOSpeed) / N.
 *
 * The divider N is obtained by rounding (4 * VCOSpeed) / Clock up with
 * dml_ceil(..., 1), so the result is not higher than the requested Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ScaledVCOSpeed = VCOSpeed * 4;
	const double Divider = dml_ceil(ScaledVCOSpeed / Clock, 1);

	return ScaledVCOSpeed / Divider;
}
1071
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,unsigned int ViewportWidth,unsigned int ViewportHeight,double DETBufferSize,unsigned int RequestHeight256Byte,unsigned int SwathHeight,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixel,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlock,unsigned int * MaxCompressedBlock,unsigned int * Independent64ByteBlock)1072 static double CalculateDCCConfiguration(
1073 bool DCCEnabled,
1074 bool DCCProgrammingAssumesScanDirectionUnknown,
1075 unsigned int ViewportWidth,
1076 unsigned int ViewportHeight,
1077 double DETBufferSize,
1078 unsigned int RequestHeight256Byte,
1079 unsigned int SwathHeight,
1080 enum dm_swizzle_mode TilingFormat,
1081 unsigned int BytePerPixel,
1082 enum scan_direction_class ScanOrientation,
1083 unsigned int *MaxUncompressedBlock,
1084 unsigned int *MaxCompressedBlock,
1085 unsigned int *Independent64ByteBlock)
1086 {
1087 double MaximumDCCCompressionSurface = 0.0;
1088 enum {
1089 REQ_256Bytes,
1090 REQ_128BytesNonContiguous,
1091 REQ_128BytesContiguous,
1092 REQ_NA
1093 } Request = REQ_NA;
1094
1095 if (DCCEnabled == true) {
1096 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1097 if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel
1098 && DETBufferSize
1099 >= 256 / RequestHeight256Byte
1100 * ViewportHeight) {
1101 Request = REQ_256Bytes;
1102 } else if ((DETBufferSize
1103 < RequestHeight256Byte * ViewportWidth * BytePerPixel
1104 && (BytePerPixel == 2 || BytePerPixel == 4))
1105 || (DETBufferSize
1106 < 256 / RequestHeight256Byte
1107 * ViewportHeight
1108 && BytePerPixel == 8
1109 && (TilingFormat == dm_sw_4kb_d
1110 || TilingFormat
1111 == dm_sw_4kb_d_x
1112 || TilingFormat
1113 == dm_sw_var_d
1114 || TilingFormat
1115 == dm_sw_var_d_x
1116 || TilingFormat
1117 == dm_sw_64kb_d
1118 || TilingFormat
1119 == dm_sw_64kb_d_x
1120 || TilingFormat
1121 == dm_sw_64kb_d_t
1122 || TilingFormat
1123 == dm_sw_64kb_r_x))) {
1124 Request = REQ_128BytesNonContiguous;
1125 } else {
1126 Request = REQ_128BytesContiguous;
1127 }
1128 } else {
1129 if (BytePerPixel == 1) {
1130 if (ScanOrientation == dm_vert || SwathHeight == 16) {
1131 Request = REQ_256Bytes;
1132 } else {
1133 Request = REQ_128BytesContiguous;
1134 }
1135 } else if (BytePerPixel == 2) {
1136 if ((ScanOrientation == dm_vert && SwathHeight == 16) || (ScanOrientation != dm_vert && SwathHeight == 8)) {
1137 Request = REQ_256Bytes;
1138 } else if (ScanOrientation == dm_vert) {
1139 Request = REQ_128BytesContiguous;
1140 } else {
1141 Request = REQ_128BytesNonContiguous;
1142 }
1143 } else if (BytePerPixel == 4) {
1144 if (SwathHeight == 8) {
1145 Request = REQ_256Bytes;
1146 } else if (ScanOrientation == dm_vert) {
1147 Request = REQ_128BytesContiguous;
1148 } else {
1149 Request = REQ_128BytesNonContiguous;
1150 }
1151 } else if (BytePerPixel == 8) {
1152 if (TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x
1153 || TilingFormat == dm_sw_var_d
1154 || TilingFormat == dm_sw_var_d_x
1155 || TilingFormat == dm_sw_64kb_d
1156 || TilingFormat == dm_sw_64kb_d_x
1157 || TilingFormat == dm_sw_64kb_d_t
1158 || TilingFormat == dm_sw_64kb_r_x) {
1159 if ((ScanOrientation == dm_vert && SwathHeight == 8)
1160 || (ScanOrientation != dm_vert
1161 && SwathHeight == 4)) {
1162 Request = REQ_256Bytes;
1163 } else if (ScanOrientation != dm_vert) {
1164 Request = REQ_128BytesContiguous;
1165 } else {
1166 Request = REQ_128BytesNonContiguous;
1167 }
1168 } else {
1169 if (ScanOrientation != dm_vert || SwathHeight == 8) {
1170 Request = REQ_256Bytes;
1171 } else {
1172 Request = REQ_128BytesContiguous;
1173 }
1174 }
1175 }
1176 }
1177 } else {
1178 Request = REQ_NA;
1179 }
1180
1181 if (Request == REQ_256Bytes) {
1182 *MaxUncompressedBlock = 256;
1183 *MaxCompressedBlock = 256;
1184 *Independent64ByteBlock = false;
1185 MaximumDCCCompressionSurface = 4.0;
1186 } else if (Request == REQ_128BytesContiguous) {
1187 *MaxUncompressedBlock = 128;
1188 *MaxCompressedBlock = 128;
1189 *Independent64ByteBlock = false;
1190 MaximumDCCCompressionSurface = 2.0;
1191 } else if (Request == REQ_128BytesNonContiguous) {
1192 *MaxUncompressedBlock = 256;
1193 *MaxCompressedBlock = 64;
1194 *Independent64ByteBlock = true;
1195 MaximumDCCCompressionSurface = 4.0;
1196 } else {
1197 *MaxUncompressedBlock = 0;
1198 *MaxCompressedBlock = 0;
1199 *Independent64ByteBlock = 0;
1200 MaximumDCCCompressionSurface = 0.0;
1201 }
1202
1203 return MaximumDCCCompressionSurface;
1204 }
1205
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1206 static double CalculatePrefetchSourceLines(
1207 struct display_mode_lib *mode_lib,
1208 double VRatio,
1209 double vtaps,
1210 bool Interlace,
1211 bool ProgressiveToInterlaceUnitInOPP,
1212 unsigned int SwathHeight,
1213 unsigned int ViewportYStart,
1214 double *VInitPreFill,
1215 unsigned int *MaxNumSwath)
1216 {
1217 unsigned int MaxPartialSwath;
1218
1219 if (ProgressiveToInterlaceUnitInOPP)
1220 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1221 else
1222 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1223
1224 if (!mode_lib->vba.IgnoreViewportPositioning) {
1225
1226 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1227
1228 if (*VInitPreFill > 1.0)
1229 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1230 else
1231 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1232 % SwathHeight;
1233 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1234
1235 } else {
1236
1237 if (ViewportYStart != 0)
1238 dml_print(
1239 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1240
1241 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1242
1243 if (*VInitPreFill > 1.0)
1244 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1245 else
1246 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1247 % SwathHeight;
1248 }
1249
1250 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1251 }
1252
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int ViewportWidth,unsigned int ViewportHeight,unsigned int SwathWidth,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,unsigned int VMMPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1253 static unsigned int CalculateVMAndRowBytes(
1254 struct display_mode_lib *mode_lib,
1255 bool DCCEnable,
1256 unsigned int BlockHeight256Bytes,
1257 unsigned int BlockWidth256Bytes,
1258 enum source_format_class SourcePixelFormat,
1259 unsigned int SurfaceTiling,
1260 unsigned int BytePerPixel,
1261 enum scan_direction_class ScanDirection,
1262 unsigned int ViewportWidth,
1263 unsigned int ViewportHeight,
1264 unsigned int SwathWidth,
1265 bool GPUVMEnable,
1266 bool HostVMEnable,
1267 unsigned int HostVMMaxPageTableLevels,
1268 unsigned int HostVMCachedPageTableLevels,
1269 unsigned int VMMPageSize,
1270 unsigned int PTEBufferSizeInRequests,
1271 unsigned int Pitch,
1272 unsigned int DCCMetaPitch,
1273 unsigned int *MacroTileWidth,
1274 unsigned int *MetaRowByte,
1275 unsigned int *PixelPTEBytesPerRow,
1276 bool *PTEBufferSizeNotExceeded,
1277 unsigned int *dpte_row_width_ub,
1278 unsigned int *dpte_row_height,
1279 unsigned int *MetaRequestWidth,
1280 unsigned int *MetaRequestHeight,
1281 unsigned int *meta_row_width,
1282 unsigned int *meta_row_height,
1283 unsigned int *vm_group_bytes,
1284 unsigned int *dpte_group_bytes,
1285 unsigned int *PixelPTEReqWidth,
1286 unsigned int *PixelPTEReqHeight,
1287 unsigned int *PTERequestSize,
1288 unsigned int *DPDE0BytesFrame,
1289 unsigned int *MetaPTEBytesFrame)
1290 {
1291 unsigned int MPDEBytesFrame;
1292 unsigned int DCCMetaSurfaceBytes;
1293 unsigned int MacroTileSizeBytes;
1294 unsigned int MacroTileHeight;
1295 unsigned int ExtraDPDEBytesFrame;
1296 unsigned int PDEAndMetaPTEBytesFrame;
1297 unsigned int PixelPTEReqHeightPTEs = 0;
1298
1299 if (DCCEnable == true) {
1300 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1301 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1302 if (ScanDirection == dm_horz) {
1303 *meta_row_height = *MetaRequestHeight;
1304 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1305 + *MetaRequestWidth;
1306 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1307 } else {
1308 *meta_row_height = *MetaRequestWidth;
1309 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1310 + *MetaRequestHeight;
1311 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1312 }
1313 if (ScanDirection == dm_horz) {
1314 DCCMetaSurfaceBytes = DCCMetaPitch
1315 * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1316 + 64 * BlockHeight256Bytes) * BytePerPixel
1317 / 256;
1318 } else {
1319 DCCMetaSurfaceBytes = DCCMetaPitch
1320 * (dml_ceil(
1321 (double) ViewportHeight - 1,
1322 64 * BlockHeight256Bytes)
1323 + 64 * BlockHeight256Bytes) * BytePerPixel
1324 / 256;
1325 }
1326 if (GPUVMEnable == true) {
1327 *MetaPTEBytesFrame = (dml_ceil(
1328 (double) (DCCMetaSurfaceBytes - VMMPageSize)
1329 / (8 * VMMPageSize),
1330 1) + 1) * 64;
1331 MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2);
1332 } else {
1333 *MetaPTEBytesFrame = 0;
1334 MPDEBytesFrame = 0;
1335 }
1336 } else {
1337 *MetaPTEBytesFrame = 0;
1338 MPDEBytesFrame = 0;
1339 *MetaRowByte = 0;
1340 }
1341
1342 if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
1343 MacroTileSizeBytes = 256;
1344 MacroTileHeight = BlockHeight256Bytes;
1345 } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
1346 || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
1347 MacroTileSizeBytes = 4096;
1348 MacroTileHeight = 4 * BlockHeight256Bytes;
1349 } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
1350 || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
1351 || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
1352 || SurfaceTiling == dm_sw_64kb_r_x) {
1353 MacroTileSizeBytes = 65536;
1354 MacroTileHeight = 16 * BlockHeight256Bytes;
1355 } else {
1356 MacroTileSizeBytes = 262144;
1357 MacroTileHeight = 32 * BlockHeight256Bytes;
1358 }
1359 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1360
1361 if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) {
1362 if (ScanDirection == dm_horz) {
1363 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1364 } else {
1365 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1366 }
1367 ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3);
1368 } else {
1369 *DPDE0BytesFrame = 0;
1370 ExtraDPDEBytesFrame = 0;
1371 }
1372
1373 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1374 + ExtraDPDEBytesFrame;
1375
1376 if (HostVMEnable == true) {
1377 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
1378 }
1379
1380 if (GPUVMEnable == true) {
1381 double FractionOfPTEReturnDrop;
1382
1383 if (SurfaceTiling == dm_sw_linear) {
1384 PixelPTEReqHeightPTEs = 1;
1385 *PixelPTEReqHeight = 1;
1386 *PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
1387 *PTERequestSize = 64;
1388 FractionOfPTEReturnDrop = 0;
1389 } else if (MacroTileSizeBytes == 4096) {
1390 PixelPTEReqHeightPTEs = 1;
1391 *PixelPTEReqHeight = MacroTileHeight;
1392 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1393 *PTERequestSize = 64;
1394 if (ScanDirection == dm_horz)
1395 FractionOfPTEReturnDrop = 0;
1396 else
1397 FractionOfPTEReturnDrop = 7 / 8;
1398 } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
1399 PixelPTEReqHeightPTEs = 16;
1400 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1401 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1402 *PTERequestSize = 128;
1403 FractionOfPTEReturnDrop = 0;
1404 } else {
1405 PixelPTEReqHeightPTEs = 1;
1406 *PixelPTEReqHeight = MacroTileHeight;
1407 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1408 *PTERequestSize = 64;
1409 FractionOfPTEReturnDrop = 0;
1410 }
1411
1412 if (SurfaceTiling == dm_sw_linear) {
1413 *dpte_row_height = dml_min(128,
1414 1 << (unsigned int) dml_floor(
1415 dml_log2(
1416 (double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch),
1417 1));
1418 *dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1419 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1420 } else if (ScanDirection == dm_horz) {
1421 *dpte_row_height = *PixelPTEReqHeight;
1422 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1423 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1424 } else {
1425 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1426 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1427 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1428 }
1429 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1430 <= 64 * PTEBufferSizeInRequests) {
1431 *PTEBufferSizeNotExceeded = true;
1432 } else {
1433 *PTEBufferSizeNotExceeded = false;
1434 }
1435 } else {
1436 *PixelPTEBytesPerRow = 0;
1437 *PTEBufferSizeNotExceeded = true;
1438 }
1439 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame);
1440
1441 if (HostVMEnable == true) {
1442 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
1443 }
1444
1445 if (HostVMEnable == true) {
1446 *vm_group_bytes = 512;
1447 *dpte_group_bytes = 512;
1448 } else if (GPUVMEnable == true) {
1449 *vm_group_bytes = 2048;
1450 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) {
1451 *dpte_group_bytes = 512;
1452 } else {
1453 *dpte_group_bytes = 2048;
1454 }
1455 } else {
1456 *vm_group_bytes = 0;
1457 *dpte_group_bytes = 0;
1458 }
1459
1460 return PDEAndMetaPTEBytesFrame;
1461 }
1462
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1463 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1464 struct display_mode_lib *mode_lib)
1465 {
1466 struct vba_vars_st *locals = &mode_lib->vba;
1467 unsigned int j, k;
1468
1469 mode_lib->vba.WritebackDISPCLK = 0.0;
1470 mode_lib->vba.DISPCLKWithRamping = 0;
1471 mode_lib->vba.DISPCLKWithoutRamping = 0;
1472 mode_lib->vba.GlobalDPPCLK = 0.0;
1473
1474 // DISPCLK and DPPCLK Calculation
1475 //
1476 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1477 if (mode_lib->vba.WritebackEnable[k]) {
1478 mode_lib->vba.WritebackDISPCLK =
1479 dml_max(
1480 mode_lib->vba.WritebackDISPCLK,
1481 CalculateWriteBackDISPCLK(
1482 mode_lib->vba.WritebackPixelFormat[k],
1483 mode_lib->vba.PixelClock[k],
1484 mode_lib->vba.WritebackHRatio[k],
1485 mode_lib->vba.WritebackVRatio[k],
1486 mode_lib->vba.WritebackLumaHTaps[k],
1487 mode_lib->vba.WritebackLumaVTaps[k],
1488 mode_lib->vba.WritebackChromaHTaps[k],
1489 mode_lib->vba.WritebackChromaVTaps[k],
1490 mode_lib->vba.WritebackDestinationWidth[k],
1491 mode_lib->vba.HTotal[k],
1492 mode_lib->vba.WritebackChromaLineBufferWidth));
1493 }
1494 }
1495
1496 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1497 if (mode_lib->vba.HRatio[k] > 1) {
1498 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1499 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1500 mode_lib->vba.MaxPSCLToLBThroughput
1501 * mode_lib->vba.HRatio[k]
1502 / dml_ceil(
1503 mode_lib->vba.htaps[k]
1504 / 6.0,
1505 1));
1506 } else {
1507 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1508 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1509 mode_lib->vba.MaxPSCLToLBThroughput);
1510 }
1511
1512 mode_lib->vba.DPPCLKUsingSingleDPPLuma =
1513 mode_lib->vba.PixelClock[k]
1514 * dml_max(
1515 mode_lib->vba.vtaps[k] / 6.0
1516 * dml_min(
1517 1.0,
1518 mode_lib->vba.HRatio[k]),
1519 dml_max(
1520 mode_lib->vba.HRatio[k]
1521 * mode_lib->vba.VRatio[k]
1522 / locals->PSCL_THROUGHPUT_LUMA[k],
1523 1.0));
1524
1525 if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
1526 && mode_lib->vba.DPPCLKUsingSingleDPPLuma
1527 < 2 * mode_lib->vba.PixelClock[k]) {
1528 mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
1529 }
1530
1531 if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
1532 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
1533 locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1534 locals->DPPCLKUsingSingleDPP[k] =
1535 mode_lib->vba.DPPCLKUsingSingleDPPLuma;
1536 } else {
1537 if (mode_lib->vba.HRatio[k] > 1) {
1538 locals->PSCL_THROUGHPUT_CHROMA[k] =
1539 dml_min(
1540 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1541 mode_lib->vba.MaxPSCLToLBThroughput
1542 * mode_lib->vba.HRatio[k]
1543 / 2
1544 / dml_ceil(
1545 mode_lib->vba.HTAPsChroma[k]
1546 / 6.0,
1547 1.0));
1548 } else {
1549 locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1550 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1551 mode_lib->vba.MaxPSCLToLBThroughput);
1552 }
1553 mode_lib->vba.DPPCLKUsingSingleDPPChroma =
1554 mode_lib->vba.PixelClock[k]
1555 * dml_max(
1556 mode_lib->vba.VTAPsChroma[k]
1557 / 6.0
1558 * dml_min(
1559 1.0,
1560 mode_lib->vba.HRatio[k]
1561 / 2),
1562 dml_max(
1563 mode_lib->vba.HRatio[k]
1564 * mode_lib->vba.VRatio[k]
1565 / 4
1566 / locals->PSCL_THROUGHPUT_CHROMA[k],
1567 1.0));
1568
1569 if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
1570 && mode_lib->vba.DPPCLKUsingSingleDPPChroma
1571 < 2 * mode_lib->vba.PixelClock[k]) {
1572 mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
1573 * mode_lib->vba.PixelClock[k];
1574 }
1575
1576 locals->DPPCLKUsingSingleDPP[k] = dml_max(
1577 mode_lib->vba.DPPCLKUsingSingleDPPLuma,
1578 mode_lib->vba.DPPCLKUsingSingleDPPChroma);
1579 }
1580 }
1581
1582 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1583 if (mode_lib->vba.BlendingAndTiming[k] != k)
1584 continue;
1585 if (mode_lib->vba.ODMCombineEnabled[k]) {
1586 mode_lib->vba.DISPCLKWithRamping =
1587 dml_max(
1588 mode_lib->vba.DISPCLKWithRamping,
1589 mode_lib->vba.PixelClock[k] / 2
1590 * (1
1591 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1592 / 100)
1593 * (1
1594 + mode_lib->vba.DISPCLKRampingMargin
1595 / 100));
1596 mode_lib->vba.DISPCLKWithoutRamping =
1597 dml_max(
1598 mode_lib->vba.DISPCLKWithoutRamping,
1599 mode_lib->vba.PixelClock[k] / 2
1600 * (1
1601 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1602 / 100));
1603 } else if (!mode_lib->vba.ODMCombineEnabled[k]) {
1604 mode_lib->vba.DISPCLKWithRamping =
1605 dml_max(
1606 mode_lib->vba.DISPCLKWithRamping,
1607 mode_lib->vba.PixelClock[k]
1608 * (1
1609 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1610 / 100)
1611 * (1
1612 + mode_lib->vba.DISPCLKRampingMargin
1613 / 100));
1614 mode_lib->vba.DISPCLKWithoutRamping =
1615 dml_max(
1616 mode_lib->vba.DISPCLKWithoutRamping,
1617 mode_lib->vba.PixelClock[k]
1618 * (1
1619 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1620 / 100));
1621 }
1622 }
1623
1624 mode_lib->vba.DISPCLKWithRamping = dml_max(
1625 mode_lib->vba.DISPCLKWithRamping,
1626 mode_lib->vba.WritebackDISPCLK);
1627 mode_lib->vba.DISPCLKWithoutRamping = dml_max(
1628 mode_lib->vba.DISPCLKWithoutRamping,
1629 mode_lib->vba.WritebackDISPCLK);
1630
1631 ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
1632 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1633 mode_lib->vba.DISPCLKWithRamping,
1634 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1635 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1636 mode_lib->vba.DISPCLKWithoutRamping,
1637 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1638 mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1639 mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz,
1640 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1641 if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
1642 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1643 mode_lib->vba.DISPCLK_calculated =
1644 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
1645 } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
1646 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1647 mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
1648 } else {
1649 mode_lib->vba.DISPCLK_calculated =
1650 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
1651 }
1652 DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
1653
1654 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1655 mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k]
1656 / mode_lib->vba.DPPPerPlane[k]
1657 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1658 mode_lib->vba.GlobalDPPCLK = dml_max(
1659 mode_lib->vba.GlobalDPPCLK,
1660 mode_lib->vba.DPPCLK_calculated[k]);
1661 }
1662 mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
1663 mode_lib->vba.GlobalDPPCLK,
1664 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1665 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1666 mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
1667 * dml_ceil(
1668 mode_lib->vba.DPPCLK_calculated[k] * 255
1669 / mode_lib->vba.GlobalDPPCLK,
1670 1);
1671 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
1672 }
1673
1674 // Urgent and B P-State/DRAM Clock Change Watermark
1675 DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
1676 DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
1677 DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
1678
1679 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1680 bool MainPlaneDoesODMCombine = false;
1681
1682 if (mode_lib->vba.SourceScan[k] == dm_horz)
1683 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
1684 else
1685 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
1686
1687 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1688 MainPlaneDoesODMCombine = true;
1689 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
1690 if (mode_lib->vba.BlendingAndTiming[k] == j
1691 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1692 MainPlaneDoesODMCombine = true;
1693
1694 if (MainPlaneDoesODMCombine == true)
1695 locals->SwathWidthY[k] = dml_min(
1696 (double) locals->SwathWidthSingleDPPY[k],
1697 dml_round(
1698 mode_lib->vba.HActive[k] / 2.0
1699 * mode_lib->vba.HRatio[k]));
1700 else
1701 locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k]
1702 / mode_lib->vba.DPPPerPlane[k];
1703 }
1704
1705 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1706 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
1707 locals->BytePerPixelDETY[k] = 8;
1708 locals->BytePerPixelDETC[k] = 0;
1709 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
1710 locals->BytePerPixelDETY[k] = 4;
1711 locals->BytePerPixelDETC[k] = 0;
1712 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
1713 locals->BytePerPixelDETY[k] = 2;
1714 locals->BytePerPixelDETC[k] = 0;
1715 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
1716 locals->BytePerPixelDETY[k] = 1;
1717 locals->BytePerPixelDETC[k] = 0;
1718 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
1719 locals->BytePerPixelDETY[k] = 1;
1720 locals->BytePerPixelDETC[k] = 2;
1721 } else { // dm_420_10
1722 locals->BytePerPixelDETY[k] = 4.0 / 3.0;
1723 locals->BytePerPixelDETC[k] = 8.0 / 3.0;
1724 }
1725 }
1726
1727 mode_lib->vba.TotalDataReadBandwidth = 0.0;
1728 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1729 locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k]
1730 * dml_ceil(locals->BytePerPixelDETY[k], 1)
1731 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1732 * mode_lib->vba.VRatio[k];
1733 locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k]
1734 / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2)
1735 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1736 * mode_lib->vba.VRatio[k] / 2;
1737 DTRACE(
1738 " read_bw[%i] = %fBps",
1739 k,
1740 locals->ReadBandwidthPlaneLuma[k]
1741 + locals->ReadBandwidthPlaneChroma[k]);
1742 mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k]
1743 + locals->ReadBandwidthPlaneChroma[k];
1744 }
1745
1746 // DCFCLK Deep Sleep
1747 CalculateDCFCLKDeepSleep(
1748 mode_lib,
1749 mode_lib->vba.NumberOfActivePlanes,
1750 locals->BytePerPixelDETY,
1751 locals->BytePerPixelDETC,
1752 mode_lib->vba.VRatio,
1753 locals->SwathWidthY,
1754 mode_lib->vba.DPPPerPlane,
1755 mode_lib->vba.HRatio,
1756 mode_lib->vba.PixelClock,
1757 locals->PSCL_THROUGHPUT_LUMA,
1758 locals->PSCL_THROUGHPUT_CHROMA,
1759 locals->DPPCLK,
1760 &mode_lib->vba.DCFCLKDeepSleep);
1761
1762 // DSCCLK
1763 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1764 if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
1765 locals->DSCCLK_calculated[k] = 0.0;
1766 } else {
1767 if (mode_lib->vba.OutputFormat[k] == dm_420
1768 || mode_lib->vba.OutputFormat[k] == dm_n422)
1769 mode_lib->vba.DSCFormatFactor = 2;
1770 else
1771 mode_lib->vba.DSCFormatFactor = 1;
1772 if (mode_lib->vba.ODMCombineEnabled[k])
1773 locals->DSCCLK_calculated[k] =
1774 mode_lib->vba.PixelClockBackEnd[k] / 6
1775 / mode_lib->vba.DSCFormatFactor
1776 / (1
1777 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1778 / 100);
1779 else
1780 locals->DSCCLK_calculated[k] =
1781 mode_lib->vba.PixelClockBackEnd[k] / 3
1782 / mode_lib->vba.DSCFormatFactor
1783 / (1
1784 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1785 / 100);
1786 }
1787 }
1788
1789 // DSC Delay
1790 // TODO
1791 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1792 double bpp = mode_lib->vba.OutputBpp[k];
1793 unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
1794
1795 if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
1796 if (!mode_lib->vba.ODMCombineEnabled[k]) {
1797 locals->DSCDelay[k] =
1798 dscceComputeDelay(
1799 mode_lib->vba.DSCInputBitPerComponent[k],
1800 bpp,
1801 dml_ceil(
1802 (double) mode_lib->vba.HActive[k]
1803 / mode_lib->vba.NumberOfDSCSlices[k],
1804 1),
1805 slices,
1806 mode_lib->vba.OutputFormat[k])
1807 + dscComputeDelay(
1808 mode_lib->vba.OutputFormat[k]);
1809 } else {
1810 locals->DSCDelay[k] =
1811 2
1812 * (dscceComputeDelay(
1813 mode_lib->vba.DSCInputBitPerComponent[k],
1814 bpp,
1815 dml_ceil(
1816 (double) mode_lib->vba.HActive[k]
1817 / mode_lib->vba.NumberOfDSCSlices[k],
1818 1),
1819 slices / 2.0,
1820 mode_lib->vba.OutputFormat[k])
1821 + dscComputeDelay(
1822 mode_lib->vba.OutputFormat[k]));
1823 }
1824 locals->DSCDelay[k] = locals->DSCDelay[k]
1825 * mode_lib->vba.PixelClock[k]
1826 / mode_lib->vba.PixelClockBackEnd[k];
1827 } else {
1828 locals->DSCDelay[k] = 0;
1829 }
1830 }
1831
1832 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
1833 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
1834 if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
1835 && mode_lib->vba.DSCEnabled[j])
1836 locals->DSCDelay[k] = locals->DSCDelay[j];
1837
1838 // Prefetch
1839 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1840 unsigned int PDEAndMetaPTEBytesFrameY;
1841 unsigned int PixelPTEBytesPerRowY;
1842 unsigned int MetaRowByteY;
1843 unsigned int MetaRowByteC;
1844 unsigned int PDEAndMetaPTEBytesFrameC;
1845 unsigned int PixelPTEBytesPerRowC;
1846 bool PTEBufferSizeNotExceededY;
1847 bool PTEBufferSizeNotExceededC;
1848
1849 Calculate256BBlockSizes(
1850 mode_lib->vba.SourcePixelFormat[k],
1851 mode_lib->vba.SurfaceTiling[k],
1852 dml_ceil(locals->BytePerPixelDETY[k], 1),
1853 dml_ceil(locals->BytePerPixelDETC[k], 2),
1854 &locals->BlockHeight256BytesY[k],
1855 &locals->BlockHeight256BytesC[k],
1856 &locals->BlockWidth256BytesY[k],
1857 &locals->BlockWidth256BytesC[k]);
1858
1859 locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
1860 mode_lib,
1861 mode_lib->vba.VRatio[k],
1862 mode_lib->vba.vtaps[k],
1863 mode_lib->vba.Interlace[k],
1864 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1865 mode_lib->vba.SwathHeightY[k],
1866 mode_lib->vba.ViewportYStartY[k],
1867 &locals->VInitPreFillY[k],
1868 &locals->MaxNumSwathY[k]);
1869
1870 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
1871 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
1872 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
1873 && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
1874 PDEAndMetaPTEBytesFrameC =
1875 CalculateVMAndRowBytes(
1876 mode_lib,
1877 mode_lib->vba.DCCEnable[k],
1878 locals->BlockHeight256BytesC[k],
1879 locals->BlockWidth256BytesC[k],
1880 mode_lib->vba.SourcePixelFormat[k],
1881 mode_lib->vba.SurfaceTiling[k],
1882 dml_ceil(
1883 locals->BytePerPixelDETC[k],
1884 2),
1885 mode_lib->vba.SourceScan[k],
1886 mode_lib->vba.ViewportWidth[k] / 2,
1887 mode_lib->vba.ViewportHeight[k] / 2,
1888 locals->SwathWidthY[k] / 2,
1889 mode_lib->vba.GPUVMEnable,
1890 mode_lib->vba.HostVMEnable,
1891 mode_lib->vba.HostVMMaxPageTableLevels,
1892 mode_lib->vba.HostVMCachedPageTableLevels,
1893 mode_lib->vba.VMMPageSize,
1894 mode_lib->vba.PTEBufferSizeInRequestsChroma,
1895 mode_lib->vba.PitchC[k],
1896 mode_lib->vba.DCCMetaPitchC[k],
1897 &locals->MacroTileWidthC[k],
1898 &MetaRowByteC,
1899 &PixelPTEBytesPerRowC,
1900 &PTEBufferSizeNotExceededC,
1901 &locals->dpte_row_width_chroma_ub[k],
1902 &locals->dpte_row_height_chroma[k],
1903 &locals->meta_req_width_chroma[k],
1904 &locals->meta_req_height_chroma[k],
1905 &locals->meta_row_width_chroma[k],
1906 &locals->meta_row_height_chroma[k],
1907 &locals->vm_group_bytes_chroma,
1908 &locals->dpte_group_bytes_chroma,
1909 &locals->PixelPTEReqWidthC[k],
1910 &locals->PixelPTEReqHeightC[k],
1911 &locals->PTERequestSizeC[k],
1912 &locals->dpde0_bytes_per_frame_ub_c[k],
1913 &locals->meta_pte_bytes_per_frame_ub_c[k]);
1914
1915 locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
1916 mode_lib,
1917 mode_lib->vba.VRatio[k] / 2,
1918 mode_lib->vba.VTAPsChroma[k],
1919 mode_lib->vba.Interlace[k],
1920 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1921 mode_lib->vba.SwathHeightC[k],
1922 mode_lib->vba.ViewportYStartC[k],
1923 &locals->VInitPreFillC[k],
1924 &locals->MaxNumSwathC[k]);
1925 } else {
1926 PixelPTEBytesPerRowC = 0;
1927 PDEAndMetaPTEBytesFrameC = 0;
1928 MetaRowByteC = 0;
1929 locals->MaxNumSwathC[k] = 0;
1930 locals->PrefetchSourceLinesC[k] = 0;
1931 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
1932 }
1933
1934 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
1935 mode_lib,
1936 mode_lib->vba.DCCEnable[k],
1937 locals->BlockHeight256BytesY[k],
1938 locals->BlockWidth256BytesY[k],
1939 mode_lib->vba.SourcePixelFormat[k],
1940 mode_lib->vba.SurfaceTiling[k],
1941 dml_ceil(locals->BytePerPixelDETY[k], 1),
1942 mode_lib->vba.SourceScan[k],
1943 mode_lib->vba.ViewportWidth[k],
1944 mode_lib->vba.ViewportHeight[k],
1945 locals->SwathWidthY[k],
1946 mode_lib->vba.GPUVMEnable,
1947 mode_lib->vba.HostVMEnable,
1948 mode_lib->vba.HostVMMaxPageTableLevels,
1949 mode_lib->vba.HostVMCachedPageTableLevels,
1950 mode_lib->vba.VMMPageSize,
1951 locals->PTEBufferSizeInRequestsForLuma,
1952 mode_lib->vba.PitchY[k],
1953 mode_lib->vba.DCCMetaPitchY[k],
1954 &locals->MacroTileWidthY[k],
1955 &MetaRowByteY,
1956 &PixelPTEBytesPerRowY,
1957 &PTEBufferSizeNotExceededY,
1958 &locals->dpte_row_width_luma_ub[k],
1959 &locals->dpte_row_height[k],
1960 &locals->meta_req_width[k],
1961 &locals->meta_req_height[k],
1962 &locals->meta_row_width[k],
1963 &locals->meta_row_height[k],
1964 &locals->vm_group_bytes[k],
1965 &locals->dpte_group_bytes[k],
1966 &locals->PixelPTEReqWidthY[k],
1967 &locals->PixelPTEReqHeightY[k],
1968 &locals->PTERequestSizeY[k],
1969 &locals->dpde0_bytes_per_frame_ub_l[k],
1970 &locals->meta_pte_bytes_per_frame_ub_l[k]);
1971
1972 locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
1973 locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
1974 + PDEAndMetaPTEBytesFrameC;
1975 locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
1976
1977 CalculateActiveRowBandwidth(
1978 mode_lib->vba.GPUVMEnable,
1979 mode_lib->vba.SourcePixelFormat[k],
1980 mode_lib->vba.VRatio[k],
1981 mode_lib->vba.DCCEnable[k],
1982 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
1983 MetaRowByteY,
1984 MetaRowByteC,
1985 locals->meta_row_height[k],
1986 locals->meta_row_height_chroma[k],
1987 PixelPTEBytesPerRowY,
1988 PixelPTEBytesPerRowC,
1989 locals->dpte_row_height[k],
1990 locals->dpte_row_height_chroma[k],
1991 &locals->meta_row_bw[k],
1992 &locals->dpte_row_bw[k]);
1993 }
1994
1995 mode_lib->vba.TotalDCCActiveDPP = 0;
1996 mode_lib->vba.TotalActiveDPP = 0;
1997 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1998 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
1999 + mode_lib->vba.DPPPerPlane[k];
2000 if (mode_lib->vba.DCCEnable[k])
2001 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
2002 + mode_lib->vba.DPPPerPlane[k];
2003 }
2004
2005 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
2006 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2007 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2008 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
2009
2010 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
2011 (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
2012 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel
2013 * mode_lib->vba.NumberOfChannels
2014 / mode_lib->vba.ReturnBW;
2015
2016 mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency(
2017 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency,
2018 mode_lib->vba.TotalActiveDPP,
2019 mode_lib->vba.PixelChunkSizeInKByte,
2020 mode_lib->vba.TotalDCCActiveDPP,
2021 mode_lib->vba.MetaChunkSize,
2022 mode_lib->vba.ReturnBW,
2023 mode_lib->vba.GPUVMEnable,
2024 mode_lib->vba.HostVMEnable,
2025 mode_lib->vba.NumberOfActivePlanes,
2026 mode_lib->vba.DPPPerPlane,
2027 locals->dpte_group_bytes,
2028 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2029 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2030 mode_lib->vba.HostVMMaxPageTableLevels,
2031 mode_lib->vba.HostVMCachedPageTableLevels);
2032
2033
2034 mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
2035
2036 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2037 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2038 if (mode_lib->vba.WritebackEnable[k] == true) {
2039 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2040 mode_lib->vba.WritebackLatency
2041 + CalculateWriteBackDelay(
2042 mode_lib->vba.WritebackPixelFormat[k],
2043 mode_lib->vba.WritebackHRatio[k],
2044 mode_lib->vba.WritebackVRatio[k],
2045 mode_lib->vba.WritebackLumaHTaps[k],
2046 mode_lib->vba.WritebackLumaVTaps[k],
2047 mode_lib->vba.WritebackChromaHTaps[k],
2048 mode_lib->vba.WritebackChromaVTaps[k],
2049 mode_lib->vba.WritebackDestinationWidth[k])
2050 / mode_lib->vba.DISPCLK;
2051 } else
2052 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
2053 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2054 if (mode_lib->vba.BlendingAndTiming[j] == k
2055 && mode_lib->vba.WritebackEnable[j] == true) {
2056 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2057 dml_max(
2058 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k],
2059 mode_lib->vba.WritebackLatency
2060 + CalculateWriteBackDelay(
2061 mode_lib->vba.WritebackPixelFormat[j],
2062 mode_lib->vba.WritebackHRatio[j],
2063 mode_lib->vba.WritebackVRatio[j],
2064 mode_lib->vba.WritebackLumaHTaps[j],
2065 mode_lib->vba.WritebackLumaVTaps[j],
2066 mode_lib->vba.WritebackChromaHTaps[j],
2067 mode_lib->vba.WritebackChromaVTaps[j],
2068 mode_lib->vba.WritebackDestinationWidth[j])
2069 / mode_lib->vba.DISPCLK);
2070 }
2071 }
2072 }
2073 }
2074
2075 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2076 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
2077 if (mode_lib->vba.BlendingAndTiming[k] == j)
2078 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2079 locals->WritebackDelay[mode_lib->vba.VoltageLevel][j];
2080
2081 mode_lib->vba.VStartupLines = 13;
2082 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2083 locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
2084 }
2085
2086 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2087 locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]);
2088
2089 // We don't really care to iterate between the various prefetch modes
2090 //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode);
2091 mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly);
2092
2093 do {
2094 double MaxTotalRDBandwidth = 0;
2095 double MaxTotalRDBandwidthNoUrgentBurst = 0;
2096 bool DestinationLineTimesForPrefetchLessThan2 = false;
2097 bool VRatioPrefetchMoreThan4 = false;
2098 double TWait = CalculateTWait(
2099 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2100 mode_lib->vba.DRAMClockChangeLatency,
2101 mode_lib->vba.UrgentLatency,
2102 mode_lib->vba.SREnterPlusExitTime);
2103
2104 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2105 Pipe myPipe;
2106 HostVM myHostVM;
2107
2108 if (mode_lib->vba.XFCEnabled[k] == true) {
2109 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
2110 CalculateRemoteSurfaceFlipDelay(
2111 mode_lib,
2112 mode_lib->vba.VRatio[k],
2113 locals->SwathWidthY[k],
2114 dml_ceil(
2115 locals->BytePerPixelDETY[k],
2116 1),
2117 mode_lib->vba.HTotal[k]
2118 / mode_lib->vba.PixelClock[k],
2119 mode_lib->vba.XFCTSlvVupdateOffset,
2120 mode_lib->vba.XFCTSlvVupdateWidth,
2121 mode_lib->vba.XFCTSlvVreadyOffset,
2122 mode_lib->vba.XFCXBUFLatencyTolerance,
2123 mode_lib->vba.XFCFillBWOverhead,
2124 mode_lib->vba.XFCSlvChunkSize,
2125 mode_lib->vba.XFCBusTransportTime,
2126 mode_lib->vba.TCalc,
2127 TWait,
2128 &mode_lib->vba.SrcActiveDrainRate,
2129 &mode_lib->vba.TInitXFill,
2130 &mode_lib->vba.TslvChk);
2131 } else {
2132 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
2133 }
2134
2135 myPipe.DPPCLK = locals->DPPCLK[k];
2136 myPipe.DISPCLK = mode_lib->vba.DISPCLK;
2137 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
2138 myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep;
2139 myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k];
2140 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
2141 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
2142 myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
2143 myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
2144 myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
2145 myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
2146 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
2147 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
2148 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
2149 myPipe.HTotal = mode_lib->vba.HTotal[k];
2150
2151
2152 myHostVM.Enable = mode_lib->vba.HostVMEnable;
2153 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
2154 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
2155
2156 mode_lib->vba.ErrorResult[k] =
2157 CalculatePrefetchSchedule(
2158 mode_lib,
2159 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2160 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2161 &myPipe,
2162 locals->DSCDelay[k],
2163 mode_lib->vba.DPPCLKDelaySubtotal,
2164 mode_lib->vba.DPPCLKDelaySCL,
2165 mode_lib->vba.DPPCLKDelaySCLLBOnly,
2166 mode_lib->vba.DPPCLKDelayCNVCFormater,
2167 mode_lib->vba.DPPCLKDelayCNVCCursor,
2168 mode_lib->vba.DISPCLKDelaySubtotal,
2169 (unsigned int) (locals->SwathWidthY[k]
2170 / mode_lib->vba.HRatio[k]),
2171 mode_lib->vba.OutputFormat[k],
2172 mode_lib->vba.MaxInterDCNTileRepeaters,
2173 dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]),
2174 locals->MaxVStartupLines[k],
2175 mode_lib->vba.GPUVMMaxPageTableLevels,
2176 mode_lib->vba.GPUVMEnable,
2177 &myHostVM,
2178 mode_lib->vba.DynamicMetadataEnable[k],
2179 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
2180 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
2181 mode_lib->vba.DCCEnable[k],
2182 mode_lib->vba.UrgentLatency,
2183 mode_lib->vba.UrgentExtraLatency,
2184 mode_lib->vba.TCalc,
2185 locals->PDEAndMetaPTEBytesFrame[k],
2186 locals->MetaRowByte[k],
2187 locals->PixelPTEBytesPerRow[k],
2188 locals->PrefetchSourceLinesY[k],
2189 locals->SwathWidthY[k],
2190 locals->BytePerPixelDETY[k],
2191 locals->VInitPreFillY[k],
2192 locals->MaxNumSwathY[k],
2193 locals->PrefetchSourceLinesC[k],
2194 locals->BytePerPixelDETC[k],
2195 locals->VInitPreFillC[k],
2196 locals->MaxNumSwathC[k],
2197 mode_lib->vba.SwathHeightY[k],
2198 mode_lib->vba.SwathHeightC[k],
2199 TWait,
2200 mode_lib->vba.XFCEnabled[k],
2201 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
2202 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
2203 &locals->DSTXAfterScaler[k],
2204 &locals->DSTYAfterScaler[k],
2205 &locals->DestinationLinesForPrefetch[k],
2206 &locals->PrefetchBandwidth[k],
2207 &locals->DestinationLinesToRequestVMInVBlank[k],
2208 &locals->DestinationLinesToRequestRowInVBlank[k],
2209 &locals->VRatioPrefetchY[k],
2210 &locals->VRatioPrefetchC[k],
2211 &locals->RequiredPrefetchPixDataBWLuma[k],
2212 &locals->RequiredPrefetchPixDataBWChroma[k],
2213 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
2214 &locals->Tno_bw[k],
2215 &locals->prefetch_vmrow_bw[k],
2216 &locals->swath_width_luma_ub[k],
2217 &locals->swath_width_chroma_ub[k],
2218 &mode_lib->vba.VUpdateOffsetPix[k],
2219 &mode_lib->vba.VUpdateWidthPix[k],
2220 &mode_lib->vba.VReadyOffsetPix[k]);
2221 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2222 locals->VStartup[k] = dml_min(
2223 mode_lib->vba.VStartupLines,
2224 locals->MaxVStartupLines[k]);
2225 if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
2226 != 0) {
2227 locals->VStartup[k] =
2228 locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
2229 }
2230 } else {
2231 locals->VStartup[k] =
2232 dml_min(
2233 mode_lib->vba.VStartupLines,
2234 locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
2235 }
2236 }
2237
2238 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2239 unsigned int m;
2240
2241 locals->cursor_bw[k] = 0;
2242 locals->cursor_bw_pre[k] = 0;
2243 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
2244 locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
2245 locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k];
2246 }
2247
2248 CalculateUrgentBurstFactor(
2249 mode_lib->vba.DETBufferSizeInKByte,
2250 mode_lib->vba.SwathHeightY[k],
2251 mode_lib->vba.SwathHeightC[k],
2252 locals->SwathWidthY[k],
2253 mode_lib->vba.HTotal[k] /
2254 mode_lib->vba.PixelClock[k],
2255 mode_lib->vba.UrgentLatency,
2256 mode_lib->vba.CursorBufferSize,
2257 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
2258 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
2259 mode_lib->vba.VRatio[k],
2260 locals->VRatioPrefetchY[k],
2261 locals->VRatioPrefetchC[k],
2262 locals->BytePerPixelDETY[k],
2263 locals->BytePerPixelDETC[k],
2264 &locals->UrgentBurstFactorCursor[k],
2265 &locals->UrgentBurstFactorCursorPre[k],
2266 &locals->UrgentBurstFactorLuma[k],
2267 &locals->UrgentBurstFactorLumaPre[k],
2268 &locals->UrgentBurstFactorChroma[k],
2269 &locals->UrgentBurstFactorChromaPre[k],
2270 &locals->NotEnoughUrgentLatencyHiding,
2271 &locals->NotEnoughUrgentLatencyHidingPre);
2272
2273 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
2274 locals->UrgentBurstFactorLuma[k] = 1;
2275 locals->UrgentBurstFactorChroma[k] = 1;
2276 locals->UrgentBurstFactorCursor[k] = 1;
2277 locals->UrgentBurstFactorLumaPre[k] = 1;
2278 locals->UrgentBurstFactorChromaPre[k] = 1;
2279 locals->UrgentBurstFactorCursorPre[k] = 1;
2280 }
2281
2282 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2283 dml_max3(locals->prefetch_vmrow_bw[k],
2284 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2285 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k]
2286 * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2287 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k]
2288 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2289
2290 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2291 dml_max3(locals->prefetch_vmrow_bw[k],
2292 locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k]
2293 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2294 locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2295
2296 if (locals->DestinationLinesForPrefetch[k] < 2)
2297 DestinationLineTimesForPrefetchLessThan2 = true;
2298 if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4)
2299 VRatioPrefetchMoreThan4 = true;
2300 }
2301 mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
2302
2303 if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2304 && !DestinationLineTimesForPrefetchLessThan2)
2305 mode_lib->vba.PrefetchModeSupported = true;
2306 else {
2307 mode_lib->vba.PrefetchModeSupported = false;
2308 dml_print(
2309 "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2310 }
2311
2312 if (mode_lib->vba.PrefetchModeSupported == true) {
2313 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
2314 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2315 mode_lib->vba.BandwidthAvailableForImmediateFlip =
2316 mode_lib->vba.BandwidthAvailableForImmediateFlip
2317 - dml_max(
2318 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2319 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k]
2320 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2321 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] +
2322 locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] +
2323 locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2324 }
2325
2326 mode_lib->vba.TotImmediateFlipBytes = 0;
2327 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2328 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k];
2329 }
2330 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2331 CalculateFlipSchedule(
2332 mode_lib,
2333 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2334 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2335 mode_lib->vba.UrgentExtraLatency,
2336 mode_lib->vba.UrgentLatency,
2337 mode_lib->vba.GPUVMMaxPageTableLevels,
2338 mode_lib->vba.HostVMEnable,
2339 mode_lib->vba.HostVMMaxPageTableLevels,
2340 mode_lib->vba.HostVMCachedPageTableLevels,
2341 mode_lib->vba.GPUVMEnable,
2342 locals->PDEAndMetaPTEBytesFrame[k],
2343 locals->MetaRowByte[k],
2344 locals->PixelPTEBytesPerRow[k],
2345 mode_lib->vba.BandwidthAvailableForImmediateFlip,
2346 mode_lib->vba.TotImmediateFlipBytes,
2347 mode_lib->vba.SourcePixelFormat[k],
2348 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2349 mode_lib->vba.VRatio[k],
2350 locals->Tno_bw[k],
2351 mode_lib->vba.DCCEnable[k],
2352 locals->dpte_row_height[k],
2353 locals->meta_row_height[k],
2354 locals->dpte_row_height_chroma[k],
2355 locals->meta_row_height_chroma[k],
2356 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
2357 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
2358 &locals->final_flip_bw[k],
2359 &locals->ImmediateFlipSupportedForPipe[k]);
2360 }
2361 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
2362 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2363 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2364 mode_lib->vba.total_dcn_read_bw_with_flip =
2365 mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
2366 locals->prefetch_vmrow_bw[k],
2367 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
2368 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2369 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k]
2370 + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k]
2371 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2372 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst =
2373 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst +
2374 dml_max3(locals->prefetch_vmrow_bw[k],
2375 locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k],
2376 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2377
2378 }
2379 mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW;
2380
2381 mode_lib->vba.ImmediateFlipSupported = true;
2382 if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
2383 mode_lib->vba.ImmediateFlipSupported = false;
2384 }
2385 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2386 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
2387 mode_lib->vba.ImmediateFlipSupported = false;
2388 }
2389 }
2390 } else {
2391 mode_lib->vba.ImmediateFlipSupported = false;
2392 }
2393
2394 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2395 if (mode_lib->vba.ErrorResult[k]) {
2396 mode_lib->vba.PrefetchModeSupported = false;
2397 dml_print(
2398 "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2399 }
2400 }
2401
2402 mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1;
2403 } while (!((mode_lib->vba.PrefetchModeSupported
2404 && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable)
2405 || mode_lib->vba.ImmediateFlipSupported))
2406 || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines));
2407
2408 //Watermarks and NB P-State/DRAM Clock Change Support
2409 {
2410 enum clock_change_support DRAMClockChangeSupport; // dummy
2411 CalculateWatermarksAndDRAMSpeedChangeSupport(
2412 mode_lib,
2413 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2414 mode_lib->vba.NumberOfActivePlanes,
2415 mode_lib->vba.MaxLineBufferLines,
2416 mode_lib->vba.LineBufferSize,
2417 mode_lib->vba.DPPOutputBufferPixels,
2418 mode_lib->vba.DETBufferSizeInKByte,
2419 mode_lib->vba.WritebackInterfaceLumaBufferSize,
2420 mode_lib->vba.WritebackInterfaceChromaBufferSize,
2421 mode_lib->vba.DCFCLK,
2422 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
2423 mode_lib->vba.ReturnBW,
2424 mode_lib->vba.GPUVMEnable,
2425 locals->dpte_group_bytes,
2426 mode_lib->vba.MetaChunkSize,
2427 mode_lib->vba.UrgentLatency,
2428 mode_lib->vba.UrgentExtraLatency,
2429 mode_lib->vba.WritebackLatency,
2430 mode_lib->vba.WritebackChunkSize,
2431 mode_lib->vba.SOCCLK,
2432 mode_lib->vba.DRAMClockChangeLatency,
2433 mode_lib->vba.SRExitTime,
2434 mode_lib->vba.SREnterPlusExitTime,
2435 mode_lib->vba.DCFCLKDeepSleep,
2436 mode_lib->vba.DPPPerPlane,
2437 mode_lib->vba.DCCEnable,
2438 locals->DPPCLK,
2439 locals->SwathWidthSingleDPPY,
2440 mode_lib->vba.SwathHeightY,
2441 locals->ReadBandwidthPlaneLuma,
2442 mode_lib->vba.SwathHeightC,
2443 locals->ReadBandwidthPlaneChroma,
2444 mode_lib->vba.LBBitPerPixel,
2445 locals->SwathWidthY,
2446 mode_lib->vba.HRatio,
2447 mode_lib->vba.vtaps,
2448 mode_lib->vba.VTAPsChroma,
2449 mode_lib->vba.VRatio,
2450 mode_lib->vba.HTotal,
2451 mode_lib->vba.PixelClock,
2452 mode_lib->vba.BlendingAndTiming,
2453 locals->BytePerPixelDETY,
2454 locals->BytePerPixelDETC,
2455 mode_lib->vba.WritebackEnable,
2456 mode_lib->vba.WritebackPixelFormat,
2457 mode_lib->vba.WritebackDestinationWidth,
2458 mode_lib->vba.WritebackDestinationHeight,
2459 mode_lib->vba.WritebackSourceHeight,
2460 &DRAMClockChangeSupport,
2461 &mode_lib->vba.UrgentWatermark,
2462 &mode_lib->vba.WritebackUrgentWatermark,
2463 &mode_lib->vba.DRAMClockChangeWatermark,
2464 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
2465 &mode_lib->vba.StutterExitWatermark,
2466 &mode_lib->vba.StutterEnterPlusExitWatermark,
2467 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
2468 }
2469
2470
2471 //Display Pipeline Delivery Time in Prefetch, Groups
2472 CalculatePixelDeliveryTimes(
2473 mode_lib->vba.NumberOfActivePlanes,
2474 mode_lib->vba.VRatio,
2475 locals->VRatioPrefetchY,
2476 locals->VRatioPrefetchC,
2477 locals->swath_width_luma_ub,
2478 locals->swath_width_chroma_ub,
2479 mode_lib->vba.DPPPerPlane,
2480 mode_lib->vba.HRatio,
2481 mode_lib->vba.PixelClock,
2482 locals->PSCL_THROUGHPUT_LUMA,
2483 locals->PSCL_THROUGHPUT_CHROMA,
2484 locals->DPPCLK,
2485 locals->BytePerPixelDETC,
2486 mode_lib->vba.SourceScan,
2487 locals->BlockWidth256BytesY,
2488 locals->BlockHeight256BytesY,
2489 locals->BlockWidth256BytesC,
2490 locals->BlockHeight256BytesC,
2491 locals->DisplayPipeLineDeliveryTimeLuma,
2492 locals->DisplayPipeLineDeliveryTimeChroma,
2493 locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
2494 locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
2495 locals->DisplayPipeRequestDeliveryTimeLuma,
2496 locals->DisplayPipeRequestDeliveryTimeChroma,
2497 locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2498 locals->DisplayPipeRequestDeliveryTimeChromaPrefetch);
2499
2500 CalculateMetaAndPTETimes(
2501 mode_lib->vba.NumberOfActivePlanes,
2502 mode_lib->vba.GPUVMEnable,
2503 mode_lib->vba.MetaChunkSize,
2504 mode_lib->vba.MinMetaChunkSizeBytes,
2505 mode_lib->vba.GPUVMMaxPageTableLevels,
2506 mode_lib->vba.HTotal,
2507 mode_lib->vba.VRatio,
2508 locals->VRatioPrefetchY,
2509 locals->VRatioPrefetchC,
2510 locals->DestinationLinesToRequestRowInVBlank,
2511 locals->DestinationLinesToRequestRowInImmediateFlip,
2512 locals->DestinationLinesToRequestVMInVBlank,
2513 locals->DestinationLinesToRequestVMInImmediateFlip,
2514 mode_lib->vba.DCCEnable,
2515 mode_lib->vba.PixelClock,
2516 locals->BytePerPixelDETY,
2517 locals->BytePerPixelDETC,
2518 mode_lib->vba.SourceScan,
2519 locals->dpte_row_height,
2520 locals->dpte_row_height_chroma,
2521 locals->meta_row_width,
2522 locals->meta_row_height,
2523 locals->meta_req_width,
2524 locals->meta_req_height,
2525 locals->dpte_group_bytes,
2526 locals->PTERequestSizeY,
2527 locals->PTERequestSizeC,
2528 locals->PixelPTEReqWidthY,
2529 locals->PixelPTEReqHeightY,
2530 locals->PixelPTEReqWidthC,
2531 locals->PixelPTEReqHeightC,
2532 locals->dpte_row_width_luma_ub,
2533 locals->dpte_row_width_chroma_ub,
2534 locals->vm_group_bytes,
2535 locals->dpde0_bytes_per_frame_ub_l,
2536 locals->dpde0_bytes_per_frame_ub_c,
2537 locals->meta_pte_bytes_per_frame_ub_l,
2538 locals->meta_pte_bytes_per_frame_ub_c,
2539 locals->DST_Y_PER_PTE_ROW_NOM_L,
2540 locals->DST_Y_PER_PTE_ROW_NOM_C,
2541 locals->DST_Y_PER_META_ROW_NOM_L,
2542 locals->TimePerMetaChunkNominal,
2543 locals->TimePerMetaChunkVBlank,
2544 locals->TimePerMetaChunkFlip,
2545 locals->time_per_pte_group_nom_luma,
2546 locals->time_per_pte_group_vblank_luma,
2547 locals->time_per_pte_group_flip_luma,
2548 locals->time_per_pte_group_nom_chroma,
2549 locals->time_per_pte_group_vblank_chroma,
2550 locals->time_per_pte_group_flip_chroma,
2551 locals->TimePerVMGroupVBlank,
2552 locals->TimePerVMGroupFlip,
2553 locals->TimePerVMRequestVBlank,
2554 locals->TimePerVMRequestFlip);
2555
2556
2557 // Min TTUVBlank
2558 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2559 if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
2560 locals->AllowDRAMClockChangeDuringVBlank[k] = true;
2561 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2562 locals->MinTTUVBlank[k] = dml_max(
2563 mode_lib->vba.DRAMClockChangeWatermark,
2564 dml_max(
2565 mode_lib->vba.StutterEnterPlusExitWatermark,
2566 mode_lib->vba.UrgentWatermark));
2567 } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) {
2568 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2569 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2570 locals->MinTTUVBlank[k] = dml_max(
2571 mode_lib->vba.StutterEnterPlusExitWatermark,
2572 mode_lib->vba.UrgentWatermark);
2573 } else {
2574 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2575 locals->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2576 locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark;
2577 }
2578 if (!mode_lib->vba.DynamicMetadataEnable[k])
2579 locals->MinTTUVBlank[k] = mode_lib->vba.TCalc
2580 + locals->MinTTUVBlank[k];
2581 }
2582
2583 // DCC Configuration
2584 mode_lib->vba.ActiveDPPs = 0;
2585 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2586 locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration(
2587 mode_lib->vba.DCCEnable[k],
2588 false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2589 mode_lib->vba.ViewportWidth[k],
2590 mode_lib->vba.ViewportHeight[k],
2591 mode_lib->vba.DETBufferSizeInKByte * 1024,
2592 locals->BlockHeight256BytesY[k],
2593 mode_lib->vba.SwathHeightY[k],
2594 mode_lib->vba.SurfaceTiling[k],
2595 locals->BytePerPixelDETY[k],
2596 mode_lib->vba.SourceScan[k],
2597 &locals->DCCYMaxUncompressedBlock[k],
2598 &locals->DCCYMaxCompressedBlock[k],
2599 &locals->DCCYIndependent64ByteBlock[k]);
2600 }
2601
2602 //XFC Parameters:
2603 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2604 if (mode_lib->vba.XFCEnabled[k] == true) {
2605 double TWait;
2606
2607 locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset;
2608 locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth;
2609 locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset;
2610 TWait = CalculateTWait(
2611 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2612 mode_lib->vba.DRAMClockChangeLatency,
2613 mode_lib->vba.UrgentLatency,
2614 mode_lib->vba.SREnterPlusExitTime);
2615 mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay(
2616 mode_lib,
2617 mode_lib->vba.VRatio[k],
2618 locals->SwathWidthY[k],
2619 dml_ceil(locals->BytePerPixelDETY[k], 1),
2620 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2621 mode_lib->vba.XFCTSlvVupdateOffset,
2622 mode_lib->vba.XFCTSlvVupdateWidth,
2623 mode_lib->vba.XFCTSlvVreadyOffset,
2624 mode_lib->vba.XFCXBUFLatencyTolerance,
2625 mode_lib->vba.XFCFillBWOverhead,
2626 mode_lib->vba.XFCSlvChunkSize,
2627 mode_lib->vba.XFCBusTransportTime,
2628 mode_lib->vba.TCalc,
2629 TWait,
2630 &mode_lib->vba.SrcActiveDrainRate,
2631 &mode_lib->vba.TInitXFill,
2632 &mode_lib->vba.TslvChk);
2633 locals->XFCRemoteSurfaceFlipLatency[k] =
2634 dml_floor(
2635 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2636 / (mode_lib->vba.HTotal[k]
2637 / mode_lib->vba.PixelClock[k]),
2638 1);
2639 locals->XFCTransferDelay[k] =
2640 dml_ceil(
2641 mode_lib->vba.XFCBusTransportTime
2642 / (mode_lib->vba.HTotal[k]
2643 / mode_lib->vba.PixelClock[k]),
2644 1);
2645 locals->XFCPrechargeDelay[k] =
2646 dml_ceil(
2647 (mode_lib->vba.XFCBusTransportTime
2648 + mode_lib->vba.TInitXFill
2649 + mode_lib->vba.TslvChk)
2650 / (mode_lib->vba.HTotal[k]
2651 / mode_lib->vba.PixelClock[k]),
2652 1);
2653 mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance
2654 * mode_lib->vba.SrcActiveDrainRate;
2655 mode_lib->vba.FinalFillMargin =
2656 (locals->DestinationLinesToRequestVMInVBlank[k]
2657 + locals->DestinationLinesToRequestRowInVBlank[k])
2658 * mode_lib->vba.HTotal[k]
2659 / mode_lib->vba.PixelClock[k]
2660 * mode_lib->vba.SrcActiveDrainRate
2661 + mode_lib->vba.XFCFillConstant;
2662 mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay
2663 * mode_lib->vba.SrcActiveDrainRate
2664 + mode_lib->vba.FinalFillMargin;
2665 mode_lib->vba.RemainingFillLevel = dml_max(
2666 0.0,
2667 mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel);
2668 mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel
2669 / (mode_lib->vba.SrcActiveDrainRate
2670 * mode_lib->vba.XFCFillBWOverhead / 100);
2671 locals->XFCPrefetchMargin[k] =
2672 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2673 + mode_lib->vba.TFinalxFill
2674 + (locals->DestinationLinesToRequestVMInVBlank[k]
2675 + locals->DestinationLinesToRequestRowInVBlank[k])
2676 * mode_lib->vba.HTotal[k]
2677 / mode_lib->vba.PixelClock[k];
2678 } else {
2679 locals->XFCSlaveVUpdateOffset[k] = 0;
2680 locals->XFCSlaveVupdateWidth[k] = 0;
2681 locals->XFCSlaveVReadyOffset[k] = 0;
2682 locals->XFCRemoteSurfaceFlipLatency[k] = 0;
2683 locals->XFCPrechargeDelay[k] = 0;
2684 locals->XFCTransferDelay[k] = 0;
2685 locals->XFCPrefetchMargin[k] = 0;
2686 }
2687 }
2688
2689 // Stutter Efficiency
2690 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2691 CalculateDETBufferSize(
2692 mode_lib->vba.DETBufferSizeInKByte,
2693 mode_lib->vba.SwathHeightY[k],
2694 mode_lib->vba.SwathHeightC[k],
2695 &locals->DETBufferSizeY[k],
2696 &locals->DETBufferSizeC[k]);
2697
2698 locals->LinesInDETY[k] = locals->DETBufferSizeY[k]
2699 / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k];
2700 locals->LinesInDETYRoundedDownToSwath[k] = dml_floor(
2701 locals->LinesInDETY[k],
2702 mode_lib->vba.SwathHeightY[k]);
2703 locals->FullDETBufferingTimeY[k] =
2704 locals->LinesInDETYRoundedDownToSwath[k]
2705 * (mode_lib->vba.HTotal[k]
2706 / mode_lib->vba.PixelClock[k])
2707 / mode_lib->vba.VRatio[k];
2708 }
2709
2710 mode_lib->vba.StutterPeriod = 999999.0;
2711 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2712 if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) {
2713 mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k];
2714 mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
2715 (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
2716 / mode_lib->vba.PixelClock[k];
2717 locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1);
2718 locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k];
2719 locals->LinesToFinishSwathTransferStutterCriticalPlane =
2720 mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]);
2721 }
2722 }
2723
2724 mode_lib->vba.AverageReadBandwidth = 0.0;
2725 mode_lib->vba.TotalRowReadBandwidth = 0.0;
2726 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2727 unsigned int DCCRateLimit;
2728
2729 if (mode_lib->vba.DCCEnable[k]) {
2730 if (locals->DCCYMaxCompressedBlock[k] == 256)
2731 DCCRateLimit = 4;
2732 else
2733 DCCRateLimit = 2;
2734
2735 mode_lib->vba.AverageReadBandwidth =
2736 mode_lib->vba.AverageReadBandwidth
2737 + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) /
2738 dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit);
2739 } else {
2740 mode_lib->vba.AverageReadBandwidth =
2741 mode_lib->vba.AverageReadBandwidth
2742 + locals->ReadBandwidthPlaneLuma[k]
2743 + locals->ReadBandwidthPlaneChroma[k];
2744 }
2745 mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth +
2746 locals->meta_row_bw[k] + locals->dpte_row_bw[k];
2747 }
2748
2749 mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth;
2750
2751 mode_lib->vba.PartOfBurstThatFitsInROB =
2752 dml_min(
2753 mode_lib->vba.StutterPeriod
2754 * mode_lib->vba.TotalDataReadBandwidth,
2755 mode_lib->vba.ROBBufferSizeInKByte * 1024
2756 * mode_lib->vba.AverageDCCCompressionRate);
2757 mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
2758 / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW
2759 + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth
2760 - mode_lib->vba.PartOfBurstThatFitsInROB)
2761 / (mode_lib->vba.DCFCLK * 64)
2762 + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW;
2763 mode_lib->vba.StutterBurstTime = dml_max(
2764 mode_lib->vba.StutterBurstTime,
2765 (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane *
2766 locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW)
2767 );
2768
2769 mode_lib->vba.TotalActiveWriteback = 0;
2770 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2771 if (mode_lib->vba.WritebackEnable[k] == true) {
2772 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
2773 }
2774 }
2775
2776 if (mode_lib->vba.TotalActiveWriteback == 0) {
2777 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
2778 - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
2779 / mode_lib->vba.StutterPeriod) * 100;
2780 } else {
2781 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
2782 }
2783
2784 mode_lib->vba.SmallestVBlank = 999999;
2785 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2786 if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
2787 mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
2788 - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
2789 / mode_lib->vba.PixelClock[k];
2790 } else {
2791 mode_lib->vba.VBlankTime = 0;
2792 }
2793 mode_lib->vba.SmallestVBlank = dml_min(
2794 mode_lib->vba.SmallestVBlank,
2795 mode_lib->vba.VBlankTime);
2796 }
2797
2798 mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
2799 * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
2800 - mode_lib->vba.SmallestVBlank)
2801 + mode_lib->vba.SmallestVBlank)
2802 / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
2803 }
2804
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)2805 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
2806 {
2807 // Display Pipe Configuration
2808 double BytePerPixDETY;
2809 double BytePerPixDETC;
2810 double Read256BytesBlockHeightY;
2811 double Read256BytesBlockHeightC;
2812 double Read256BytesBlockWidthY;
2813 double Read256BytesBlockWidthC;
2814 double MaximumSwathHeightY;
2815 double MaximumSwathHeightC;
2816 double MinimumSwathHeightY;
2817 double MinimumSwathHeightC;
2818 double SwathWidth;
2819 double SwathWidthGranularityY;
2820 double SwathWidthGranularityC;
2821 double RoundedUpMaxSwathSizeBytesY;
2822 double RoundedUpMaxSwathSizeBytesC;
2823 unsigned int j, k;
2824
2825 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2826 bool MainPlaneDoesODMCombine = false;
2827
2828 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2829 BytePerPixDETY = 8;
2830 BytePerPixDETC = 0;
2831 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
2832 BytePerPixDETY = 4;
2833 BytePerPixDETC = 0;
2834 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2835 BytePerPixDETY = 2;
2836 BytePerPixDETC = 0;
2837 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
2838 BytePerPixDETY = 1;
2839 BytePerPixDETC = 0;
2840 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2841 BytePerPixDETY = 1;
2842 BytePerPixDETC = 2;
2843 } else {
2844 BytePerPixDETY = 4.0 / 3.0;
2845 BytePerPixDETC = 8.0 / 3.0;
2846 }
2847
2848 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2849 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2850 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2851 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2852 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2853 Read256BytesBlockHeightY = 1;
2854 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2855 Read256BytesBlockHeightY = 4;
2856 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2857 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2858 Read256BytesBlockHeightY = 8;
2859 } else {
2860 Read256BytesBlockHeightY = 16;
2861 }
2862 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2863 / Read256BytesBlockHeightY;
2864 Read256BytesBlockHeightC = 0;
2865 Read256BytesBlockWidthC = 0;
2866 } else {
2867 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2868 Read256BytesBlockHeightY = 1;
2869 Read256BytesBlockHeightC = 1;
2870 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2871 Read256BytesBlockHeightY = 16;
2872 Read256BytesBlockHeightC = 8;
2873 } else {
2874 Read256BytesBlockHeightY = 8;
2875 Read256BytesBlockHeightC = 8;
2876 }
2877 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2878 / Read256BytesBlockHeightY;
2879 Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2)
2880 / Read256BytesBlockHeightC;
2881 }
2882
2883 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2884 MaximumSwathHeightY = Read256BytesBlockHeightY;
2885 MaximumSwathHeightC = Read256BytesBlockHeightC;
2886 } else {
2887 MaximumSwathHeightY = Read256BytesBlockWidthY;
2888 MaximumSwathHeightC = Read256BytesBlockWidthC;
2889 }
2890
2891 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2892 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2893 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2894 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2895 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
2896 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2897 && (mode_lib->vba.SurfaceTiling[k]
2898 == dm_sw_4kb_s
2899 || mode_lib->vba.SurfaceTiling[k]
2900 == dm_sw_4kb_s_x
2901 || mode_lib->vba.SurfaceTiling[k]
2902 == dm_sw_64kb_s
2903 || mode_lib->vba.SurfaceTiling[k]
2904 == dm_sw_64kb_s_t
2905 || mode_lib->vba.SurfaceTiling[k]
2906 == dm_sw_64kb_s_x
2907 || mode_lib->vba.SurfaceTiling[k]
2908 == dm_sw_var_s
2909 || mode_lib->vba.SurfaceTiling[k]
2910 == dm_sw_var_s_x)
2911 && mode_lib->vba.SourceScan[k] == dm_horz)) {
2912 MinimumSwathHeightY = MaximumSwathHeightY;
2913 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8
2914 && mode_lib->vba.SourceScan[k] != dm_horz) {
2915 MinimumSwathHeightY = MaximumSwathHeightY;
2916 } else {
2917 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2918 }
2919 MinimumSwathHeightC = MaximumSwathHeightC;
2920 } else {
2921 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2922 MinimumSwathHeightY = MaximumSwathHeightY;
2923 MinimumSwathHeightC = MaximumSwathHeightC;
2924 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
2925 && mode_lib->vba.SourceScan[k] == dm_horz) {
2926 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2927 MinimumSwathHeightC = MaximumSwathHeightC;
2928 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
2929 && mode_lib->vba.SourceScan[k] == dm_horz) {
2930 MinimumSwathHeightC = MaximumSwathHeightC / 2.0;
2931 MinimumSwathHeightY = MaximumSwathHeightY;
2932 } else {
2933 MinimumSwathHeightY = MaximumSwathHeightY;
2934 MinimumSwathHeightC = MaximumSwathHeightC;
2935 }
2936 }
2937
2938 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2939 SwathWidth = mode_lib->vba.ViewportWidth[k];
2940 } else {
2941 SwathWidth = mode_lib->vba.ViewportHeight[k];
2942 }
2943
2944 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2945 MainPlaneDoesODMCombine = true;
2946 }
2947 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2948 if (mode_lib->vba.BlendingAndTiming[k] == j
2949 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2950 MainPlaneDoesODMCombine = true;
2951 }
2952 }
2953
2954 if (MainPlaneDoesODMCombine == true) {
2955 SwathWidth = dml_min(
2956 SwathWidth,
2957 mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]);
2958 } else {
2959 SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k];
2960 }
2961
2962 SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY;
2963 RoundedUpMaxSwathSizeBytesY = (dml_ceil(
2964 (double) (SwathWidth - 1),
2965 SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY
2966 * MaximumSwathHeightY;
2967 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2968 RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256)
2969 + 256;
2970 }
2971 if (MaximumSwathHeightC > 0) {
2972 SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2)
2973 / MaximumSwathHeightC;
2974 RoundedUpMaxSwathSizeBytesC = (dml_ceil(
2975 (double) (SwathWidth / 2.0 - 1),
2976 SwathWidthGranularityC) + SwathWidthGranularityC)
2977 * BytePerPixDETC * MaximumSwathHeightC;
2978 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2979 RoundedUpMaxSwathSizeBytesC = dml_ceil(
2980 RoundedUpMaxSwathSizeBytesC,
2981 256) + 256;
2982 }
2983 } else
2984 RoundedUpMaxSwathSizeBytesC = 0.0;
2985
2986 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
2987 <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
2988 mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
2989 mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
2990 } else {
2991 mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY;
2992 mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC;
2993 }
2994
2995 CalculateDETBufferSize(
2996 mode_lib->vba.DETBufferSizeInKByte,
2997 mode_lib->vba.SwathHeightY[k],
2998 mode_lib->vba.SwathHeightC[k],
2999 &mode_lib->vba.DETBufferSizeY[k],
3000 &mode_lib->vba.DETBufferSizeC[k]);
3001 }
3002 }
3003
/*
 * CalculateTWait - latency to wait for, selected by the prefetch mode.
 *
 * PrefetchMode 0: the largest of (DRAMClockChangeLatency + UrgentLatency),
 *                 SREnterPlusExitTime and UrgentLatency.
 * PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * Any other mode: UrgentLatency.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	double SelfRefreshOrUrgent;

	switch (PrefetchMode) {
	case 0:
		SelfRefreshOrUrgent = dml_max(SREnterPlusExitTime, UrgentLatency);
		return dml_max(
				DRAMClockChangeLatency + UrgentLatency,
				SelfRefreshOrUrgent);
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3020
/*
 * CalculateRemoteSurfaceFlipDelay - XFC remote surface flip delay.
 *
 * Outputs written through pointers:
 *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
 *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
 *   *TslvChk            = XFCSlvChunkSize / (drain rate * (1 + XFCFillBWOverhead / 100))
 *
 * Returns twice the bus transport time plus the slave setup time (sum of the
 * three XFCTSlv* inputs), TCalc, TWait, *TslvChk and *TInitXFill.  Every
 * intermediate value is also reported through dml_print().
 */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk)
{
	/* XFCFillBWOverhead is a percentage; turn it into a multiplier. */
	const double FillOverheadFactor = 1 + XFCFillBWOverhead / 100;
	const double SlaveSetupTime =
			XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset;
	double AverageFillRate;
	double RemoteSurfaceFlipDelay;

	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
	*TInitXFill = XFCXBUFLatencyTolerance / FillOverheadFactor;
	AverageFillRate = *SrcActiveDrainRate * FillOverheadFactor;
	*TslvChk = XFCSlvChunkSize / AverageFillRate;

	dml_print(
			"DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n",
			*SrcActiveDrainRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AverageFillRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);

	// TODO: This doesn't seem to match programming guide
	RemoteSurfaceFlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait
			+ *TslvChk + *TInitXFill;
	dml_print(
			"DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n",
			RemoteSurfaceFlipDelay);

	return RemoteSurfaceFlipDelay;
}
3058
CalculateWriteBackDelay(enum source_format_class WritebackPixelFormat,double WritebackHRatio,double WritebackVRatio,unsigned int WritebackLumaHTaps,unsigned int WritebackLumaVTaps,unsigned int WritebackChromaHTaps,unsigned int WritebackChromaVTaps,unsigned int WritebackDestinationWidth)3059 static double CalculateWriteBackDelay(
3060 enum source_format_class WritebackPixelFormat,
3061 double WritebackHRatio,
3062 double WritebackVRatio,
3063 unsigned int WritebackLumaHTaps,
3064 unsigned int WritebackLumaVTaps,
3065 unsigned int WritebackChromaHTaps,
3066 unsigned int WritebackChromaVTaps,
3067 unsigned int WritebackDestinationWidth)
3068 {
3069 double CalculateWriteBackDelay =
3070 dml_max(
3071 dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
3072 WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1)
3073 * dml_ceil(
3074 WritebackDestinationWidth
3075 / 4.0,
3076 1)
3077 + dml_ceil(1.0 / WritebackVRatio, 1)
3078 * (dml_ceil(
3079 WritebackLumaVTaps
3080 / 4.0,
3081 1) + 4));
3082
3083 if (WritebackPixelFormat != dm_444_32) {
3084 CalculateWriteBackDelay =
3085 dml_max(
3086 CalculateWriteBackDelay,
3087 dml_max(
3088 dml_ceil(
3089 WritebackChromaHTaps
3090 / 2.0,
3091 1)
3092 / (2
3093 * WritebackHRatio),
3094 WritebackChromaVTaps
3095 * dml_ceil(
3096 1
3097 / (2
3098 * WritebackVRatio),
3099 1)
3100 * dml_ceil(
3101 WritebackDestinationWidth
3102 / 2.0
3103 / 2.0,
3104 1)
3105 + dml_ceil(
3106 1
3107 / (2
3108 * WritebackVRatio),
3109 1)
3110 * (dml_ceil(
3111 WritebackChromaVTaps
3112 / 4.0,
3113 1)
3114 + 4)));
3115 }
3116 return CalculateWriteBackDelay;
3117 }
3118
CalculateActiveRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3119 static void CalculateActiveRowBandwidth(
3120 bool GPUVMEnable,
3121 enum source_format_class SourcePixelFormat,
3122 double VRatio,
3123 bool DCCEnable,
3124 double LineTime,
3125 unsigned int MetaRowByteLuma,
3126 unsigned int MetaRowByteChroma,
3127 unsigned int meta_row_height_luma,
3128 unsigned int meta_row_height_chroma,
3129 unsigned int PixelPTEBytesPerRowLuma,
3130 unsigned int PixelPTEBytesPerRowChroma,
3131 unsigned int dpte_row_height_luma,
3132 unsigned int dpte_row_height_chroma,
3133 double *meta_row_bw,
3134 double *dpte_row_bw)
3135 {
3136 if (DCCEnable != true) {
3137 *meta_row_bw = 0;
3138 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3139 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3140 + VRatio / 2 * MetaRowByteChroma
3141 / (meta_row_height_chroma * LineTime);
3142 } else {
3143 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3144 }
3145
3146 if (GPUVMEnable != true) {
3147 *dpte_row_bw = 0;
3148 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3149 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3150 + VRatio / 2 * PixelPTEBytesPerRowChroma
3151 / (dpte_row_height_chroma * LineTime);
3152 } else {
3153 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3154 }
3155 }
3156
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,bool GPUVMEnable,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3157 static void CalculateFlipSchedule(
3158 struct display_mode_lib *mode_lib,
3159 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3160 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3161 double UrgentExtraLatency,
3162 double UrgentLatency,
3163 unsigned int GPUVMMaxPageTableLevels,
3164 bool HostVMEnable,
3165 unsigned int HostVMMaxPageTableLevels,
3166 unsigned int HostVMCachedPageTableLevels,
3167 bool GPUVMEnable,
3168 double PDEAndMetaPTEBytesPerFrame,
3169 double MetaRowBytes,
3170 double DPTEBytesPerRow,
3171 double BandwidthAvailableForImmediateFlip,
3172 unsigned int TotImmediateFlipBytes,
3173 enum source_format_class SourcePixelFormat,
3174 double LineTime,
3175 double VRatio,
3176 double Tno_bw,
3177 bool DCCEnable,
3178 unsigned int dpte_row_height,
3179 unsigned int meta_row_height,
3180 unsigned int dpte_row_height_chroma,
3181 unsigned int meta_row_height_chroma,
3182 double *DestinationLinesToRequestVMInImmediateFlip,
3183 double *DestinationLinesToRequestRowInImmediateFlip,
3184 double *final_flip_bw,
3185 bool *ImmediateFlipSupportedForPipe)
3186 {
3187 double min_row_time = 0.0;
3188 unsigned int HostVMDynamicLevels;
3189 double TimeForFetchingMetaPTEImmediateFlip;
3190 double TimeForFetchingRowInVBlankImmediateFlip;
3191 double ImmediateFlipBW;
3192 double HostVMInefficiencyFactor;
3193 double VRatioClamped;
3194
3195 if (GPUVMEnable == true && HostVMEnable == true) {
3196 HostVMInefficiencyFactor =
3197 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
3198 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3199 HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
3200 } else {
3201 HostVMInefficiencyFactor = 1;
3202 HostVMDynamicLevels = 0;
3203 }
3204
3205 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
3206 * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3207
3208 if (GPUVMEnable == true) {
3209 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3210 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3211 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1),
3212 LineTime / 4.0);
3213 } else {
3214 TimeForFetchingMetaPTEImmediateFlip = 0;
3215 }
3216
3217 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3218 if ((GPUVMEnable == true || DCCEnable == true)) {
3219 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevels + 1), LineTime / 4);
3220 } else {
3221 TimeForFetchingRowInVBlankImmediateFlip = 0;
3222 }
3223
3224 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3225 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3226 VRatioClamped = (VRatio < 1.0) ? 1.0 : VRatio;
3227 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3228 if (GPUVMEnable == true && DCCEnable != true) {
3229 min_row_time = dml_min(
3230 dpte_row_height * LineTime / VRatioClamped,
3231 dpte_row_height_chroma * LineTime / (VRatioClamped / 2));
3232 } else if (GPUVMEnable != true && DCCEnable == true) {
3233 min_row_time = dml_min(
3234 meta_row_height * LineTime / VRatioClamped,
3235 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3236 } else {
3237 min_row_time = dml_min4(
3238 dpte_row_height * LineTime / VRatioClamped,
3239 meta_row_height * LineTime / VRatioClamped,
3240 dpte_row_height_chroma * LineTime / (VRatioClamped / 2),
3241 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3242 }
3243 } else {
3244 if (GPUVMEnable == true && DCCEnable != true) {
3245 min_row_time = dpte_row_height * LineTime / VRatioClamped;
3246 } else if (GPUVMEnable != true && DCCEnable == true) {
3247 min_row_time = meta_row_height * LineTime / VRatioClamped;
3248 } else {
3249 min_row_time = dml_min(
3250 dpte_row_height * LineTime / VRatioClamped,
3251 meta_row_height * LineTime / VRatioClamped);
3252 }
3253 }
3254
3255 if (*DestinationLinesToRequestVMInImmediateFlip >= 32
3256 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3257 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3258 *ImmediateFlipSupportedForPipe = false;
3259 } else {
3260 *ImmediateFlipSupportedForPipe = true;
3261 }
3262 }
3263
TruncToValidBPP(double DecimalBPP,double DesiredBPP,bool DSCEnabled,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent)3264 static unsigned int TruncToValidBPP(
3265 double DecimalBPP,
3266 double DesiredBPP,
3267 bool DSCEnabled,
3268 enum output_encoder_class Output,
3269 enum output_format_class Format,
3270 unsigned int DSCInputBitPerComponent)
3271 {
3272 if (Output == dm_hdmi) {
3273 if (Format == dm_420) {
3274 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3275 return 18;
3276 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3277 return 15;
3278 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3279 return 12;
3280 else
3281 return BPP_INVALID;
3282 } else if (Format == dm_444) {
3283 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3284 return 36;
3285 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3286 return 30;
3287 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3288 return 24;
3289 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3290 return 18;
3291 else
3292 return BPP_INVALID;
3293 } else {
3294 if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3295 return 24;
3296 else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3297 return 20;
3298 else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3299 return 16;
3300 else
3301 return BPP_INVALID;
3302 }
3303 } else {
3304 if (DSCEnabled) {
3305 if (Format == dm_420) {
3306 if (DesiredBPP == 0) {
3307 if (DecimalBPP < 6)
3308 return BPP_INVALID;
3309 else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16.0)
3310 return 1.5 * DSCInputBitPerComponent - 1.0 / 16.0;
3311 else
3312 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3313 } else {
3314 if (DecimalBPP < 6
3315 || DesiredBPP < 6
3316 || DesiredBPP > 1.5 * DSCInputBitPerComponent - 1.0 / 16.0
3317 || DecimalBPP < DesiredBPP) {
3318 return BPP_INVALID;
3319 } else {
3320 return DesiredBPP;
3321 }
3322 }
3323 } else if (Format == dm_n422) {
3324 if (DesiredBPP == 0) {
3325 if (DecimalBPP < 7)
3326 return BPP_INVALID;
3327 else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16.0)
3328 return 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3329 else
3330 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3331 } else {
3332 if (DecimalBPP < 7
3333 || DesiredBPP < 7
3334 || DesiredBPP > 2 * DSCInputBitPerComponent - 1.0 / 16.0
3335 || DecimalBPP < DesiredBPP) {
3336 return BPP_INVALID;
3337 } else {
3338 return DesiredBPP;
3339 }
3340 }
3341 } else {
3342 if (DesiredBPP == 0) {
3343 if (DecimalBPP < 8)
3344 return BPP_INVALID;
3345 else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16.0)
3346 return 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3347 else
3348 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3349 } else {
3350 if (DecimalBPP < 8
3351 || DesiredBPP < 8
3352 || DesiredBPP > 3 * DSCInputBitPerComponent - 1.0 / 16.0
3353 || DecimalBPP < DesiredBPP) {
3354 return BPP_INVALID;
3355 } else {
3356 return DesiredBPP;
3357 }
3358 }
3359 }
3360 } else if (Format == dm_420) {
3361 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3362 return 18;
3363 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3364 return 15;
3365 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3366 return 12;
3367 else
3368 return BPP_INVALID;
3369 } else if (Format == dm_s422 || Format == dm_n422) {
3370 if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3371 return 24;
3372 else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3373 return 20;
3374 else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3375 return 16;
3376 else
3377 return BPP_INVALID;
3378 } else {
3379 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3380 return 36;
3381 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3382 return 30;
3383 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3384 return 24;
3385 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3386 return 18;
3387 else
3388 return BPP_INVALID;
3389 }
3390 }
3391 }
3392
dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3393 void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3394 {
3395 struct vba_vars_st *locals = &mode_lib->vba;
3396
3397 int i;
3398 unsigned int j, k, m;
3399
3400 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3401
3402 /*Scale Ratio, taps Support Check*/
3403
3404 mode_lib->vba.ScaleRatioAndTapsSupport = true;
3405 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3406 if (mode_lib->vba.ScalerEnabled[k] == false
3407 && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3408 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3409 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3410 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3411 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)
3412 || mode_lib->vba.HRatio[k] != 1.0
3413 || mode_lib->vba.htaps[k] != 1.0
3414 || mode_lib->vba.VRatio[k] != 1.0
3415 || mode_lib->vba.vtaps[k] != 1.0)) {
3416 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3417 } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0
3418 || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0
3419 || (mode_lib->vba.htaps[k] > 1.0
3420 && (mode_lib->vba.htaps[k] % 2) == 1)
3421 || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
3422 || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
3423 || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
3424 || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
3425 || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3426 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3427 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3428 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3429 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
3430 && (mode_lib->vba.HRatio[k] / 2.0
3431 > mode_lib->vba.HTAPsChroma[k]
3432 || mode_lib->vba.VRatio[k] / 2.0
3433 > mode_lib->vba.VTAPsChroma[k]))) {
3434 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3435 }
3436 }
3437 /*Source Format, Pixel Format and Scan Support Check*/
3438
3439 mode_lib->vba.SourceFormatPixelAndScanSupport = true;
3440 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3441 if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3442 && mode_lib->vba.SourceScan[k] != dm_horz)
3443 || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d
3444 || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x
3445 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d
3446 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t
3447 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x
3448 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d
3449 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x)
3450 && mode_lib->vba.SourcePixelFormat[k] != dm_444_64)
3451 || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x
3452 && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8
3453 || mode_lib->vba.SourcePixelFormat[k]
3454 == dm_420_8
3455 || mode_lib->vba.SourcePixelFormat[k]
3456 == dm_420_10))
3457 || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
3458 || mode_lib->vba.SurfaceTiling[k]
3459 == dm_sw_gfx7_2d_thin_l_vp)
3460 && !((mode_lib->vba.SourcePixelFormat[k]
3461 == dm_444_64
3462 || mode_lib->vba.SourcePixelFormat[k]
3463 == dm_444_32)
3464 && mode_lib->vba.SourceScan[k]
3465 == dm_horz
3466 && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
3467 == true
3468 && mode_lib->vba.DCCEnable[k]
3469 == false))
3470 || (mode_lib->vba.DCCEnable[k] == true
3471 && (mode_lib->vba.SurfaceTiling[k]
3472 == dm_sw_linear
3473 || mode_lib->vba.SourcePixelFormat[k]
3474 == dm_420_8
3475 || mode_lib->vba.SourcePixelFormat[k]
3476 == dm_420_10)))) {
3477 mode_lib->vba.SourceFormatPixelAndScanSupport = false;
3478 }
3479 }
3480 /*Bandwidth Support Check*/
3481
3482 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3483 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
3484 locals->BytePerPixelInDETY[k] = 8.0;
3485 locals->BytePerPixelInDETC[k] = 0.0;
3486 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
3487 locals->BytePerPixelInDETY[k] = 4.0;
3488 locals->BytePerPixelInDETC[k] = 0.0;
3489 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3490 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
3491 locals->BytePerPixelInDETY[k] = 2.0;
3492 locals->BytePerPixelInDETC[k] = 0.0;
3493 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
3494 locals->BytePerPixelInDETY[k] = 1.0;
3495 locals->BytePerPixelInDETC[k] = 0.0;
3496 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
3497 locals->BytePerPixelInDETY[k] = 1.0;
3498 locals->BytePerPixelInDETC[k] = 2.0;
3499 } else {
3500 locals->BytePerPixelInDETY[k] = 4.0 / 3;
3501 locals->BytePerPixelInDETC[k] = 8.0 / 3;
3502 }
3503 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3504 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
3505 } else {
3506 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
3507 }
3508 }
3509 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3510 locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
3511 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
3512 locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0)
3513 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0;
3514 locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k];
3515 }
3516 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3517 if (mode_lib->vba.WritebackEnable[k] == true
3518 && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3519 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3520 * mode_lib->vba.WritebackDestinationHeight[k]
3521 / (mode_lib->vba.WritebackSourceHeight[k]
3522 * mode_lib->vba.HTotal[k]
3523 / mode_lib->vba.PixelClock[k]) * 4.0;
3524 } else if (mode_lib->vba.WritebackEnable[k] == true
3525 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3526 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3527 * mode_lib->vba.WritebackDestinationHeight[k]
3528 / (mode_lib->vba.WritebackSourceHeight[k]
3529 * mode_lib->vba.HTotal[k]
3530 / mode_lib->vba.PixelClock[k]) * 3.0;
3531 } else if (mode_lib->vba.WritebackEnable[k] == true) {
3532 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3533 * mode_lib->vba.WritebackDestinationHeight[k]
3534 / (mode_lib->vba.WritebackSourceHeight[k]
3535 * mode_lib->vba.HTotal[k]
3536 / mode_lib->vba.PixelClock[k]) * 1.5;
3537 } else {
3538 locals->WriteBandwidth[k] = 0.0;
3539 }
3540 }
3541 mode_lib->vba.DCCEnabledInAnyPlane = false;
3542 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3543 if (mode_lib->vba.DCCEnable[k] == true) {
3544 mode_lib->vba.DCCEnabledInAnyPlane = true;
3545 }
3546 }
3547 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3548 locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3(
3549 mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
3550 mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
3551 * mode_lib->vba.DRAMChannelWidth,
3552 mode_lib->vba.FabricClockPerState[i]
3553 * mode_lib->vba.FabricDatapathToDCNDataReturn);
3554 if (mode_lib->vba.HostVMEnable == false) {
3555 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3556 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
3557 } else {
3558 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3559 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
3560 }
3561 }
3562 /*Writeback Latency support check*/
3563
3564 mode_lib->vba.WritebackLatencySupport = true;
3565 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3566 if (mode_lib->vba.WritebackEnable[k] == true) {
3567 if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3568 if (locals->WriteBandwidth[k]
3569 > (mode_lib->vba.WritebackInterfaceLumaBufferSize
3570 + mode_lib->vba.WritebackInterfaceChromaBufferSize)
3571 / mode_lib->vba.WritebackLatency) {
3572 mode_lib->vba.WritebackLatencySupport = false;
3573 }
3574 } else {
3575 if (locals->WriteBandwidth[k]
3576 > 1.5
3577 * dml_min(
3578 mode_lib->vba.WritebackInterfaceLumaBufferSize,
3579 2.0
3580 * mode_lib->vba.WritebackInterfaceChromaBufferSize)
3581 / mode_lib->vba.WritebackLatency) {
3582 mode_lib->vba.WritebackLatencySupport = false;
3583 }
3584 }
3585 }
3586 }
3587 /*Re-ordering Buffer Support Check*/
3588
3589 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3590 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
3591 (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
3592 + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
3593 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
3594 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
3595 * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
3596 if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
3597 > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
3598 locals->ROBSupport[i][0] = true;
3599 } else {
3600 locals->ROBSupport[i][0] = false;
3601 }
3602 }
3603 /*Writeback Mode Support Check*/
3604
3605 mode_lib->vba.TotalNumberOfActiveWriteback = 0;
3606 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3607 if (mode_lib->vba.WritebackEnable[k] == true) {
3608 if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0)
3609 mode_lib->vba.ActiveWritebacksPerPlane[k] = 1;
3610 mode_lib->vba.TotalNumberOfActiveWriteback =
3611 mode_lib->vba.TotalNumberOfActiveWriteback
3612 + mode_lib->vba.ActiveWritebacksPerPlane[k];
3613 }
3614 }
3615 mode_lib->vba.WritebackModeSupport = true;
3616 if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) {
3617 mode_lib->vba.WritebackModeSupport = false;
3618 }
3619 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3620 if (mode_lib->vba.WritebackEnable[k] == true
3621 && mode_lib->vba.Writeback10bpc420Supported != true
3622 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3623 mode_lib->vba.WritebackModeSupport = false;
3624 }
3625 }
3626 /*Writeback Scale Ratio and Taps Support Check*/
3627
3628 mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
3629 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3630 if (mode_lib->vba.WritebackEnable[k] == true) {
3631 if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false
3632 && (mode_lib->vba.WritebackHRatio[k] != 1.0
3633 || mode_lib->vba.WritebackVRatio[k] != 1.0)) {
3634 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3635 }
3636 if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
3637 || mode_lib->vba.WritebackVRatio[k]
3638 > mode_lib->vba.WritebackMaxVSCLRatio
3639 || mode_lib->vba.WritebackHRatio[k]
3640 < mode_lib->vba.WritebackMinHSCLRatio
3641 || mode_lib->vba.WritebackVRatio[k]
3642 < mode_lib->vba.WritebackMinVSCLRatio
3643 || mode_lib->vba.WritebackLumaHTaps[k]
3644 > mode_lib->vba.WritebackMaxHSCLTaps
3645 || mode_lib->vba.WritebackLumaVTaps[k]
3646 > mode_lib->vba.WritebackMaxVSCLTaps
3647 || mode_lib->vba.WritebackHRatio[k]
3648 > mode_lib->vba.WritebackLumaHTaps[k]
3649 || mode_lib->vba.WritebackVRatio[k]
3650 > mode_lib->vba.WritebackLumaVTaps[k]
3651 || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0
3652 && ((mode_lib->vba.WritebackLumaHTaps[k] % 2)
3653 == 1))
3654 || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32
3655 && (mode_lib->vba.WritebackChromaHTaps[k]
3656 > mode_lib->vba.WritebackMaxHSCLTaps
3657 || mode_lib->vba.WritebackChromaVTaps[k]
3658 > mode_lib->vba.WritebackMaxVSCLTaps
3659 || 2.0
3660 * mode_lib->vba.WritebackHRatio[k]
3661 > mode_lib->vba.WritebackChromaHTaps[k]
3662 || 2.0
3663 * mode_lib->vba.WritebackVRatio[k]
3664 > mode_lib->vba.WritebackChromaVTaps[k]
3665 || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0
3666 && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) {
3667 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3668 }
3669 if (mode_lib->vba.WritebackVRatio[k] < 1.0) {
3670 mode_lib->vba.WritebackLumaVExtra =
3671 dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0);
3672 } else {
3673 mode_lib->vba.WritebackLumaVExtra = -1;
3674 }
3675 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32
3676 && mode_lib->vba.WritebackLumaVTaps[k]
3677 > (mode_lib->vba.WritebackLineBufferLumaBufferSize
3678 + mode_lib->vba.WritebackLineBufferChromaBufferSize)
3679 / 3.0
3680 / mode_lib->vba.WritebackDestinationWidth[k]
3681 - mode_lib->vba.WritebackLumaVExtra)
3682 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3683 && mode_lib->vba.WritebackLumaVTaps[k]
3684 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3685 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3686 - mode_lib->vba.WritebackLumaVExtra)
3687 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3688 && mode_lib->vba.WritebackLumaVTaps[k]
3689 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3690 * 8.0 / 10.0
3691 / mode_lib->vba.WritebackDestinationWidth[k]
3692 - mode_lib->vba.WritebackLumaVExtra)) {
3693 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3694 }
3695 if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) {
3696 mode_lib->vba.WritebackChromaVExtra = 0.0;
3697 } else {
3698 mode_lib->vba.WritebackChromaVExtra = -1;
3699 }
3700 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3701 && mode_lib->vba.WritebackChromaVTaps[k]
3702 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3703 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3704 - mode_lib->vba.WritebackChromaVExtra)
3705 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3706 && mode_lib->vba.WritebackChromaVTaps[k]
3707 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3708 * 8.0 / 10.0
3709 / mode_lib->vba.WritebackDestinationWidth[k]
3710 - mode_lib->vba.WritebackChromaVExtra)) {
3711 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3712 }
3713 }
3714 }
3715 /*Maximum DISPCLK/DPPCLK Support check*/
3716
3717 mode_lib->vba.WritebackRequiredDISPCLK = 0.0;
3718 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3719 if (mode_lib->vba.WritebackEnable[k] == true) {
3720 mode_lib->vba.WritebackRequiredDISPCLK =
3721 dml_max(
3722 mode_lib->vba.WritebackRequiredDISPCLK,
3723 CalculateWriteBackDISPCLK(
3724 mode_lib->vba.WritebackPixelFormat[k],
3725 mode_lib->vba.PixelClock[k],
3726 mode_lib->vba.WritebackHRatio[k],
3727 mode_lib->vba.WritebackVRatio[k],
3728 mode_lib->vba.WritebackLumaHTaps[k],
3729 mode_lib->vba.WritebackLumaVTaps[k],
3730 mode_lib->vba.WritebackChromaHTaps[k],
3731 mode_lib->vba.WritebackChromaVTaps[k],
3732 mode_lib->vba.WritebackDestinationWidth[k],
3733 mode_lib->vba.HTotal[k],
3734 mode_lib->vba.WritebackChromaLineBufferWidth));
3735 }
3736 }
3737 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3738 if (mode_lib->vba.HRatio[k] > 1.0) {
3739 locals->PSCL_FACTOR[k] = dml_min(
3740 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3741 mode_lib->vba.MaxPSCLToLBThroughput
3742 * mode_lib->vba.HRatio[k]
3743 / dml_ceil(
3744 mode_lib->vba.htaps[k]
3745 / 6.0,
3746 1.0));
3747 } else {
3748 locals->PSCL_FACTOR[k] = dml_min(
3749 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3750 mode_lib->vba.MaxPSCLToLBThroughput);
3751 }
3752 if (locals->BytePerPixelInDETC[k] == 0.0) {
3753 locals->PSCL_FACTOR_CHROMA[k] = 0.0;
3754 locals->MinDPPCLKUsingSingleDPP[k] =
3755 mode_lib->vba.PixelClock[k]
3756 * dml_max3(
3757 mode_lib->vba.vtaps[k] / 6.0
3758 * dml_min(
3759 1.0,
3760 mode_lib->vba.HRatio[k]),
3761 mode_lib->vba.HRatio[k]
3762 * mode_lib->vba.VRatio[k]
3763 / locals->PSCL_FACTOR[k],
3764 1.0);
3765 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0)
3766 && locals->MinDPPCLKUsingSingleDPP[k]
3767 < 2.0 * mode_lib->vba.PixelClock[k]) {
3768 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3769 * mode_lib->vba.PixelClock[k];
3770 }
3771 } else {
3772 if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) {
3773 locals->PSCL_FACTOR_CHROMA[k] =
3774 dml_min(
3775 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3776 mode_lib->vba.MaxPSCLToLBThroughput
3777 * mode_lib->vba.HRatio[k]
3778 / 2.0
3779 / dml_ceil(
3780 mode_lib->vba.HTAPsChroma[k]
3781 / 6.0,
3782 1.0));
3783 } else {
3784 locals->PSCL_FACTOR_CHROMA[k] = dml_min(
3785 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3786 mode_lib->vba.MaxPSCLToLBThroughput);
3787 }
3788 locals->MinDPPCLKUsingSingleDPP[k] =
3789 mode_lib->vba.PixelClock[k]
3790 * dml_max5(
3791 mode_lib->vba.vtaps[k] / 6.0
3792 * dml_min(
3793 1.0,
3794 mode_lib->vba.HRatio[k]),
3795 mode_lib->vba.HRatio[k]
3796 * mode_lib->vba.VRatio[k]
3797 / locals->PSCL_FACTOR[k],
3798 mode_lib->vba.VTAPsChroma[k]
3799 / 6.0
3800 * dml_min(
3801 1.0,
3802 mode_lib->vba.HRatio[k]
3803 / 2.0),
3804 mode_lib->vba.HRatio[k]
3805 * mode_lib->vba.VRatio[k]
3806 / 4.0
3807 / locals->PSCL_FACTOR_CHROMA[k],
3808 1.0);
3809 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0
3810 || mode_lib->vba.HTAPsChroma[k] > 6.0
3811 || mode_lib->vba.VTAPsChroma[k] > 6.0)
3812 && locals->MinDPPCLKUsingSingleDPP[k]
3813 < 2.0 * mode_lib->vba.PixelClock[k]) {
3814 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3815 * mode_lib->vba.PixelClock[k];
3816 }
3817 }
3818 }
3819 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3820 Calculate256BBlockSizes(
3821 mode_lib->vba.SourcePixelFormat[k],
3822 mode_lib->vba.SurfaceTiling[k],
3823 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
3824 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
3825 &locals->Read256BlockHeightY[k],
3826 &locals->Read256BlockHeightC[k],
3827 &locals->Read256BlockWidthY[k],
3828 &locals->Read256BlockWidthC[k]);
3829 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3830 locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k];
3831 locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k];
3832 } else {
3833 locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k];
3834 locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k];
3835 }
3836 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3837 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
3838 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3839 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16
3840 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) {
3841 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3842 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3843 && (mode_lib->vba.SurfaceTiling[k]
3844 == dm_sw_4kb_s
3845 || mode_lib->vba.SurfaceTiling[k]
3846 == dm_sw_4kb_s_x
3847 || mode_lib->vba.SurfaceTiling[k]
3848 == dm_sw_64kb_s
3849 || mode_lib->vba.SurfaceTiling[k]
3850 == dm_sw_64kb_s_t
3851 || mode_lib->vba.SurfaceTiling[k]
3852 == dm_sw_64kb_s_x
3853 || mode_lib->vba.SurfaceTiling[k]
3854 == dm_sw_var_s
3855 || mode_lib->vba.SurfaceTiling[k]
3856 == dm_sw_var_s_x)
3857 && mode_lib->vba.SourceScan[k] == dm_horz)) {
3858 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3859 } else {
3860 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3861 / 2.0;
3862 }
3863 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3864 } else {
3865 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
3866 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3867 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3868 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
3869 && mode_lib->vba.SourceScan[k] == dm_horz) {
3870 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3871 / 2.0;
3872 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3873 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
3874 && mode_lib->vba.SourceScan[k] == dm_horz) {
3875 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]
3876 / 2.0;
3877 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3878 } else {
3879 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3880 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3881 }
3882 }
3883 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
3884 mode_lib->vba.MaximumSwathWidthSupport = 8192.0;
3885 } else {
3886 mode_lib->vba.MaximumSwathWidthSupport = 5120.0;
3887 }
3888 mode_lib->vba.MaximumSwathWidthInDETBuffer =
3889 dml_min(
3890 mode_lib->vba.MaximumSwathWidthSupport,
3891 mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
3892 / (locals->BytePerPixelInDETY[k]
3893 * locals->MinSwathHeightY[k]
3894 + locals->BytePerPixelInDETC[k]
3895 / 2.0
3896 * locals->MinSwathHeightC[k]));
3897 if (locals->BytePerPixelInDETC[k] == 0.0) {
3898 mode_lib->vba.MaximumSwathWidthInLineBuffer =
3899 mode_lib->vba.LineBufferSize
3900 * dml_max(mode_lib->vba.HRatio[k], 1.0)
3901 / mode_lib->vba.LBBitPerPixel[k]
3902 / (mode_lib->vba.vtaps[k]
3903 + dml_max(
3904 dml_ceil(
3905 mode_lib->vba.VRatio[k],
3906 1.0)
3907 - 2,
3908 0.0));
3909 } else {
3910 mode_lib->vba.MaximumSwathWidthInLineBuffer =
3911 dml_min(
3912 mode_lib->vba.LineBufferSize
3913 * dml_max(
3914 mode_lib->vba.HRatio[k],
3915 1.0)
3916 / mode_lib->vba.LBBitPerPixel[k]
3917 / (mode_lib->vba.vtaps[k]
3918 + dml_max(
3919 dml_ceil(
3920 mode_lib->vba.VRatio[k],
3921 1.0)
3922 - 2,
3923 0.0)),
3924 2.0 * mode_lib->vba.LineBufferSize
3925 * dml_max(
3926 mode_lib->vba.HRatio[k]
3927 / 2.0,
3928 1.0)
3929 / mode_lib->vba.LBBitPerPixel[k]
3930 / (mode_lib->vba.VTAPsChroma[k]
3931 + dml_max(
3932 dml_ceil(
3933 mode_lib->vba.VRatio[k]
3934 / 2.0,
3935 1.0)
3936 - 2,
3937 0.0)));
3938 }
3939 locals->MaximumSwathWidth[k] = dml_min(
3940 mode_lib->vba.MaximumSwathWidthInDETBuffer,
3941 mode_lib->vba.MaximumSwathWidthInLineBuffer);
3942 }
3943 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3944 double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown(
3945 mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states],
3946 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
3947
3948 for (j = 0; j < 2; j++) {
3949 mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
3950 mode_lib->vba.MaxDispclk[i],
3951 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
3952 mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
3953 mode_lib->vba.MaxDppclk[i],
3954 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
3955 locals->RequiredDISPCLK[i][j] = 0.0;
3956 locals->DISPCLK_DPPCLK_Support[i][j] = true;
3957 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3958 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine =
3959 mode_lib->vba.PixelClock[k]
3960 * (1.0
3961 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
3962 / 100.0)
3963 * (1.0
3964 + mode_lib->vba.DISPCLKRampingMargin
3965 / 100.0);
3966 if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i]
3967 && i == mode_lib->vba.soc.num_states)
3968 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k]
3969 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3970
3971 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
3972 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
3973 if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i]
3974 && i == mode_lib->vba.soc.num_states)
3975 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
3976 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3977
3978 locals->ODMCombineEnablePerState[i][k] = false;
3979 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
3980 if (mode_lib->vba.ODMCapability) {
3981 if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
3982 locals->ODMCombineEnablePerState[i][k] = true;
3983 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
3984 } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN21_MAX_DSC_IMAGE_WIDTH)) {
3985 locals->ODMCombineEnablePerState[i][k] = true;
3986 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
3987 } else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
3988 locals->ODMCombineEnablePerState[i][k] = true;
3989 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
3990 }
3991 }
3992
3993 if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
3994 && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
3995 && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
3996 locals->NoOfDPP[i][j][k] = 1;
3997 locals->RequiredDPPCLK[i][j][k] =
3998 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3999 } else {
4000 locals->NoOfDPP[i][j][k] = 2;
4001 locals->RequiredDPPCLK[i][j][k] =
4002 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4003 }
4004 locals->RequiredDISPCLK[i][j] = dml_max(
4005 locals->RequiredDISPCLK[i][j],
4006 mode_lib->vba.PlaneRequiredDISPCLK);
4007 if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4008 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity)
4009 || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) {
4010 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4011 }
4012 }
4013 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4014 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4015 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4016 if (j == 1) {
4017 while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP
4018 && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) {
4019 double BWOfNonSplitPlaneOfMaximumBandwidth;
4020 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4021
4022 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4023 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4024 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4025 if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) {
4026 BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k];
4027 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4028 }
4029 }
4030 locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4031 locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4032 locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4033 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4034 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1;
4035 }
4036 }
4037 if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) {
4038 locals->RequiredDISPCLK[i][j] = 0.0;
4039 locals->DISPCLK_DPPCLK_Support[i][j] = true;
4040 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4041 locals->ODMCombineEnablePerState[i][k] = false;
4042 if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
4043 locals->NoOfDPP[i][j][k] = 1;
4044 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4045 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4046 } else {
4047 locals->NoOfDPP[i][j][k] = 2;
4048 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4049 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4050 }
4051 if (i != mode_lib->vba.soc.num_states) {
4052 mode_lib->vba.PlaneRequiredDISPCLK =
4053 mode_lib->vba.PixelClock[k]
4054 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4055 * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
4056 } else {
4057 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k]
4058 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4059 }
4060 locals->RequiredDISPCLK[i][j] = dml_max(
4061 locals->RequiredDISPCLK[i][j],
4062 mode_lib->vba.PlaneRequiredDISPCLK);
4063 if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4064 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
4065 || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)
4066 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4067 }
4068 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4069 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4070 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4071 }
4072 locals->RequiredDISPCLK[i][j] = dml_max(
4073 locals->RequiredDISPCLK[i][j],
4074 mode_lib->vba.WritebackRequiredDISPCLK);
4075 if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity
4076 < mode_lib->vba.WritebackRequiredDISPCLK) {
4077 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4078 }
4079 }
4080 }
4081 /*Viewport Size Check*/
4082
/*
 * For every state index i (0 .. soc.num_states inclusive),
 * ViewportSizeSupport[i][0] starts out true and is cleared as soon as one
 * active plane needs a swath wider than MaximumSwathWidth[k]:
 *  - plane in 2:1 ODM combine: the width compared is the smaller of
 *    SwathWidthYSingleDPP[k] and round(HActive[k] / 2 * HRatio[k]);
 *  - otherwise: the width compared is SwathWidthYSingleDPP[k] / 2.
 * Only the [i][0] entry is written by this loop.
 *
 * NOTE(review): elsewhere in this function ODMCombineEnablePerState is
 * assigned plain true/false, so the comparison below presumably relies on
 * dm_odm_combine_mode_2to1 having the value 1 - confirm against the enum.
 */
4083 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4084 locals->ViewportSizeSupport[i][0] = true;
4085 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4086 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4087 if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
4088 > locals->MaximumSwathWidth[k]) {
4089 locals->ViewportSizeSupport[i][0] = false;
4090 }
4091 } else {
4092 if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
4093 locals->ViewportSizeSupport[i][0] = false;
4094 }
4095 }
4096 }
4097 }
4098 /*Total Available Pipes Support Check*/
4099
/*
 * For each state i and each j in {0, 1}, record whether the DPP count
 * accumulated in TotalNumberOfActiveDPP[i][j] fits within MaxNumDPP.
 */
4100 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4101 for (j = 0; j < 2; j++) {
4102 if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP)
4103 locals->TotalAvailablePipesSupport[i][j] = true;
4104 else
4105 locals->TotalAvailablePipesSupport[i][j] = false;
4106 }
4107 }
4108 /*Total Available OTG Support Check*/
4109
/*
 * Every plane whose BlendingAndTiming[k] equals its own index k is counted
 * as one active OTG; NumberOfOTGSupport is true when that count does not
 * exceed MaxNumOTG. The result is state-independent (no loop over i).
 */
4110 mode_lib->vba.TotalNumberOfActiveOTG = 0.0;
4111 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4112 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4113 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG
4114 + 1.0;
4115 }
4116 }
4117 if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) {
4118 mode_lib->vba.NumberOfOTGSupport = true;
4119 } else {
4120 mode_lib->vba.NumberOfOTGSupport = false;
4121 }
4122 /*Display IO and DSC Support Check*/
4123
/*
 * Flag NonsupportedDSCInputBPC when any active plane has a
 * DSCInputBitPerComponent other than 8, 10 or 12. The value is inspected
 * for every active plane; this loop does not look at whether DSC is
 * actually enabled for the plane.
 */
4124 mode_lib->vba.NonsupportedDSCInputBPC = false;
4125 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4126 if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
4127 || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
4128 || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) {
4129 mode_lib->vba.NonsupportedDSCInputBPC = true;
4130 }
4131 }
/*
 * Per state i and plane k: derive OutputBppPerState[i][k], RequiresDSC[i][k]
 * and RequiresFEC[i][k].
 *
 *  - Planes with BlendingAndTiming[k] != k get BPP_BLENDED_PIPE and keep
 *    RequiresDSC / RequiresFEC cleared.
 *  - HDMI: never DSC/FEC; bpp is TruncToValidBPP() of
 *    min(600, PHYCLKPerState[i]) / PixelClockBackEnd[k] * 24.
 *  - DP / eDP: three tiers keyed on PHYCLKPerState[i] >= 270, 540, 810
 *    (presumably the DP link symbol rates - confirm). Each tier computes a
 *    non-DSC bpp (Outbpp) and a DSC bpp (OutbppDSC, additionally derated by
 *    EffectiveFECOverhead, which is 0 for eDP). The 540 and 810 tiers only
 *    run while Outbpp is still BPP_INVALID.
 *
 * NOTE(review): mode_lib->vba.Outbpp is a shared scratch field that is not
 * reset per plane/state here. When PHYCLKPerState[i] < 270 the
 * "Outbpp == BPP_INVALID" tests below read whatever an earlier iteration
 * left behind, and OutputBppPerState[i][k] may not be written at all in
 * this loop. The same applies to Output[k] values other than HDMI/DP/eDP.
 * Confirm whether callers guarantee these cases cannot occur.
 */
4132 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4133 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4134 locals->RequiresDSC[i][k] = false;
4135 locals->RequiresFEC[i][k] = 0;
4136 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4137 if (mode_lib->vba.Output[k] == dm_hdmi) {
4138 locals->RequiresDSC[i][k] = false;
4139 locals->RequiresFEC[i][k] = 0;
4140 locals->OutputBppPerState[i][k] = TruncToValidBPP(
4141 dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
4142 mode_lib->vba.ForcedOutputLinkBPP[k],
4143 false,
4144 mode_lib->vba.Output[k],
4145 mode_lib->vba.OutputFormat[k],
4146 mode_lib->vba.DSCInputBitPerComponent[k]);
4147 } else if (mode_lib->vba.Output[k] == dm_dp
4148 || mode_lib->vba.Output[k] == dm_edp) {
/* FEC overhead only derates the DSC bpp, and is not applied for eDP. */
4149 if (mode_lib->vba.Output[k] == dm_edp) {
4150 mode_lib->vba.EffectiveFECOverhead = 0.0;
4151 } else {
4152 mode_lib->vba.EffectiveFECOverhead =
4153 mode_lib->vba.FECOverhead;
4154 }
/* 270 tier: DSC is used only when DSCEnabled[k] is set. */
4155 if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) {
4156 mode_lib->vba.Outbpp = TruncToValidBPP(
4157 (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0
4158 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4159 mode_lib->vba.ForcedOutputLinkBPP[k],
4160 false,
4161 mode_lib->vba.Output[k],
4162 mode_lib->vba.OutputFormat[k],
4163 mode_lib->vba.DSCInputBitPerComponent[k]);
4164 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4165 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0
4166 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4167 mode_lib->vba.ForcedOutputLinkBPP[k],
4168 true,
4169 mode_lib->vba.Output[k],
4170 mode_lib->vba.OutputFormat[k],
4171 mode_lib->vba.DSCInputBitPerComponent[k]);
4172 if (mode_lib->vba.DSCEnabled[k] == true) {
4173 locals->RequiresDSC[i][k] = true;
4174 if (mode_lib->vba.Output[k] == dm_dp) {
4175 locals->RequiresFEC[i][k] = true;
4176 } else {
4177 locals->RequiresFEC[i][k] = false;
4178 }
4179 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4180 } else {
4181 locals->RequiresDSC[i][k] = false;
4182 locals->RequiresFEC[i][k] = false;
4183 }
4184 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4185 }
/* 540 tier: retried only if the result so far is BPP_INVALID. */
4186 if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) {
4187 mode_lib->vba.Outbpp = TruncToValidBPP(
4188 (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0
4189 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4190 mode_lib->vba.ForcedOutputLinkBPP[k],
4191 false,
4192 mode_lib->vba.Output[k],
4193 mode_lib->vba.OutputFormat[k],
4194 mode_lib->vba.DSCInputBitPerComponent[k]);
4195 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4196 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0
4197 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4198 mode_lib->vba.ForcedOutputLinkBPP[k],
4199 true,
4200 mode_lib->vba.Output[k],
4201 mode_lib->vba.OutputFormat[k],
4202 mode_lib->vba.DSCInputBitPerComponent[k]);
4203 if (mode_lib->vba.DSCEnabled[k] == true) {
4204 locals->RequiresDSC[i][k] = true;
4205 if (mode_lib->vba.Output[k] == dm_dp) {
4206 locals->RequiresFEC[i][k] = true;
4207 } else {
4208 locals->RequiresFEC[i][k] = false;
4209 }
4210 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4211 } else {
4212 locals->RequiresDSC[i][k] = false;
4213 locals->RequiresFEC[i][k] = false;
4214 }
4215 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4216 }
/*
 * 810 tier: unlike the two lower tiers, DSC is also selected here when the
 * non-DSC bpp is BPP_INVALID even though DSCEnabled[k] is not set.
 */
4217 if (mode_lib->vba.Outbpp == BPP_INVALID
4218 && mode_lib->vba.PHYCLKPerState[i]
4219 >= 810.0) {
4220 mode_lib->vba.Outbpp = TruncToValidBPP(
4221 (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0
4222 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4223 mode_lib->vba.ForcedOutputLinkBPP[k],
4224 false,
4225 mode_lib->vba.Output[k],
4226 mode_lib->vba.OutputFormat[k],
4227 mode_lib->vba.DSCInputBitPerComponent[k]);
4228 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4229 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0
4230 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4231 mode_lib->vba.ForcedOutputLinkBPP[k],
4232 true,
4233 mode_lib->vba.Output[k],
4234 mode_lib->vba.OutputFormat[k],
4235 mode_lib->vba.DSCInputBitPerComponent[k]);
4236 if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) {
4237 locals->RequiresDSC[i][k] = true;
4238 if (mode_lib->vba.Output[k] == dm_dp) {
4239 locals->RequiresFEC[i][k] = true;
4240 } else {
4241 locals->RequiresFEC[i][k] = false;
4242 }
4243 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4244 } else {
4245 locals->RequiresDSC[i][k] = false;
4246 locals->RequiresFEC[i][k] = false;
4247 }
4248 locals->OutputBppPerState[i][k] =
4249 mode_lib->vba.Outbpp;
4250 }
4251 }
4252 } else {
4253 locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE;
4254 }
4255 }
4256 }
/*
 * DIOSupport[i] starts true and is cleared when any active plane that is
 * not exempted through skip_dio_check[k] either
 *  - has OutputBppPerState[i][k] == BPP_INVALID, or
 *  - combines 4:2:0 output, interlaced timing and
 *    ProgressiveToInterlaceUnitInOPP.
 */
4257 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4258 locals->DIOSupport[i] = true;
4259 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4260 if (!mode_lib->vba.skip_dio_check[k]
4261 && (locals->OutputBppPerState[i][k] == BPP_INVALID
4262 || (mode_lib->vba.OutputFormat[k] == dm_420
4263 && mode_lib->vba.Interlace[k] == true
4264 && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true))) {
4265 locals->DIOSupport[i] = false;
4266 }
4267 }
4268 }
/*
 * DSCCLK check: for each state i, set DSCCLKRequiredMoreThanSupported[i]
 * when at least one DP/eDP plane with BlendingAndTiming[k] == k and
 * RequiresDSC[i][k] needs more DSC clock than the state provides.
 *
 * Required clock is PixelClockBackEnd[k] / 3 / DSCFormatFactor, or
 * PixelClockBackEnd[k] / 6 / DSCFormatFactor with 2:1 ODM combine, where
 * DSCFormatFactor is 2 for dm_420 / dm_n422 output and 1 otherwise. The
 * limit is MaxDSCCLK[i] derated by DISPCLKDPPCLKDSCCLKDownSpreading.
 *
 * The flag is cleared once per state, before the plane loop. It used to be
 * cleared at the top of every plane iteration, which discarded a failure
 * recorded for an earlier plane whenever a later plane passed, so only the
 * last plane effectively decided the result.
 */
4269 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4270 locals->DSCCLKRequiredMoreThanSupported[i] = false;
4271 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4272 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4273 if ((mode_lib->vba.Output[k] == dm_dp
4274 || mode_lib->vba.Output[k] == dm_edp)) {
4275 if (mode_lib->vba.OutputFormat[k] == dm_420
4276 || mode_lib->vba.OutputFormat[k]
4277 == dm_n422) {
4278 mode_lib->vba.DSCFormatFactor = 2;
4279 } else {
4280 mode_lib->vba.DSCFormatFactor = 1;
4281 }
4282 if (locals->RequiresDSC[i][k] == true) {
4283 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4284 if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
4285 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4286 locals->DSCCLKRequiredMoreThanSupported[i] =
4287 true;
4288 }
4289 } else {
4290 if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor
4291 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4292 locals->DSCCLKRequiredMoreThanSupported[i] =
4293 true;
4294 }
4295 }
4296 }
4297 }
4298 }
4299 }
4300 }
/*
 * Per state i: total the DSC units needed by all planes with
 * RequiresDSC[i][k] set (2 units for a plane in 2:1 ODM combine, 1
 * otherwise) and set NotEnoughDSCUnits[i] when the total exceeds
 * NumberOfDSC. TotalDSCUnitsRequired is a scratch field that ends up
 * holding the total of the last state processed.
 */
4301 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4302 locals->NotEnoughDSCUnits[i] = false;
4303 mode_lib->vba.TotalDSCUnitsRequired = 0.0;
4304 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4305 if (locals->RequiresDSC[i][k] == true) {
4306 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4307 mode_lib->vba.TotalDSCUnitsRequired =
4308 mode_lib->vba.TotalDSCUnitsRequired + 2.0;
4309 } else {
4310 mode_lib->vba.TotalDSCUnitsRequired =
4311 mode_lib->vba.TotalDSCUnitsRequired + 1.0;
4312 }
4313 }
4314 }
4315 if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) {
4316 locals->NotEnoughDSCUnits[i] = true;
4317 }
4318 }
4319 /*DSC Delay per state*/
4320
/*
 * Per state i and plane k, compute DSCDelayPerState[i][k].
 *
 *  1. Pick the slice count: 0 for planes with BlendingAndTiming[k] != k or
 *     without RequiresDSC, otherwise 1/2/4/8 by PixelClockBackEnd[k]
 *     thresholds (340, 680, 1360), and above 3200 the value
 *     PixelClockBackEnd[k] / 400 rounded up to a multiple of 4.
 *  2. bpp is OutputBppPerState[i][k], or 0 when that is BPP_BLENDED_PIPE or
 *     BPP_INVALID.
 *  3. When DSC is required and bpp != 0 the delay is
 *     dscceComputeDelay() + dscComputeDelay(); with ODM combine enabled the
 *     sum is doubled and dscceComputeDelay() receives slices / 2 as slice
 *     count (the slice width argument still uses HActive[k] / slices).
 *     The result is rescaled by PixelClock[k] / PixelClockBackEnd[k].
 *     Otherwise the delay is 0.
 *  4. A second pass copies the delay of plane m to every plane k with
 *     BlendingAndTiming[k] == m when plane m requires DSC.
 *
 * mode_lib->vba.slices and mode_lib->vba.bpp are scratch fields overwritten
 * on every iteration.
 */
4321 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4322 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4323 if (mode_lib->vba.BlendingAndTiming[k] != k) {
4324 mode_lib->vba.slices = 0;
4325 } else if (locals->RequiresDSC[i][k] == 0
4326 || locals->RequiresDSC[i][k] == false) {
4327 mode_lib->vba.slices = 0;
4328 } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) {
4329 mode_lib->vba.slices = dml_ceil(
4330 mode_lib->vba.PixelClockBackEnd[k] / 400.0,
4331 4.0);
4332 } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) {
4333 mode_lib->vba.slices = 8.0;
4334 } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) {
4335 mode_lib->vba.slices = 4.0;
4336 } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) {
4337 mode_lib->vba.slices = 2.0;
4338 } else {
4339 mode_lib->vba.slices = 1.0;
4340 }
4341 if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE
4342 || locals->OutputBppPerState[i][k] == BPP_INVALID) {
4343 mode_lib->vba.bpp = 0.0;
4344 } else {
4345 mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
4346 }
/* slices is non-zero whenever RequiresDSC[i][k] is true on a plane with
 * BlendingAndTiming[k] == k, per the chain above. */
4347 if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
4348 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4349 locals->DSCDelayPerState[i][k] =
4350 dscceComputeDelay(
4351 mode_lib->vba.DSCInputBitPerComponent[k],
4352 mode_lib->vba.bpp,
4353 dml_ceil(
4354 mode_lib->vba.HActive[k]
4355 / mode_lib->vba.slices,
4356 1.0),
4357 mode_lib->vba.slices,
4358 mode_lib->vba.OutputFormat[k])
4359 + dscComputeDelay(
4360 mode_lib->vba.OutputFormat[k]);
4361 } else {
4362 locals->DSCDelayPerState[i][k] =
4363 2.0 * (dscceComputeDelay(
4364 mode_lib->vba.DSCInputBitPerComponent[k],
4365 mode_lib->vba.bpp,
4366 dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0),
4367 mode_lib->vba.slices / 2,
4368 mode_lib->vba.OutputFormat[k])
4369 + dscComputeDelay(mode_lib->vba.OutputFormat[k]));
4370 }
4371 locals->DSCDelayPerState[i][k] =
4372 locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k];
4373 } else {
4374 locals->DSCDelayPerState[i][k] = 0.0;
4375 }
4376 }
/*
 * NOTE(review): the innermost loop over j does not use j in its body, so it
 * repeats the same copy NumberOfActivePlanes times. It looks redundant but
 * is harmless; it is left untouched here. It does clobber j, which the
 * following code re-initialises before use.
 */
4377 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4378 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4379 for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) {
4380 if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true)
4381 locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m];
4382 }
4383 }
4384 }
4385 }
4386
4387 //Prefetch Check
/*
 * For each state i and each j in {0, 1}: sum NoOfDPP[i][j][k] over the
 * active planes that have DCCEnable[k] set, giving
 * TotalNumberOfDCCActiveDPP[i][j].
 */
4388 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
4389 for (j = 0; j <= 1; ++j) {
4390 locals->TotalNumberOfDCCActiveDPP[i][j] = 0;
4391 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4392 if (mode_lib->vba.DCCEnable[k] == true)
4393 locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4394 }
4395 }
4396 }
4397
/*
 * UrgentLatency is the largest of the three urgent-latency inputs (pixel
 * data only, pixel mixed with VM data, VM data only).
 */
4398 mode_lib->vba.UrgentLatency = dml_max3(
4399 mode_lib->vba.UrgentLatencyPixelDataOnly,
4400 mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
4401 mode_lib->vba.UrgentLatencyVMDataOnly);
/*
 * MinPrefetchMode / MaxPrefetchMode are passed by address, so presumably
 * filled in by the callee from
 * AllowDRAMSelfRefreshOrDRAMClockChangeInVblank; its return value is kept
 * in PrefetchERROR.
 */
4402 mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(
4403 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
4404 &mode_lib->vba.MinPrefetchMode,
4405 &mode_lib->vba.MaxPrefetchMode);
4406
4407 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4408 for (j = 0; j < 2; j++) {
4409 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4410 locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
4411 locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k];
4412 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4413 locals->SwathWidthYThisState[k] =
4414 dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
4415 } else {
4416 locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
4417 }
4418 mode_lib->vba.SwathWidthGranularityY = 256.0
4419 / dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
4420 / locals->MaxSwathHeightY[k];
4421 mode_lib->vba.RoundedUpMaxSwathSizeBytesY =
4422 (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY)
4423 + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k];
4424 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4425 mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil(
4426 mode_lib->vba.RoundedUpMaxSwathSizeBytesY,
4427 256.0) + 256;
4428 }
4429 if (locals->MaxSwathHeightC[k] > 0.0) {
4430 mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k];
4431 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC)
4432 + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k];
4433 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4434 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256;
4435 }
4436 } else {
4437 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0;
4438 }
4439 if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC
4440 <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
4441 locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k];
4442 locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k];
4443 } else {
4444 locals->SwathHeightYThisState[k] =
4445 locals->MinSwathHeightY[k];
4446 locals->SwathHeightCThisState[k] =
4447 locals->MinSwathHeightC[k];
4448 }
4449 }
4450
4451 CalculateDCFCLKDeepSleep(
4452 mode_lib,
4453 mode_lib->vba.NumberOfActivePlanes,
4454 locals->BytePerPixelInDETY,
4455 locals->BytePerPixelInDETC,
4456 mode_lib->vba.VRatio,
4457 locals->SwathWidthYThisState,
4458 locals->NoOfDPPThisState,
4459 mode_lib->vba.HRatio,
4460 mode_lib->vba.PixelClock,
4461 locals->PSCL_FACTOR,
4462 locals->PSCL_FACTOR_CHROMA,
4463 locals->RequiredDPPCLKThisState,
4464 &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]);
4465
4466 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4467 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
4468 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
4469 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
4470 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
4471 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) {
4472 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4473 mode_lib,
4474 mode_lib->vba.DCCEnable[k],
4475 locals->Read256BlockHeightC[k],
4476 locals->Read256BlockWidthC[k],
4477 mode_lib->vba.SourcePixelFormat[k],
4478 mode_lib->vba.SurfaceTiling[k],
4479 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
4480 mode_lib->vba.SourceScan[k],
4481 mode_lib->vba.ViewportWidth[k] / 2.0,
4482 mode_lib->vba.ViewportHeight[k] / 2.0,
4483 locals->SwathWidthYThisState[k] / 2.0,
4484 mode_lib->vba.GPUVMEnable,
4485 mode_lib->vba.HostVMEnable,
4486 mode_lib->vba.HostVMMaxPageTableLevels,
4487 mode_lib->vba.HostVMCachedPageTableLevels,
4488 mode_lib->vba.VMMPageSize,
4489 mode_lib->vba.PTEBufferSizeInRequestsChroma,
4490 mode_lib->vba.PitchC[k],
4491 0.0,
4492 &locals->MacroTileWidthC[k],
4493 &mode_lib->vba.MetaRowBytesC,
4494 &mode_lib->vba.DPTEBytesPerRowC,
4495 &locals->PTEBufferSizeNotExceededC[i][j][k],
4496 locals->dpte_row_width_chroma_ub,
4497 &locals->dpte_row_height_chroma[k],
4498 &locals->meta_req_width_chroma[k],
4499 &locals->meta_req_height_chroma[k],
4500 &locals->meta_row_width_chroma[k],
4501 &locals->meta_row_height_chroma[k],
4502 &locals->vm_group_bytes_chroma,
4503 &locals->dpte_group_bytes_chroma,
4504 locals->PixelPTEReqWidthC,
4505 locals->PixelPTEReqHeightC,
4506 locals->PTERequestSizeC,
4507 locals->dpde0_bytes_per_frame_ub_c,
4508 locals->meta_pte_bytes_per_frame_ub_c);
4509 locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
4510 mode_lib,
4511 mode_lib->vba.VRatio[k]/2,
4512 mode_lib->vba.VTAPsChroma[k],
4513 mode_lib->vba.Interlace[k],
4514 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4515 locals->SwathHeightCThisState[k],
4516 mode_lib->vba.ViewportYStartC[k],
4517 &locals->PrefillC[k],
4518 &locals->MaxNumSwC[k]);
4519 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma;
4520 } else {
4521 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
4522 mode_lib->vba.MetaRowBytesC = 0.0;
4523 mode_lib->vba.DPTEBytesPerRowC = 0.0;
4524 locals->PrefetchLinesC[0][0][k] = 0.0;
4525 locals->PTEBufferSizeNotExceededC[i][j][k] = true;
4526 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
4527 }
4528 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4529 mode_lib,
4530 mode_lib->vba.DCCEnable[k],
4531 locals->Read256BlockHeightY[k],
4532 locals->Read256BlockWidthY[k],
4533 mode_lib->vba.SourcePixelFormat[k],
4534 mode_lib->vba.SurfaceTiling[k],
4535 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
4536 mode_lib->vba.SourceScan[k],
4537 mode_lib->vba.ViewportWidth[k],
4538 mode_lib->vba.ViewportHeight[k],
4539 locals->SwathWidthYThisState[k],
4540 mode_lib->vba.GPUVMEnable,
4541 mode_lib->vba.HostVMEnable,
4542 mode_lib->vba.HostVMMaxPageTableLevels,
4543 mode_lib->vba.HostVMCachedPageTableLevels,
4544 mode_lib->vba.VMMPageSize,
4545 locals->PTEBufferSizeInRequestsForLuma,
4546 mode_lib->vba.PitchY[k],
4547 mode_lib->vba.DCCMetaPitchY[k],
4548 &locals->MacroTileWidthY[k],
4549 &mode_lib->vba.MetaRowBytesY,
4550 &mode_lib->vba.DPTEBytesPerRowY,
4551 &locals->PTEBufferSizeNotExceededY[i][j][k],
4552 locals->dpte_row_width_luma_ub,
4553 &locals->dpte_row_height[k],
4554 &locals->meta_req_width[k],
4555 &locals->meta_req_height[k],
4556 &locals->meta_row_width[k],
4557 &locals->meta_row_height[k],
4558 &locals->vm_group_bytes[k],
4559 &locals->dpte_group_bytes[k],
4560 locals->PixelPTEReqWidthY,
4561 locals->PixelPTEReqHeightY,
4562 locals->PTERequestSizeY,
4563 locals->dpde0_bytes_per_frame_ub_l,
4564 locals->meta_pte_bytes_per_frame_ub_l);
4565 locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
4566 mode_lib,
4567 mode_lib->vba.VRatio[k],
4568 mode_lib->vba.vtaps[k],
4569 mode_lib->vba.Interlace[k],
4570 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4571 locals->SwathHeightYThisState[k],
4572 mode_lib->vba.ViewportYStartY[k],
4573 &locals->PrefillY[k],
4574 &locals->MaxNumSwY[k]);
4575 locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
4576 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
4577 locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
4578 locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
4579
4580 CalculateActiveRowBandwidth(
4581 mode_lib->vba.GPUVMEnable,
4582 mode_lib->vba.SourcePixelFormat[k],
4583 mode_lib->vba.VRatio[k],
4584 mode_lib->vba.DCCEnable[k],
4585 mode_lib->vba.HTotal[k] /
4586 mode_lib->vba.PixelClock[k],
4587 mode_lib->vba.MetaRowBytesY,
4588 mode_lib->vba.MetaRowBytesC,
4589 locals->meta_row_height[k],
4590 locals->meta_row_height_chroma[k],
4591 mode_lib->vba.DPTEBytesPerRowY,
4592 mode_lib->vba.DPTEBytesPerRowC,
4593 locals->dpte_row_height[k],
4594 locals->dpte_row_height_chroma[k],
4595 &locals->meta_row_bw[k],
4596 &locals->dpte_row_bw[k]);
4597 }
4598 mode_lib->vba.ExtraLatency = CalculateExtraLatency(
4599 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i],
4600 locals->TotalNumberOfActiveDPP[i][j],
4601 mode_lib->vba.PixelChunkSizeInKByte,
4602 locals->TotalNumberOfDCCActiveDPP[i][j],
4603 mode_lib->vba.MetaChunkSize,
4604 locals->ReturnBWPerState[i][0],
4605 mode_lib->vba.GPUVMEnable,
4606 mode_lib->vba.HostVMEnable,
4607 mode_lib->vba.NumberOfActivePlanes,
4608 locals->NoOfDPPThisState,
4609 locals->dpte_group_bytes,
4610 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4611 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4612 mode_lib->vba.HostVMMaxPageTableLevels,
4613 mode_lib->vba.HostVMCachedPageTableLevels);
4614
4615 mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
4616 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4617 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4618 if (mode_lib->vba.WritebackEnable[k] == true) {
4619 locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency
4620 + CalculateWriteBackDelay(
4621 mode_lib->vba.WritebackPixelFormat[k],
4622 mode_lib->vba.WritebackHRatio[k],
4623 mode_lib->vba.WritebackVRatio[k],
4624 mode_lib->vba.WritebackLumaHTaps[k],
4625 mode_lib->vba.WritebackLumaVTaps[k],
4626 mode_lib->vba.WritebackChromaHTaps[k],
4627 mode_lib->vba.WritebackChromaVTaps[k],
4628 mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j];
4629 } else {
4630 locals->WritebackDelay[i][k] = 0.0;
4631 }
4632 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4633 if (mode_lib->vba.BlendingAndTiming[m] == k
4634 && mode_lib->vba.WritebackEnable[m]
4635 == true) {
4636 locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k],
4637 mode_lib->vba.WritebackLatency + CalculateWriteBackDelay(
4638 mode_lib->vba.WritebackPixelFormat[m],
4639 mode_lib->vba.WritebackHRatio[m],
4640 mode_lib->vba.WritebackVRatio[m],
4641 mode_lib->vba.WritebackLumaHTaps[m],
4642 mode_lib->vba.WritebackLumaVTaps[m],
4643 mode_lib->vba.WritebackChromaHTaps[m],
4644 mode_lib->vba.WritebackChromaVTaps[m],
4645 mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]);
4646 }
4647 }
4648 }
4649 }
4650 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4651 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4652 if (mode_lib->vba.BlendingAndTiming[k] == m) {
4653 locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m];
4654 }
4655 }
4656 }
4657 mode_lib->vba.MaxMaxVStartup[0][0] = 0;
4658 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4659 locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
4660 - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
4661 mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]);
4662 }
4663
4664 mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
4665 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4666 do {
4667 mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
4668 mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
4669
4670 mode_lib->vba.TWait = CalculateTWait(
4671 mode_lib->vba.PrefetchMode[i][j],
4672 mode_lib->vba.DRAMClockChangeLatency,
4673 mode_lib->vba.UrgentLatency,
4674 mode_lib->vba.SREnterPlusExitTime);
4675 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4676 Pipe myPipe;
4677 HostVM myHostVM;
4678
4679 if (mode_lib->vba.XFCEnabled[k] == true) {
4680 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
4681 CalculateRemoteSurfaceFlipDelay(
4682 mode_lib,
4683 mode_lib->vba.VRatio[k],
4684 locals->SwathWidthYThisState[k],
4685 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
4686 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4687 mode_lib->vba.XFCTSlvVupdateOffset,
4688 mode_lib->vba.XFCTSlvVupdateWidth,
4689 mode_lib->vba.XFCTSlvVreadyOffset,
4690 mode_lib->vba.XFCXBUFLatencyTolerance,
4691 mode_lib->vba.XFCFillBWOverhead,
4692 mode_lib->vba.XFCSlvChunkSize,
4693 mode_lib->vba.XFCBusTransportTime,
4694 mode_lib->vba.TimeCalc,
4695 mode_lib->vba.TWait,
4696 &mode_lib->vba.SrcActiveDrainRate,
4697 &mode_lib->vba.TInitXFill,
4698 &mode_lib->vba.TslvChk);
4699 } else {
4700 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
4701 }
4702
4703 myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
4704 myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
4705 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
4706 myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
4707 myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
4708 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
4709 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
4710 myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
4711 myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
4712 myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
4713 myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
4714 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
4715 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
4716 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
4717 myPipe.HTotal = mode_lib->vba.HTotal[k];
4718
4719
4720 myHostVM.Enable = mode_lib->vba.HostVMEnable;
4721 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
4722 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
4723
4724
4725 mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
4726 mode_lib,
4727 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4728 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4729 &myPipe,
4730 locals->DSCDelayPerState[i][k],
4731 mode_lib->vba.DPPCLKDelaySubtotal,
4732 mode_lib->vba.DPPCLKDelaySCL,
4733 mode_lib->vba.DPPCLKDelaySCLLBOnly,
4734 mode_lib->vba.DPPCLKDelayCNVCFormater,
4735 mode_lib->vba.DPPCLKDelayCNVCCursor,
4736 mode_lib->vba.DISPCLKDelaySubtotal,
4737 locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
4738 mode_lib->vba.OutputFormat[k],
4739 mode_lib->vba.MaxInterDCNTileRepeaters,
4740 dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
4741 locals->MaximumVStartup[0][0][k],
4742 mode_lib->vba.GPUVMMaxPageTableLevels,
4743 mode_lib->vba.GPUVMEnable,
4744 &myHostVM,
4745 mode_lib->vba.DynamicMetadataEnable[k],
4746 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
4747 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
4748 mode_lib->vba.DCCEnable[k],
4749 mode_lib->vba.UrgentLatency,
4750 mode_lib->vba.ExtraLatency,
4751 mode_lib->vba.TimeCalc,
4752 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
4753 locals->MetaRowBytes[0][0][k],
4754 locals->DPTEBytesPerRow[0][0][k],
4755 locals->PrefetchLinesY[0][0][k],
4756 locals->SwathWidthYThisState[k],
4757 locals->BytePerPixelInDETY[k],
4758 locals->PrefillY[k],
4759 locals->MaxNumSwY[k],
4760 locals->PrefetchLinesC[0][0][k],
4761 locals->BytePerPixelInDETC[k],
4762 locals->PrefillC[k],
4763 locals->MaxNumSwC[k],
4764 locals->SwathHeightYThisState[k],
4765 locals->SwathHeightCThisState[k],
4766 mode_lib->vba.TWait,
4767 mode_lib->vba.XFCEnabled[k],
4768 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
4769 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4770 &locals->dst_x_after_scaler,
4771 &locals->dst_y_after_scaler,
4772 &locals->LineTimesForPrefetch[k],
4773 &locals->PrefetchBW[k],
4774 &locals->LinesForMetaPTE[k],
4775 &locals->LinesForMetaAndDPTERow[k],
4776 &locals->VRatioPreY[i][j][k],
4777 &locals->VRatioPreC[i][j][k],
4778 &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
4779 &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
4780 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
4781 &locals->Tno_bw[k],
4782 &locals->prefetch_vmrow_bw[k],
4783 locals->swath_width_luma_ub,
4784 locals->swath_width_chroma_ub,
4785 &mode_lib->vba.VUpdateOffsetPix[k],
4786 &mode_lib->vba.VUpdateWidthPix[k],
4787 &mode_lib->vba.VReadyOffsetPix[k]);
4788 }
4789 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
4790 mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
4791 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4792 unsigned int m;
4793
4794 locals->cursor_bw[k] = 0;
4795 locals->cursor_bw_pre[k] = 0;
4796 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
4797 locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4798 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
4799 locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4800 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k];
4801 }
4802
4803 CalculateUrgentBurstFactor(
4804 mode_lib->vba.DETBufferSizeInKByte,
4805 locals->SwathHeightYThisState[k],
4806 locals->SwathHeightCThisState[k],
4807 locals->SwathWidthYThisState[k],
4808 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4809 mode_lib->vba.UrgentLatency,
4810 mode_lib->vba.CursorBufferSize,
4811 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
4812 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
4813 mode_lib->vba.VRatio[k],
4814 locals->VRatioPreY[i][j][k],
4815 locals->VRatioPreC[i][j][k],
4816 locals->BytePerPixelInDETY[k],
4817 locals->BytePerPixelInDETC[k],
4818 &locals->UrgentBurstFactorCursor[k],
4819 &locals->UrgentBurstFactorCursorPre[k],
4820 &locals->UrgentBurstFactorLuma[k],
4821 &locals->UrgentBurstFactorLumaPre[k],
4822 &locals->UrgentBurstFactorChroma[k],
4823 &locals->UrgentBurstFactorChromaPre[k],
4824 &locals->NotEnoughUrgentLatencyHiding,
4825 &locals->NotEnoughUrgentLatencyHidingPre);
4826
4827 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
4828 locals->UrgentBurstFactorCursor[k] = 1;
4829 locals->UrgentBurstFactorCursorPre[k] = 1;
4830 locals->UrgentBurstFactorLuma[k] = 1;
4831 locals->UrgentBurstFactorLumaPre[k] = 1;
4832 locals->UrgentBurstFactorChroma[k] = 1;
4833 locals->UrgentBurstFactorChromaPre[k] = 1;
4834 }
4835
4836 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch
4837 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k]
4838 * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4839 * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k];
4840 mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch
4841 + dml_max3(locals->prefetch_vmrow_bw[k],
4842 locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4843 * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k]
4844 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
4845 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4846 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4847 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4848 }
4849 locals->BandwidthWithoutPrefetchSupported[i][0] = true;
4850 if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]
4851 || locals->NotEnoughUrgentLatencyHiding == 1) {
4852 locals->BandwidthWithoutPrefetchSupported[i][0] = false;
4853 }
4854
4855 locals->PrefetchSupported[i][j] = true;
4856 if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]
4857 || locals->NotEnoughUrgentLatencyHiding == 1
4858 || locals->NotEnoughUrgentLatencyHidingPre == 1) {
4859 locals->PrefetchSupported[i][j] = false;
4860 }
4861 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4862 if (locals->LineTimesForPrefetch[k] < 2.0
4863 || locals->LinesForMetaPTE[k] >= 32.0
4864 || locals->LinesForMetaAndDPTERow[k] >= 16.0
4865 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4866 locals->PrefetchSupported[i][j] = false;
4867 }
4868 }
4869 locals->VRatioInPrefetchSupported[i][j] = true;
4870 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4871 if (locals->VRatioPreY[i][j][k] > 4.0
4872 || locals->VRatioPreC[i][j][k] > 4.0
4873 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4874 locals->VRatioInPrefetchSupported[i][j] = false;
4875 }
4876 }
4877 mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
4878 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4879 if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) {
4880 mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
4881 }
4882 }
4883
4884 if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
4885 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4886 mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
4887 } else {
4888 mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
4889 }
4890 } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
4891 && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
4892 || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
4893
4894 if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
4895 mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
4896 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4897 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
4898 - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4899 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4900 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4901 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4902 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4903 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4904 }
4905 mode_lib->vba.TotImmediateFlipBytes = 0.0;
4906 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4907 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
4908 + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k];
4909 }
4910
4911 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4912 CalculateFlipSchedule(
4913 mode_lib,
4914 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4915 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4916 mode_lib->vba.ExtraLatency,
4917 mode_lib->vba.UrgentLatency,
4918 mode_lib->vba.GPUVMMaxPageTableLevels,
4919 mode_lib->vba.HostVMEnable,
4920 mode_lib->vba.HostVMMaxPageTableLevels,
4921 mode_lib->vba.HostVMCachedPageTableLevels,
4922 mode_lib->vba.GPUVMEnable,
4923 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
4924 locals->MetaRowBytes[0][0][k],
4925 locals->DPTEBytesPerRow[0][0][k],
4926 mode_lib->vba.BandwidthAvailableForImmediateFlip,
4927 mode_lib->vba.TotImmediateFlipBytes,
4928 mode_lib->vba.SourcePixelFormat[k],
4929 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4930 mode_lib->vba.VRatio[k],
4931 locals->Tno_bw[k],
4932 mode_lib->vba.DCCEnable[k],
4933 locals->dpte_row_height[k],
4934 locals->meta_row_height[k],
4935 locals->dpte_row_height_chroma[k],
4936 locals->meta_row_height_chroma[k],
4937 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
4938 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
4939 &locals->final_flip_bw[k],
4940 &locals->ImmediateFlipSupportedForPipe[k]);
4941 }
4942 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
4943 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4944 mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
4945 locals->prefetch_vmrow_bw[k],
4946 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4947 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4948 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4949 locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k]
4950 * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k]
4951 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k]
4952 * locals->UrgentBurstFactorCursorPre[k]);
4953 }
4954 locals->ImmediateFlipSupportedForState[i][j] = true;
4955 if (mode_lib->vba.total_dcn_read_bw_with_flip
4956 > locals->ReturnBWPerState[i][0]) {
4957 locals->ImmediateFlipSupportedForState[i][j] = false;
4958 }
4959 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4960 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
4961 locals->ImmediateFlipSupportedForState[i][j] = false;
4962 }
4963 }
4964 } else {
4965 locals->ImmediateFlipSupportedForState[i][j] = false;
4966 }
4967 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
4968 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4969 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4970 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
4971 CalculateWatermarksAndDRAMSpeedChangeSupport(
4972 mode_lib,
4973 mode_lib->vba.PrefetchMode[i][j],
4974 mode_lib->vba.NumberOfActivePlanes,
4975 mode_lib->vba.MaxLineBufferLines,
4976 mode_lib->vba.LineBufferSize,
4977 mode_lib->vba.DPPOutputBufferPixels,
4978 mode_lib->vba.DETBufferSizeInKByte,
4979 mode_lib->vba.WritebackInterfaceLumaBufferSize,
4980 mode_lib->vba.WritebackInterfaceChromaBufferSize,
4981 mode_lib->vba.DCFCLKPerState[i],
4982 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
4983 locals->ReturnBWPerState[i][0],
4984 mode_lib->vba.GPUVMEnable,
4985 locals->dpte_group_bytes,
4986 mode_lib->vba.MetaChunkSize,
4987 mode_lib->vba.UrgentLatency,
4988 mode_lib->vba.ExtraLatency,
4989 mode_lib->vba.WritebackLatency,
4990 mode_lib->vba.WritebackChunkSize,
4991 mode_lib->vba.SOCCLKPerState[i],
4992 mode_lib->vba.DRAMClockChangeLatency,
4993 mode_lib->vba.SRExitTime,
4994 mode_lib->vba.SREnterPlusExitTime,
4995 mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
4996 locals->NoOfDPPThisState,
4997 mode_lib->vba.DCCEnable,
4998 locals->RequiredDPPCLKThisState,
4999 locals->SwathWidthYSingleDPP,
5000 locals->SwathHeightYThisState,
5001 locals->ReadBandwidthLuma,
5002 locals->SwathHeightCThisState,
5003 locals->ReadBandwidthChroma,
5004 mode_lib->vba.LBBitPerPixel,
5005 locals->SwathWidthYThisState,
5006 mode_lib->vba.HRatio,
5007 mode_lib->vba.vtaps,
5008 mode_lib->vba.VTAPsChroma,
5009 mode_lib->vba.VRatio,
5010 mode_lib->vba.HTotal,
5011 mode_lib->vba.PixelClock,
5012 mode_lib->vba.BlendingAndTiming,
5013 locals->BytePerPixelInDETY,
5014 locals->BytePerPixelInDETC,
5015 mode_lib->vba.WritebackEnable,
5016 mode_lib->vba.WritebackPixelFormat,
5017 mode_lib->vba.WritebackDestinationWidth,
5018 mode_lib->vba.WritebackDestinationHeight,
5019 mode_lib->vba.WritebackSourceHeight,
5020 &locals->DRAMClockChangeSupport[i][j],
5021 &mode_lib->vba.UrgentWatermark,
5022 &mode_lib->vba.WritebackUrgentWatermark,
5023 &mode_lib->vba.DRAMClockChangeWatermark,
5024 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
5025 &mode_lib->vba.StutterExitWatermark,
5026 &mode_lib->vba.StutterEnterPlusExitWatermark,
5027 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
5028 }
5029 }
5030
5031 /*Vertical Active BW support*/
5032 {
5033 double MaxTotalVActiveRDBandwidth = 0.0;
5034 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
5035 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
5036 }
5037 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
5038 locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(
5039 locals->IdealSDPPortBandwidthPerState[i][0] *
5040 mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
5041 / 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
5042 mode_lib->vba.NumberOfChannels *
5043 mode_lib->vba.DRAMChannelWidth *
5044 mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
5045 / 100.0);
5046
5047 if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) {
5048 locals->TotalVerticalActiveBandwidthSupport[i][0] = true;
5049 } else {
5050 locals->TotalVerticalActiveBandwidthSupport[i][0] = false;
5051 }
5052 }
5053 }
5054
5055 /*PTE Buffer Size Check*/
5056
5057 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
5058 for (j = 0; j < 2; j++) {
5059 locals->PTEBufferSizeNotExceeded[i][j] = true;
5060 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5061 if (locals->PTEBufferSizeNotExceededY[i][j][k] == false
5062 || locals->PTEBufferSizeNotExceededC[i][j][k] == false) {
5063 locals->PTEBufferSizeNotExceeded[i][j] = false;
5064 }
5065 }
5066 }
5067 }
5068 /*Cursor Support Check*/
5069
5070 mode_lib->vba.CursorSupport = true;
5071 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5072 if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
5073 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
5074 if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) {
5075 mode_lib->vba.CursorSupport = false;
5076 }
5077 }
5078 }
5079 }
5080 /*Valid Pitch Check*/
5081
5082 mode_lib->vba.PitchSupport = true;
5083 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5084 locals->AlignedYPitch[k] = dml_ceil(
5085 dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]),
5086 locals->MacroTileWidthY[k]);
5087 if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) {
5088 mode_lib->vba.PitchSupport = false;
5089 }
5090 if (mode_lib->vba.DCCEnable[k] == true) {
5091 locals->AlignedDCCMetaPitch[k] = dml_ceil(
5092 dml_max(
5093 mode_lib->vba.DCCMetaPitchY[k],
5094 mode_lib->vba.ViewportWidth[k]),
5095 64.0 * locals->Read256BlockWidthY[k]);
5096 } else {
5097 locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k];
5098 }
5099 if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) {
5100 mode_lib->vba.PitchSupport = false;
5101 }
5102 if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
5103 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
5104 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
5105 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
5106 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
5107 locals->AlignedCPitch[k] = dml_ceil(
5108 dml_max(
5109 mode_lib->vba.PitchC[k],
5110 mode_lib->vba.ViewportWidth[k] / 2.0),
5111 locals->MacroTileWidthC[k]);
5112 } else {
5113 locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k];
5114 }
5115 if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) {
5116 mode_lib->vba.PitchSupport = false;
5117 }
5118 }
5119 /*Mode Support, Voltage State and SOC Configuration*/
5120
5121 for (i = mode_lib->vba.soc.num_states; i >= 0; i--) {
5122 for (j = 0; j < 2; j++) {
5123 enum dm_validation_status status = DML_VALIDATION_OK;
5124
5125 if (!mode_lib->vba.ScaleRatioAndTapsSupport) {
5126 status = DML_FAIL_SCALE_RATIO_TAP;
5127 } else if (!mode_lib->vba.SourceFormatPixelAndScanSupport) {
5128 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5129 } else if (!locals->ViewportSizeSupport[i][0]) {
5130 status = DML_FAIL_VIEWPORT_SIZE;
5131 } else if (!locals->DIOSupport[i]) {
5132 status = DML_FAIL_DIO_SUPPORT;
5133 } else if (locals->NotEnoughDSCUnits[i]) {
5134 status = DML_FAIL_NOT_ENOUGH_DSC;
5135 } else if (locals->DSCCLKRequiredMoreThanSupported[i]) {
5136 status = DML_FAIL_DSC_CLK_REQUIRED;
5137 } else if (!locals->ROBSupport[i][0]) {
5138 status = DML_FAIL_REORDERING_BUFFER;
5139 } else if (!locals->DISPCLK_DPPCLK_Support[i][j]) {
5140 status = DML_FAIL_DISPCLK_DPPCLK;
5141 } else if (!locals->TotalAvailablePipesSupport[i][j]) {
5142 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5143 } else if (!mode_lib->vba.NumberOfOTGSupport) {
5144 status = DML_FAIL_NUM_OTG;
5145 } else if (!mode_lib->vba.WritebackModeSupport) {
5146 status = DML_FAIL_WRITEBACK_MODE;
5147 } else if (!mode_lib->vba.WritebackLatencySupport) {
5148 status = DML_FAIL_WRITEBACK_LATENCY;
5149 } else if (!mode_lib->vba.WritebackScaleRatioAndTapsSupport) {
5150 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5151 } else if (!mode_lib->vba.CursorSupport) {
5152 status = DML_FAIL_CURSOR_SUPPORT;
5153 } else if (!mode_lib->vba.PitchSupport) {
5154 status = DML_FAIL_PITCH_SUPPORT;
5155 } else if (!locals->TotalVerticalActiveBandwidthSupport[i][0]) {
5156 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5157 } else if (!locals->PTEBufferSizeNotExceeded[i][j]) {
5158 status = DML_FAIL_PTE_BUFFER_SIZE;
5159 } else if (mode_lib->vba.NonsupportedDSCInputBPC) {
5160 status = DML_FAIL_DSC_INPUT_BPC;
5161 } else if ((mode_lib->vba.HostVMEnable
5162 && !locals->ImmediateFlipSupportedForState[i][j])) {
5163 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5164 } else if (!locals->PrefetchSupported[i][j]) {
5165 status = DML_FAIL_PREFETCH_SUPPORT;
5166 } else if (!locals->VRatioInPrefetchSupported[i][j]) {
5167 status = DML_FAIL_V_RATIO_PREFETCH;
5168 }
5169
5170 if (status == DML_VALIDATION_OK) {
5171 locals->ModeSupport[i][j] = true;
5172 } else {
5173 locals->ModeSupport[i][j] = false;
5174 }
5175 locals->ValidationStatus[i] = status;
5176 }
5177 }
5178 {
5179 unsigned int MaximumMPCCombine = 0;
5180 mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1;
5181 for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) {
5182 if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) {
5183 mode_lib->vba.VoltageLevel = i;
5184 if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false
5185 || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible
5186 || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks
5187 && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive
5188 && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive)
5189 || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank
5190 && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) {
5191 MaximumMPCCombine = 1;
5192 } else {
5193 MaximumMPCCombine = 0;
5194 }
5195 break;
5196 }
5197 }
5198 mode_lib->vba.ImmediateFlipSupport =
5199 locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5200 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5201 mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5202 locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5203 }
5204 mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5205 mode_lib->vba.maxMpcComb = MaximumMPCCombine;
5206 }
5207 mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
5208 mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
5209 mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
5210 mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
5211 mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
5212 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5213 if (mode_lib->vba.BlendingAndTiming[k] == k) {
5214 mode_lib->vba.ODMCombineEnabled[k] =
5215 locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
5216 } else {
5217 mode_lib->vba.ODMCombineEnabled[k] = false;
5218 }
5219 mode_lib->vba.DSCEnabled[k] =
5220 locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
5221 mode_lib->vba.OutputBpp[k] =
5222 locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
5223 }
5224 }
5225
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,double DETBufferSizeInKByte,unsigned int WritebackInterfaceLumaBufferSize,unsigned int WritebackInterfaceChromaBufferSize,double DCFCLK,double UrgentOutOfOrderReturn,double ReturnBW,bool GPUVMEnable,int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],double SwathWidthSingleDPPY[],unsigned int SwathHeightY[],double ReadBandwidthPlaneLuma[],unsigned int SwathHeightC[],double ReadBandwidthPlaneChroma[],unsigned int LBBitPerPixel[],double SwathWidthY[],double HRatio[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5226 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5227 struct display_mode_lib *mode_lib,
5228 unsigned int PrefetchMode,
5229 unsigned int NumberOfActivePlanes,
5230 unsigned int MaxLineBufferLines,
5231 unsigned int LineBufferSize,
5232 unsigned int DPPOutputBufferPixels,
5233 double DETBufferSizeInKByte,
5234 unsigned int WritebackInterfaceLumaBufferSize,
5235 unsigned int WritebackInterfaceChromaBufferSize,
5236 double DCFCLK,
5237 double UrgentOutOfOrderReturn,
5238 double ReturnBW,
5239 bool GPUVMEnable,
5240 int dpte_group_bytes[],
5241 unsigned int MetaChunkSize,
5242 double UrgentLatency,
5243 double ExtraLatency,
5244 double WritebackLatency,
5245 double WritebackChunkSize,
5246 double SOCCLK,
5247 double DRAMClockChangeLatency,
5248 double SRExitTime,
5249 double SREnterPlusExitTime,
5250 double DCFCLKDeepSleep,
5251 int DPPPerPlane[],
5252 bool DCCEnable[],
5253 double DPPCLK[],
5254 double SwathWidthSingleDPPY[],
5255 unsigned int SwathHeightY[],
5256 double ReadBandwidthPlaneLuma[],
5257 unsigned int SwathHeightC[],
5258 double ReadBandwidthPlaneChroma[],
5259 unsigned int LBBitPerPixel[],
5260 double SwathWidthY[],
5261 double HRatio[],
5262 unsigned int vtaps[],
5263 unsigned int VTAPsChroma[],
5264 double VRatio[],
5265 unsigned int HTotal[],
5266 double PixelClock[],
5267 unsigned int BlendingAndTiming[],
5268 double BytePerPixelDETY[],
5269 double BytePerPixelDETC[],
5270 bool WritebackEnable[],
5271 enum source_format_class WritebackPixelFormat[],
5272 double WritebackDestinationWidth[],
5273 double WritebackDestinationHeight[],
5274 double WritebackSourceHeight[],
5275 enum clock_change_support *DRAMClockChangeSupport,
5276 double *UrgentWatermark,
5277 double *WritebackUrgentWatermark,
5278 double *DRAMClockChangeWatermark,
5279 double *WritebackDRAMClockChangeWatermark,
5280 double *StutterExitWatermark,
5281 double *StutterEnterPlusExitWatermark,
5282 double *MinActiveDRAMClockChangeLatencySupported)
5283 {
5284 double EffectiveLBLatencyHidingY;
5285 double EffectiveLBLatencyHidingC;
5286 double DPPOutputBufferLinesY;
5287 double DPPOutputBufferLinesC;
5288 double DETBufferSizeY;
5289 double DETBufferSizeC;
5290 double LinesInDETY[DC__NUM_DPP__MAX];
5291 double LinesInDETC;
5292 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5293 unsigned int LinesInDETCRoundedDownToSwath;
5294 double FullDETBufferingTimeY[DC__NUM_DPP__MAX];
5295 double FullDETBufferingTimeC;
5296 double ActiveDRAMClockChangeLatencyMarginY;
5297 double ActiveDRAMClockChangeLatencyMarginC;
5298 double WritebackDRAMClockChangeLatencyMargin;
5299 double PlaneWithMinActiveDRAMClockChangeMargin;
5300 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5301 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5302 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5303 unsigned int k, j;
5304
5305 mode_lib->vba.TotalActiveDPP = 0;
5306 mode_lib->vba.TotalDCCActiveDPP = 0;
5307 for (k = 0; k < NumberOfActivePlanes; ++k) {
5308 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5309 if (DCCEnable[k] == true) {
5310 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5311 }
5312 }
5313
5314 mode_lib->vba.TotalDataReadBandwidth = 0;
5315 for (k = 0; k < NumberOfActivePlanes; ++k) {
5316 mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth
5317 + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
5318 }
5319
5320 *UrgentWatermark = UrgentLatency + ExtraLatency;
5321
5322 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5323
5324 mode_lib->vba.TotalActiveWriteback = 0;
5325 for (k = 0; k < NumberOfActivePlanes; ++k) {
5326 if (WritebackEnable[k] == true) {
5327 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5328 }
5329 }
5330
5331 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5332 *WritebackUrgentWatermark = WritebackLatency;
5333 } else {
5334 *WritebackUrgentWatermark = WritebackLatency
5335 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5336 }
5337
5338 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5339 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5340 } else {
5341 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency
5342 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5343 }
5344
5345 for (k = 0; k < NumberOfActivePlanes; ++k) {
5346
5347 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines,
5348 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1))
5349 - (vtaps[k] - 1);
5350
5351 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines,
5352 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1))
5353 - (VTAPsChroma[k] - 1);
5354
5355 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k]
5356 * (HTotal[k] / PixelClock[k]);
5357
5358 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
5359 / (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]);
5360
5361 if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) {
5362 DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k];
5363 } else if (SwathWidthY[k] > DPPOutputBufferPixels) {
5364 DPPOutputBufferLinesY = 0.5;
5365 } else {
5366 DPPOutputBufferLinesY = 1;
5367 }
5368
5369 if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) {
5370 DPPOutputBufferLinesC = (double) DPPOutputBufferPixels
5371 / (SwathWidthY[k] / 2.0);
5372 } else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) {
5373 DPPOutputBufferLinesC = 0.5;
5374 } else {
5375 DPPOutputBufferLinesC = 1;
5376 }
5377
5378 CalculateDETBufferSize(
5379 DETBufferSizeInKByte,
5380 SwathHeightY[k],
5381 SwathHeightC[k],
5382 &DETBufferSizeY,
5383 &DETBufferSizeC);
5384
5385 LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5386 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5387 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
5388 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5389 if (BytePerPixelDETC[k] > 0) {
5390 LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
5391 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5392 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
5393 * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
5394 } else {
5395 LinesInDETC = 0;
5396 FullDETBufferingTimeC = 999999;
5397 }
5398
5399 ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k]
5400 * DPPOutputBufferLinesY + EffectiveLBLatencyHidingY
5401 + FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark;
5402
5403 if (NumberOfActivePlanes > 1) {
5404 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5405 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5406 }
5407
5408 if (BytePerPixelDETC[k] > 0) {
5409 ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k]
5410 * DPPOutputBufferLinesC + EffectiveLBLatencyHidingC
5411 + FullDETBufferingTimeC - *DRAMClockChangeWatermark;
5412 if (NumberOfActivePlanes > 1) {
5413 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5414 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2);
5415 }
5416 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5417 ActiveDRAMClockChangeLatencyMarginY,
5418 ActiveDRAMClockChangeLatencyMarginC);
5419 } else {
5420 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5421 }
5422
5423 if (WritebackEnable[k] == true) {
5424 if (WritebackPixelFormat[k] == dm_444_32) {
5425 WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize
5426 + WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k]
5427 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k]
5428 / PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark;
5429 } else {
5430 WritebackDRAMClockChangeLatencyMargin = dml_min(
5431 WritebackInterfaceLumaBufferSize * 8.0 / 10,
5432 2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k]
5433 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]))
5434 - *WritebackDRAMClockChangeWatermark;
5435 }
5436 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5437 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
5438 WritebackDRAMClockChangeLatencyMargin);
5439 }
5440 }
5441
5442 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5443 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5444 for (k = 0; k < NumberOfActivePlanes; ++k) {
5445 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5446 < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5447 mode_lib->vba.MinActiveDRAMClockChangeMargin =
5448 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5449 if (BlendingAndTiming[k] == k) {
5450 PlaneWithMinActiveDRAMClockChangeMargin = k;
5451 } else {
5452 for (j = 0; j < NumberOfActivePlanes; ++j) {
5453 if (BlendingAndTiming[k] == j) {
5454 PlaneWithMinActiveDRAMClockChangeMargin = j;
5455 }
5456 }
5457 }
5458 }
5459 }
5460
5461 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5462
5463 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5464 for (k = 0; k < NumberOfActivePlanes; ++k) {
5465 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k))
5466 && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5467 && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5468 < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5469 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
5470 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5471 }
5472 }
5473
5474 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5475 for (k = 0; k < NumberOfActivePlanes; ++k) {
5476 if (BlendingAndTiming[k] == k) {
5477 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5478 }
5479 }
5480
5481 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5482 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5483 } else if (((mode_lib->vba.SynchronizedVBlank == true
5484 || mode_lib->vba.TotalNumberOfActiveOTG == 1
5485 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0)
5486 && PrefetchMode == 0)) {
5487 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5488 } else {
5489 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5490 }
5491
5492 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5493 for (k = 0; k < NumberOfActivePlanes; ++k) {
5494 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5495 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k]
5496 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]))
5497 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5498 }
5499 }
5500
5501 *StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5502 + ExtraLatency + 10 / DCFCLKDeepSleep;
5503 *StutterEnterPlusExitWatermark = dml_max(
5504 SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5505 + ExtraLatency + 10 / DCFCLKDeepSleep,
5506 TimeToFinishSwathTransferStutterCriticalPlane);
5507
5508 }
5509
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,double BytePerPixelDETY[],double BytePerPixelDETC[],double VRatio[],double SwathWidthY[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double * DCFCLKDeepSleep)5510 static void CalculateDCFCLKDeepSleep(
5511 struct display_mode_lib *mode_lib,
5512 unsigned int NumberOfActivePlanes,
5513 double BytePerPixelDETY[],
5514 double BytePerPixelDETC[],
5515 double VRatio[],
5516 double SwathWidthY[],
5517 int DPPPerPlane[],
5518 double HRatio[],
5519 double PixelClock[],
5520 double PSCL_THROUGHPUT[],
5521 double PSCL_THROUGHPUT_CHROMA[],
5522 double DPPCLK[],
5523 double *DCFCLKDeepSleep)
5524 {
5525 unsigned int k;
5526 double DisplayPipeLineDeliveryTimeLuma;
5527 double DisplayPipeLineDeliveryTimeChroma;
5528 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5529
5530 for (k = 0; k < NumberOfActivePlanes; ++k) {
5531 if (VRatio[k] <= 1) {
5532 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k]
5533 / HRatio[k] / PixelClock[k];
5534 } else {
5535 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k]
5536 / DPPCLK[k];
5537 }
5538 if (BytePerPixelDETC[k] == 0) {
5539 DisplayPipeLineDeliveryTimeChroma = 0;
5540 } else {
5541 if (VRatio[k] / 2 <= 1) {
5542 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5543 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5544 } else {
5545 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5546 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5547 }
5548 }
5549
5550 if (BytePerPixelDETC[k] > 0) {
5551 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5552 1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1)
5553 / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5554 1.1 * SwathWidthY[k] / 2.0
5555 * dml_ceil(BytePerPixelDETC[k], 2) / 32.0
5556 / DisplayPipeLineDeliveryTimeChroma);
5557 } else {
5558 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k]
5559 * dml_ceil(BytePerPixelDETY[k], 1) / 64.0
5560 / DisplayPipeLineDeliveryTimeLuma;
5561 }
5562 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5563 mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
5564 PixelClock[k] / 16);
5565
5566 }
5567
5568 *DCFCLKDeepSleep = 8;
5569 for (k = 0; k < NumberOfActivePlanes; ++k) {
5570 *DCFCLKDeepSleep = dml_max(
5571 *DCFCLKDeepSleep,
5572 mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5573 }
5574 }
5575
/*
 * Split the DET buffer (given in KByte) into a luma and a chroma share,
 * both reported in bytes (KByte * 1024).
 *
 *  - SwathHeightC == 0:            everything goes to luma, chroma gets 0.
 *  - SwathHeightY <= SwathHeightC: the buffer is split in half.
 *  - otherwise:                    luma gets two thirds, chroma one third.
 */
static void CalculateDETBufferSize(
		double DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double *DETBufferSizeY,
		double *DETBufferSizeC)
{
	const double TotalBytes = DETBufferSizeInKByte * 1024;

	if (SwathHeightC == 0) {
		*DETBufferSizeY = TotalBytes;
		*DETBufferSizeC = 0;
		return;
	}

	if (SwathHeightY > SwathHeightC) {
		*DETBufferSizeY = TotalBytes * 2 / 3;
		*DETBufferSizeC = TotalBytes / 3;
		return;
	}

	*DETBufferSizeY = TotalBytes / 2;
	*DETBufferSizeC = TotalBytes / 2;
}
5594
/*
 * Turn a buffer drain time into an urgent burst factor.
 *
 * When the buffer time does not exceed the urgent latency, *NotEnoughHiding
 * is set to 1 and *BurstFactor to 0. Otherwise *BurstFactor becomes
 * BufferTime / (BufferTime - UrgentLatency) and *NotEnoughHiding is left
 * untouched.
 */
static void UrgentBurstFactorFromBufferTime(
		double BufferTime,
		double UrgentLatency,
		double *BurstFactor,
		unsigned int *NotEnoughHiding)
{
	const double Headroom = BufferTime - UrgentLatency;

	if (Headroom <= 0) {
		*NotEnoughHiding = 1;
		*BurstFactor = 0;
	} else {
		*BurstFactor = BufferTime / Headroom;
	}
}

/*
 * Compute the urgent burst factors for the cursor, luma and chroma buffers,
 * for both the active ratio (VRatio) and the prefetch ratios (VRatioPreY /
 * VRatioPreC).
 *
 * *NotEnoughUrgentLatencyHiding and *NotEnoughUrgentLatencyHidingPre are
 * cleared on entry and set to 1 as soon as any buffer time does not exceed
 * UrgentLatency. A prefetch factor is set to 1 when its prefetch ratio is
 * not greater than 0.
 *
 * The cursor factors are not written when CursorWidth is 0, and the chroma
 * factors are not written when BytePerPixelInDETC is not greater than 0.
 */
static void CalculateUrgentBurstFactor(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int SwathWidthY,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioPreY,
		double VRatioPreC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorCursorPre,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorLumaPre,
		double *UrgentBurstFactorChroma,
		double *UrgentBurstFactorChromaPre,
		unsigned int *NotEnoughUrgentLatencyHiding,
		unsigned int *NotEnoughUrgentLatencyHidingPre)
{
	double DETBytesLuma;
	double DETBytesChroma;
	double LumaLinesTime;
	double ChromaLinesTime;

	*NotEnoughUrgentLatencyHiding = 0;
	*NotEnoughUrgentLatencyHidingPre = 0;

	/* Cursor buffer */
	if (CursorWidth > 0) {
		/* Line count is rounded down to a power of two. */
		const unsigned int CursorLines = 1 << (unsigned int) dml_floor(
				dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		const double CursorLinesTime = CursorLines * LineTime;

		UrgentBurstFactorFromBufferTime(
				CursorLinesTime / VRatio,
				UrgentLatency,
				UrgentBurstFactorCursor,
				NotEnoughUrgentLatencyHiding);

		if (VRatioPreY > 0) {
			UrgentBurstFactorFromBufferTime(
					CursorLinesTime / VRatioPreY,
					UrgentLatency,
					UrgentBurstFactorCursorPre,
					NotEnoughUrgentLatencyHidingPre);
		} else {
			*UrgentBurstFactorCursorPre = 1;
		}
	}

	CalculateDETBufferSize(
			DETBufferSizeInKByte,
			SwathHeightY,
			SwathHeightC,
			&DETBytesLuma,
			&DETBytesChroma);

	/* Luma: lines held in the DET, rounded down to whole swaths. */
	LumaLinesTime = dml_floor(DETBytesLuma / BytePerPixelInDETY / SwathWidthY, SwathHeightY)
			* LineTime;

	UrgentBurstFactorFromBufferTime(
			LumaLinesTime / VRatio,
			UrgentLatency,
			UrgentBurstFactorLuma,
			NotEnoughUrgentLatencyHiding);

	if (VRatioPreY > 0) {
		UrgentBurstFactorFromBufferTime(
				LumaLinesTime / VRatioPreY,
				UrgentLatency,
				UrgentBurstFactorLumaPre,
				NotEnoughUrgentLatencyHidingPre);
	} else {
		*UrgentBurstFactorLumaPre = 1;
	}

	/* Chroma: only when the plane carries chroma data. */
	if (BytePerPixelInDETC > 0) {
		/* SwathWidthY / 2 is an integer division, as in the HW formula. */
		ChromaLinesTime = dml_floor(
				DETBytesChroma / BytePerPixelInDETC / (SwathWidthY / 2),
				SwathHeightC) * LineTime;

		UrgentBurstFactorFromBufferTime(
				ChromaLinesTime / (VRatio / 2),
				UrgentLatency,
				UrgentBurstFactorChroma,
				NotEnoughUrgentLatencyHiding);

		if (VRatioPreC > 0) {
			UrgentBurstFactorFromBufferTime(
					ChromaLinesTime / VRatioPreC,
					UrgentLatency,
					UrgentBurstFactorChromaPre,
					NotEnoughUrgentLatencyHidingPre);
		} else {
			*UrgentBurstFactorChromaPre = 1;
		}
	}
}
5715
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])5716 static void CalculatePixelDeliveryTimes(
5717 unsigned int NumberOfActivePlanes,
5718 double VRatio[],
5719 double VRatioPrefetchY[],
5720 double VRatioPrefetchC[],
5721 unsigned int swath_width_luma_ub[],
5722 unsigned int swath_width_chroma_ub[],
5723 int DPPPerPlane[],
5724 double HRatio[],
5725 double PixelClock[],
5726 double PSCL_THROUGHPUT[],
5727 double PSCL_THROUGHPUT_CHROMA[],
5728 double DPPCLK[],
5729 double BytePerPixelDETC[],
5730 enum scan_direction_class SourceScan[],
5731 unsigned int BlockWidth256BytesY[],
5732 unsigned int BlockHeight256BytesY[],
5733 unsigned int BlockWidth256BytesC[],
5734 unsigned int BlockHeight256BytesC[],
5735 double DisplayPipeLineDeliveryTimeLuma[],
5736 double DisplayPipeLineDeliveryTimeChroma[],
5737 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5738 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5739 double DisplayPipeRequestDeliveryTimeLuma[],
5740 double DisplayPipeRequestDeliveryTimeChroma[],
5741 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5742 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
5743 {
5744 double req_per_swath_ub;
5745 unsigned int k;
5746
5747 for (k = 0; k < NumberOfActivePlanes; ++k) {
5748 if (VRatio[k] <= 1) {
5749 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k]
5750 / HRatio[k] / PixelClock[k];
5751 } else {
5752 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k]
5753 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5754 }
5755
5756 if (BytePerPixelDETC[k] == 0) {
5757 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5758 } else {
5759 if (VRatio[k] / 2 <= 1) {
5760 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5761 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5762 } else {
5763 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5764 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5765 }
5766 }
5767
5768 if (VRatioPrefetchY[k] <= 1) {
5769 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5770 * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5771 } else {
5772 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5773 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5774 }
5775
5776 if (BytePerPixelDETC[k] == 0) {
5777 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5778 } else {
5779 if (VRatioPrefetchC[k] <= 1) {
5780 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5781 swath_width_chroma_ub[k] * DPPPerPlane[k]
5782 / (HRatio[k] / 2) / PixelClock[k];
5783 } else {
5784 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5785 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5786 }
5787 }
5788 }
5789
5790 for (k = 0; k < NumberOfActivePlanes; ++k) {
5791 if (SourceScan[k] == dm_horz) {
5792 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5793 } else {
5794 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5795 }
5796 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k]
5797 / req_per_swath_ub;
5798 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
5799 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5800 if (BytePerPixelDETC[k] == 0) {
5801 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5802 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5803 } else {
5804 if (SourceScan[k] == dm_horz) {
5805 req_per_swath_ub = swath_width_chroma_ub[k]
5806 / BlockWidth256BytesC[k];
5807 } else {
5808 req_per_swath_ub = swath_width_chroma_ub[k]
5809 / BlockHeight256BytesC[k];
5810 }
5811 DisplayPipeRequestDeliveryTimeChroma[k] =
5812 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5813 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
5814 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5815 }
5816 }
5817 }
5818
CalculateMetaAndPTETimes(unsigned int NumberOfActivePlanes,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int GPUVMMaxPageTableLevels,unsigned int HTotal[],double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],double DestinationLinesToRequestVMInVBlank[],double DestinationLinesToRequestVMInImmediateFlip[],bool DCCEnable[],double PixelClock[],double BytePerPixelDETY[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_height[],unsigned int meta_req_width[],unsigned int meta_req_height[],int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double TimePerMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[],double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])5819 static void CalculateMetaAndPTETimes(
5820 unsigned int NumberOfActivePlanes,
5821 bool GPUVMEnable,
5822 unsigned int MetaChunkSize,
5823 unsigned int MinMetaChunkSizeBytes,
5824 unsigned int GPUVMMaxPageTableLevels,
5825 unsigned int HTotal[],
5826 double VRatio[],
5827 double VRatioPrefetchY[],
5828 double VRatioPrefetchC[],
5829 double DestinationLinesToRequestRowInVBlank[],
5830 double DestinationLinesToRequestRowInImmediateFlip[],
5831 double DestinationLinesToRequestVMInVBlank[],
5832 double DestinationLinesToRequestVMInImmediateFlip[],
5833 bool DCCEnable[],
5834 double PixelClock[],
5835 double BytePerPixelDETY[],
5836 double BytePerPixelDETC[],
5837 enum scan_direction_class SourceScan[],
5838 unsigned int dpte_row_height[],
5839 unsigned int dpte_row_height_chroma[],
5840 unsigned int meta_row_width[],
5841 unsigned int meta_row_height[],
5842 unsigned int meta_req_width[],
5843 unsigned int meta_req_height[],
5844 int dpte_group_bytes[],
5845 unsigned int PTERequestSizeY[],
5846 unsigned int PTERequestSizeC[],
5847 unsigned int PixelPTEReqWidthY[],
5848 unsigned int PixelPTEReqHeightY[],
5849 unsigned int PixelPTEReqWidthC[],
5850 unsigned int PixelPTEReqHeightC[],
5851 unsigned int dpte_row_width_luma_ub[],
5852 unsigned int dpte_row_width_chroma_ub[],
5853 unsigned int vm_group_bytes[],
5854 unsigned int dpde0_bytes_per_frame_ub_l[],
5855 unsigned int dpde0_bytes_per_frame_ub_c[],
5856 unsigned int meta_pte_bytes_per_frame_ub_l[],
5857 unsigned int meta_pte_bytes_per_frame_ub_c[],
5858 double DST_Y_PER_PTE_ROW_NOM_L[],
5859 double DST_Y_PER_PTE_ROW_NOM_C[],
5860 double DST_Y_PER_META_ROW_NOM_L[],
5861 double TimePerMetaChunkNominal[],
5862 double TimePerMetaChunkVBlank[],
5863 double TimePerMetaChunkFlip[],
5864 double time_per_pte_group_nom_luma[],
5865 double time_per_pte_group_vblank_luma[],
5866 double time_per_pte_group_flip_luma[],
5867 double time_per_pte_group_nom_chroma[],
5868 double time_per_pte_group_vblank_chroma[],
5869 double time_per_pte_group_flip_chroma[],
5870 double TimePerVMGroupVBlank[],
5871 double TimePerVMGroupFlip[],
5872 double TimePerVMRequestVBlank[],
5873 double TimePerVMRequestFlip[])
5874 {
5875 unsigned int meta_chunk_width;
5876 unsigned int min_meta_chunk_width;
5877 unsigned int meta_chunk_per_row_int;
5878 unsigned int meta_row_remainder;
5879 unsigned int meta_chunk_threshold;
5880 unsigned int meta_chunks_per_row_ub;
5881 unsigned int dpte_group_width_luma;
5882 unsigned int dpte_group_width_chroma;
5883 unsigned int dpte_groups_per_row_luma_ub;
5884 unsigned int dpte_groups_per_row_chroma_ub;
5885 unsigned int num_group_per_lower_vm_stage;
5886 unsigned int num_req_per_lower_vm_stage;
5887 unsigned int k;
5888
5889 for (k = 0; k < NumberOfActivePlanes; ++k) {
5890 if (GPUVMEnable == true) {
5891 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5892 if (BytePerPixelDETC[k] == 0) {
5893 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5894 } else {
5895 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2);
5896 }
5897 } else {
5898 DST_Y_PER_PTE_ROW_NOM_L[k] = 0;
5899 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5900 }
5901 if (DCCEnable[k] == true) {
5902 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5903 } else {
5904 DST_Y_PER_META_ROW_NOM_L[k] = 0;
5905 }
5906 }
5907
5908 for (k = 0; k < NumberOfActivePlanes; ++k) {
5909 if (DCCEnable[k] == true) {
5910 meta_chunk_width = MetaChunkSize * 1024 * 256
5911 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5912 min_meta_chunk_width = MinMetaChunkSizeBytes * 256
5913 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5914 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5915 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5916 if (SourceScan[k] == dm_horz) {
5917 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5918 } else {
5919 meta_chunk_threshold = 2 * min_meta_chunk_width
5920 - meta_req_height[k];
5921 }
5922 if (meta_row_remainder <= meta_chunk_threshold) {
5923 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5924 } else {
5925 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5926 }
5927 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k]
5928 / PixelClock[k] / meta_chunks_per_row_ub;
5929 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k]
5930 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5931 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k]
5932 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5933 } else {
5934 TimePerMetaChunkNominal[k] = 0;
5935 TimePerMetaChunkVBlank[k] = 0;
5936 TimePerMetaChunkFlip[k] = 0;
5937 }
5938 }
5939
5940 for (k = 0; k < NumberOfActivePlanes; ++k) {
5941 if (GPUVMEnable == true) {
5942 if (SourceScan[k] == dm_horz) {
5943 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5944 * PixelPTEReqWidthY[k];
5945 } else {
5946 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5947 * PixelPTEReqHeightY[k];
5948 }
5949 dpte_groups_per_row_luma_ub = dml_ceil(
5950 (float) dpte_row_width_luma_ub[k] / dpte_group_width_luma,
5951 1);
5952 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k]
5953 / PixelClock[k] / dpte_groups_per_row_luma_ub;
5954 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k]
5955 * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5956 time_per_pte_group_flip_luma[k] =
5957 DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k]
5958 / PixelClock[k]
5959 / dpte_groups_per_row_luma_ub;
5960 if (BytePerPixelDETC[k] == 0) {
5961 time_per_pte_group_nom_chroma[k] = 0;
5962 time_per_pte_group_vblank_chroma[k] = 0;
5963 time_per_pte_group_flip_chroma[k] = 0;
5964 } else {
5965 if (SourceScan[k] == dm_horz) {
5966 dpte_group_width_chroma = dpte_group_bytes[k]
5967 / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5968 } else {
5969 dpte_group_width_chroma = dpte_group_bytes[k]
5970 / PTERequestSizeC[k]
5971 * PixelPTEReqHeightC[k];
5972 }
5973 dpte_groups_per_row_chroma_ub = dml_ceil(
5974 (float) dpte_row_width_chroma_ub[k]
5975 / dpte_group_width_chroma,
5976 1);
5977 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k]
5978 * HTotal[k] / PixelClock[k]
5979 / dpte_groups_per_row_chroma_ub;
5980 time_per_pte_group_vblank_chroma[k] =
5981 DestinationLinesToRequestRowInVBlank[k] * HTotal[k]
5982 / PixelClock[k]
5983 / dpte_groups_per_row_chroma_ub;
5984 time_per_pte_group_flip_chroma[k] =
5985 DestinationLinesToRequestRowInImmediateFlip[k]
5986 * HTotal[k] / PixelClock[k]
5987 / dpte_groups_per_row_chroma_ub;
5988 }
5989 } else {
5990 time_per_pte_group_nom_luma[k] = 0;
5991 time_per_pte_group_vblank_luma[k] = 0;
5992 time_per_pte_group_flip_luma[k] = 0;
5993 time_per_pte_group_nom_chroma[k] = 0;
5994 time_per_pte_group_vblank_chroma[k] = 0;
5995 time_per_pte_group_flip_chroma[k] = 0;
5996 }
5997 }
5998
5999 for (k = 0; k < NumberOfActivePlanes; ++k) {
6000 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
6001 if (DCCEnable[k] == false) {
6002 if (BytePerPixelDETC[k] > 0) {
6003 num_group_per_lower_vm_stage =
6004 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6005 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6006 } else {
6007 num_group_per_lower_vm_stage =
6008 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6009 }
6010 } else {
6011 if (GPUVMMaxPageTableLevels == 1) {
6012 if (BytePerPixelDETC[k] > 0) {
6013 num_group_per_lower_vm_stage =
6014 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6015 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6016 } else {
6017 num_group_per_lower_vm_stage =
6018 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6019 }
6020 } else {
6021 if (BytePerPixelDETC[k] > 0) {
6022 num_group_per_lower_vm_stage =
6023 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6024 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6025 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6026 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6027 } else {
6028 num_group_per_lower_vm_stage =
6029 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6030 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6031 }
6032 }
6033 }
6034
6035 if (DCCEnable[k] == false) {
6036 if (BytePerPixelDETC[k] > 0) {
6037 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6038 / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6039 } else {
6040 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6041 / 64;
6042 }
6043 } else {
6044 if (GPUVMMaxPageTableLevels == 1) {
6045 if (BytePerPixelDETC[k] > 0) {
6046 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6047 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6048 } else {
6049 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6050 }
6051 } else {
6052 if (BytePerPixelDETC[k] > 0) {
6053 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6054 + dpde0_bytes_per_frame_ub_c[k] / 64
6055 + meta_pte_bytes_per_frame_ub_l[k] / 64
6056 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6057 } else {
6058 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6059 + meta_pte_bytes_per_frame_ub_l[k] / 64;
6060 }
6061 }
6062 }
6063
6064 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k]
6065 / PixelClock[k] / num_group_per_lower_vm_stage;
6066 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6067 * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6068 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k]
6069 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6070 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6071 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6072
6073 if (GPUVMMaxPageTableLevels > 2) {
6074 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6075 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6076 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6077 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
6078 }
6079
6080 } else {
6081 TimePerVMGroupVBlank[k] = 0;
6082 TimePerVMGroupFlip[k] = 0;
6083 TimePerVMRequestVBlank[k] = 0;
6084 TimePerVMRequestFlip[k] = 0;
6085 }
6086 }
6087 }
6088
/*
 * Return the extra latency: UrgentRoundTripAndOutOfOrderLatency plus the
 * time to return the pixel and meta chunks of all active DPPs at ReturnBW
 * (chunk sizes are scaled by 1024), plus, when GPUVM is enabled, the time to
 * return each plane's PTE group bytes.
 *
 * When both GPUVM and HostVM are enabled, the per-plane PTE term is scaled
 * by (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)) and
 * by the ratio of the two "percent of ideal bandwidth" arguments; otherwise
 * both scale factors are 1.
 */
static double CalculateExtraLatency(
		double UrgentRoundTripAndOutOfOrderLatency,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		int HostVMMaxPageTableLevels,
		int HostVMCachedPageTableLevels)
{
	double InefficiencyFactor = 1;
	int DynamicLevels = 0;
	double Latency;
	int plane;

	/* Host VM translation only matters when GPUVM is enabled as well. */
	if (GPUVMEnable && HostVMEnable) {
		InefficiencyFactor =
				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
						/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		DynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
	}

	Latency = UrgentRoundTripAndOutOfOrderLatency
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
					+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
					/ ReturnBW;

	if (!GPUVMEnable) {
		return Latency;
	}

	/* Add the PTE group return time of every active plane. */
	for (plane = 0; plane < NumberOfActivePlanes; plane++) {
		Latency += NumberOfDPP[plane] * dpte_group_bytes[plane]
				* (1 + 8 * DynamicLevels)
				* InefficiencyFactor / ReturnBW;
	}

	return Latency;
}
6137
6138