1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * Express Clock as (VCOSpeed * 4) / divider, with the divider obtained by
 * passing (VCOSpeed * 4 / Clock) through dml_floor() or dml_ceil() with a
 * granularity argument of 1.0.
 *
 * round_up selects dml_floor() for the divider, otherwise dml_ceil() is used.
 * A Clock of zero or less yields 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	if (round_up)
		divider = dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
	else
		divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
1335 
1336 void dml32_CalculateOutputLink(
1337 		double PHYCLKPerState,
1338 		double PHYCLKD18PerState,
1339 		double PHYCLKD32PerState,
1340 		double Downspreading,
1341 		bool IsMainSurfaceUsingTheIndicatedTiming,
1342 		enum output_encoder_class Output,
1343 		enum output_format_class OutputFormat,
1344 		unsigned int HTotal,
1345 		unsigned int HActive,
1346 		double PixelClockBackEnd,
1347 		double ForcedOutputLinkBPP,
1348 		unsigned int DSCInputBitPerComponent,
1349 		unsigned int NumberOfDSCSlices,
1350 		double AudioSampleRate,
1351 		unsigned int AudioSampleLayout,
1352 		enum odm_combine_mode ODMModeNoDSC,
1353 		enum odm_combine_mode ODMModeDSC,
1354 		bool DSCEnable,
1355 		unsigned int OutputLinkDPLanes,
1356 		enum dm_output_link_dp_rate OutputLinkDPRate,
1357 
1358 		/* Output */
1359 		bool *RequiresDSC,
1360 		double *RequiresFEC,
1361 		double  *OutBpp,
1362 		enum dm_output_type *OutputType,
1363 		enum dm_output_rate *OutputRate,
1364 		unsigned int *RequiredSlots)
1365 {
1366 	bool LinkDSCEnable;
1367 	unsigned int dummy;
1368 	*RequiresDSC = false;
1369 	*RequiresFEC = false;
1370 	*OutBpp = 0;
1371 	*OutputType = dm_output_type_unknown;
1372 	*OutputRate = dm_output_rate_unknown;
1373 
1374 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 		if (Output == dm_hdmi) {
1376 			*RequiresDSC = false;
1377 			*RequiresFEC = false;
1378 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 					ODMModeNoDSC, ODMModeDSC, &dummy);
1382 			//OutputTypeAndRate = "HDMI";
1383 			*OutputType = dm_output_type_hdmi;
1384 
1385 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 			if (DSCEnable == true) {
1387 				*RequiresDSC = true;
1388 				LinkDSCEnable = true;
1389 				if (Output == dm_dp || Output == dm_dp2p0)
1390 					*RequiresFEC = true;
1391 				else
1392 					*RequiresFEC = false;
1393 			} else {
1394 				*RequiresDSC = false;
1395 				LinkDSCEnable = false;
1396 				if (Output == dm_dp2p0)
1397 					*RequiresFEC = true;
1398 				else
1399 					*RequiresFEC = false;
1400 			}
1401 			if (Output == dm_dp2p0) {
1402 				*OutBpp = 0;
1403 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 						PHYCLKD32PerState >= 10000 / 32) {
1405 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 							ForcedOutputLinkBPP == 0) {
1412 						*RequiresDSC = true;
1413 						LinkDSCEnable = true;
1414 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 								OutputFormat, DSCInputBitPerComponent,
1418 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 					}
1421 					//OutputTypeAndRate = Output & " UHBR10";
1422 					*OutputType = dm_output_type_dp2p0;
1423 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 				}
1425 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 
1433 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 							ForcedOutputLinkBPP == 0) {
1435 						*RequiresDSC = true;
1436 						LinkDSCEnable = true;
1437 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 								OutputFormat, DSCInputBitPerComponent,
1441 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 					}
1444 					//OutputTypeAndRate = Output & " UHBR13p5";
1445 					*OutputType = dm_output_type_dp2p0;
1446 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 				}
1448 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 						*RequiresDSC = true;
1457 						LinkDSCEnable = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " UHBR20";
1466 					*OutputType = dm_output_type_dp2p0;
1467 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 				}
1469 			} else {
1470 				*OutBpp = 0;
1471 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 						PHYCLKPerState >= 270) {
1473 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 							ForcedOutputLinkBPP == 0) {
1480 						*RequiresDSC = true;
1481 						LinkDSCEnable = true;
1482 						if (Output == dm_dp)
1483 							*RequiresFEC = true;
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 							ForcedOutputLinkBPP == 0) {
1505 						*RequiresDSC = true;
1506 						LinkDSCEnable = true;
1507 						if (Output == dm_dp)
1508 							*RequiresFEC = true;
1509 
1510 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 								OutputFormat, DSCInputBitPerComponent,
1514 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 					}
1517 					//OutputTypeAndRate = Output & " HBR2";
1518 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520 				}
1521 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 							RequiredSlots);
1528 
1529 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 						*RequiresDSC = true;
1531 						LinkDSCEnable = true;
1532 						if (Output == dm_dp)
1533 							*RequiresFEC = true;
1534 
1535 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 								OutputFormat, DSCInputBitPerComponent,
1539 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 					}
1542 					//OutputTypeAndRate = Output & " HBR3";
1543 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545 				}
1546 			}
1547 		}
1548 	}
1549 }
1550 
/*
 * dml32_CalculateDPPCLK - derive the per-surface and the global DPP clock.
 *
 * For every active surface the single-DPP clock is divided by the number of
 * DPPs driving the surface and scaled up by the down-spread percentage.  The
 * largest of those values, after dml32_RoundToDFSGranularity(), becomes
 * *GlobalDPPCLK.  Each per-surface clock is then rounded up to a whole
 * number of 1/255 steps of the global clock.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0..NumberOfActiveSurfaces-1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double spread_factor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double global_clk = 0;
	unsigned int surf;

	/* Pass 1: raw per-surface requirement and the running maximum. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * spread_factor;
		global_clk = dml_max(global_clk, Dppclk[surf]);
	}

	global_clk = dml32_RoundToDFSGranularity(global_clk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = global_clk;

	/* Pass 2: express every surface clock in 1/255 steps of the global clock. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		Dppclk[surf] = global_clk / 255 * dml_ceil(Dppclk[surf] * 255.0 / global_clk, 1.0);
}
1572 
1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 	unsigned int   NonDSCBPP3;
1599 
1600 	if (Format == dm_420) {
1601 		NonDSCBPP0 = 12;
1602 		NonDSCBPP1 = 15;
1603 		NonDSCBPP2 = 18;
1604 		MinDSCBPP = 6;
1605 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1606 	} else if (Format == dm_444) {
1607 		NonDSCBPP0 = 18;
1608 		NonDSCBPP1 = 24;
1609 		NonDSCBPP2 = 30;
1610 		NonDSCBPP3 = 36;
1611 		MinDSCBPP = 8;
1612 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 	} else {
1614 		if (Output == dm_hdmi) {
1615 			NonDSCBPP0 = 24;
1616 			NonDSCBPP1 = 24;
1617 			NonDSCBPP2 = 24;
1618 		} else {
1619 			NonDSCBPP0 = 16;
1620 			NonDSCBPP1 = 20;
1621 			NonDSCBPP2 = 24;
1622 		}
1623 		if (Format == dm_n422) {
1624 			MinDSCBPP = 7;
1625 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 		} else {
1627 			MinDSCBPP = 8;
1628 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 		}
1630 	}
1631 	if (Output == dm_dp2p0) {
1632 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 	} else if (DSCEnable && Output == dm_dp) {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 	} else {
1636 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 	}
1638 
1639 	if (DSCEnable) {
1640 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 			MaxLinkBPP = 2 * MaxLinkBPP;
1646 	} else {
1647 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 			MaxLinkBPP = 2 * MaxLinkBPP;
1653 	}
1654 
1655 	if (DesiredBPP == 0) {
1656 		if (DSCEnable) {
1657 			if (MaxLinkBPP < MinDSCBPP)
1658 				return BPP_INVALID;
1659 			else if (MaxLinkBPP >= MaxDSCBPP)
1660 				return MaxDSCBPP;
1661 			else
1662 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 		} else {
1664 			if (MaxLinkBPP >= NonDSCBPP3)
1665 				return NonDSCBPP3;
1666 			else if (MaxLinkBPP >= NonDSCBPP2)
1667 				return NonDSCBPP2;
1668 			else if (MaxLinkBPP >= NonDSCBPP1)
1669 				return NonDSCBPP1;
1670 			else if (MaxLinkBPP >= NonDSCBPP0)
1671 				return 16.0;
1672 			else
1673 				return BPP_INVALID;
1674 		}
1675 	} else {
1676 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1677 				DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1678 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1679 			return BPP_INVALID;
1680 		else
1681 			return DesiredBPP;
1682 	}
1683 
1684 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1685 
1686 	return BPP_INVALID;
1687 } // TruncToValidBPP
1688 
1689 double dml32_RequiredDTBCLK(
1690 		bool              DSCEnable,
1691 		double               PixelClock,
1692 		enum output_format_class  OutputFormat,
1693 		double               OutputBpp,
1694 		unsigned int              DSCSlices,
1695 		unsigned int                 HTotal,
1696 		unsigned int                 HActive,
1697 		unsigned int              AudioRate,
1698 		unsigned int              AudioLayout)
1699 {
1700 	double PixelWordRate;
1701 	double HCActive;
1702 	double HCBlank;
1703 	double AverageTribyteRate;
1704 	double HActiveTribyteRate;
1705 
1706 	if (DSCEnable != true)
1707 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1708 
1709 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1710 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1711 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1712 	HCBlank = 64 + 32 *
1713 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1714 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1715 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1716 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1717 }
1718 
1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1720 		enum odm_combine_mode ODMMode,
1721 		unsigned int DSCInputBitPerComponent,
1722 		double OutputBpp,
1723 		unsigned int HActive,
1724 		unsigned int HTotal,
1725 		unsigned int NumberOfDSCSlices,
1726 		enum output_format_class  OutputFormat,
1727 		enum output_encoder_class Output,
1728 		double PixelClock,
1729 		double PixelClockBackEnd)
1730 {
1731 	unsigned int DSCDelayRequirement_val;
1732 
1733 	if (DSCEnabled == true && OutputBpp != 0) {
1734 		if (ODMMode == dm_odm_combine_mode_4to1) {
1735 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1736 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1737 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1738 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1739 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1740 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1741 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1742 		} else {
1743 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1744 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1745 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1746 		}
1747 
1748 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1749 				dml_ceil(DSCDelayRequirement_val / HActive, 1);
1750 
1751 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1752 
1753 	} else {
1754 		DSCDelayRequirement_val = 0;
1755 	}
1756 
1757 #ifdef __DML_VBA_DEBUG__
1758 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1759 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1760 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1761 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1762 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1763 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1764 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1765 #endif
1766 
1767 	return DSCDelayRequirement_val;
1768 }
1769 
1770 void dml32_CalculateSurfaceSizeInMall(
1771 		unsigned int NumberOfActiveSurfaces,
1772 		unsigned int MALLAllocatedForDCN,
1773 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1774 		bool DCCEnable[],
1775 		bool ViewportStationary[],
1776 		unsigned int ViewportXStartY[],
1777 		unsigned int ViewportYStartY[],
1778 		unsigned int ViewportXStartC[],
1779 		unsigned int ViewportYStartC[],
1780 		unsigned int ViewportWidthY[],
1781 		unsigned int ViewportHeightY[],
1782 		unsigned int BytesPerPixelY[],
1783 		unsigned int ViewportWidthC[],
1784 		unsigned int ViewportHeightC[],
1785 		unsigned int BytesPerPixelC[],
1786 		unsigned int SurfaceWidthY[],
1787 		unsigned int SurfaceWidthC[],
1788 		unsigned int SurfaceHeightY[],
1789 		unsigned int SurfaceHeightC[],
1790 		unsigned int Read256BytesBlockWidthY[],
1791 		unsigned int Read256BytesBlockWidthC[],
1792 		unsigned int Read256BytesBlockHeightY[],
1793 		unsigned int Read256BytesBlockHeightC[],
1794 		unsigned int ReadBlockWidthY[],
1795 		unsigned int ReadBlockWidthC[],
1796 		unsigned int ReadBlockHeightY[],
1797 		unsigned int ReadBlockHeightC[],
1798 
1799 		/* Output */
1800 		unsigned int    SurfaceSizeInMALL[],
1801 		bool *ExceededMALLSize)
1802 {
1803 	unsigned int TotalSurfaceSizeInMALL  = 0;
1804 	unsigned int k;
1805 
1806 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1807 		if (ViewportStationary[k]) {
1808 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1809 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1810 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1811 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1812 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1813 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1814 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1815 
1816 			if (ReadBlockWidthC[k] > 0) {
1817 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1819 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1820 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1821 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1822 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1823 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1824 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1825 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1826 							BytesPerPixelC[k];
1827 			}
1828 			if (DCCEnable[k] == true) {
1829 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1830 						dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1831 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1832 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1833 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1834 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1835 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1836 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1837 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1838 							* Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1839 				if (Read256BytesBlockWidthC[k] > 0) {
1840 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841 							dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1842 								Read256BytesBlockWidthC[k]),
1843 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1844 								* Read256BytesBlockWidthC[k] - 1, 8 *
1845 								Read256BytesBlockWidthC[k]) -
1846 								dml_floor(ViewportXStartC[k], 8 *
1847 								Read256BytesBlockWidthC[k])) *
1848 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1849 								Read256BytesBlockHeightC[k]),
1850 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1851 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1852 								Read256BytesBlockHeightC[k]) -
1853 								dml_floor(ViewportYStartC[k], 8 *
1854 								Read256BytesBlockHeightC[k])) *
1855 								BytesPerPixelC[k] / 256;
1856 				}
1857 			}
1858 		} else {
1859 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1860 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1861 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1862 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1863 							BytesPerPixelY[k];
1864 			if (ReadBlockWidthC[k] > 0) {
1865 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1866 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1867 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1868 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1869 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1870 								BytesPerPixelC[k];
1871 			}
1872 			if (DCCEnable[k] == true) {
1873 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1874 						dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1875 								Read256BytesBlockWidthY[k] - 1), 8 *
1876 								Read256BytesBlockWidthY[k]) *
1877 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1878 								Read256BytesBlockHeightY[k] - 1), 8 *
1879 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1880 
1881 				if (Read256BytesBlockWidthC[k] > 0) {
1882 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1883 							dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1884 									Read256BytesBlockWidthC[k] - 1), 8 *
1885 									Read256BytesBlockWidthC[k]) *
1886 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1887 									Read256BytesBlockHeightC[k] - 1), 8 *
1888 									Read256BytesBlockHeightC[k]) *
1889 									BytesPerPixelC[k] / 256;
1890 				}
1891 			}
1892 		}
1893 	}
1894 
1895 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1896 		if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1897 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1898 	}
1899 	*ExceededMALLSize =  (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
1900 } // CalculateSurfaceSizeInMall
1901 
1902 void dml32_CalculateVMRowAndSwath(
1903 		unsigned int NumberOfActiveSurfaces,
1904 		DmlPipe myPipe[],
1905 		unsigned int SurfaceSizeInMALL[],
1906 		unsigned int PTEBufferSizeInRequestsLuma,
1907 		unsigned int PTEBufferSizeInRequestsChroma,
1908 		unsigned int DCCMetaBufferSizeBytes,
1909 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1910 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1911 		unsigned int MALLAllocatedForDCN,
1912 		double SwathWidthY[],
1913 		double SwathWidthC[],
1914 		bool GPUVMEnable,
1915 		bool HostVMEnable,
1916 		unsigned int HostVMMaxNonCachedPageTableLevels,
1917 		unsigned int GPUVMMaxPageTableLevels,
1918 		unsigned int GPUVMMinPageSizeKBytes[],
1919 		unsigned int HostVMMinPageSize,
1920 
1921 		/* Output */
1922 		bool PTEBufferSizeNotExceeded[],
1923 		bool DCCMetaBufferSizeNotExceeded[],
1924 		unsigned int dpte_row_width_luma_ub[],
1925 		unsigned int dpte_row_width_chroma_ub[],
1926 		unsigned int dpte_row_height_luma[],
1927 		unsigned int dpte_row_height_chroma[],
1928 		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1929 		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1930 		unsigned int meta_req_width[],
1931 		unsigned int meta_req_width_chroma[],
1932 		unsigned int meta_req_height[],
1933 		unsigned int meta_req_height_chroma[],
1934 		unsigned int meta_row_width[],
1935 		unsigned int meta_row_width_chroma[],
1936 		unsigned int meta_row_height[],
1937 		unsigned int meta_row_height_chroma[],
1938 		unsigned int vm_group_bytes[],
1939 		unsigned int dpte_group_bytes[],
1940 		unsigned int PixelPTEReqWidthY[],
1941 		unsigned int PixelPTEReqHeightY[],
1942 		unsigned int PTERequestSizeY[],
1943 		unsigned int PixelPTEReqWidthC[],
1944 		unsigned int PixelPTEReqHeightC[],
1945 		unsigned int PTERequestSizeC[],
1946 		unsigned int dpde0_bytes_per_frame_ub_l[],
1947 		unsigned int meta_pte_bytes_per_frame_ub_l[],
1948 		unsigned int dpde0_bytes_per_frame_ub_c[],
1949 		unsigned int meta_pte_bytes_per_frame_ub_c[],
1950 		double PrefetchSourceLinesY[],
1951 		double PrefetchSourceLinesC[],
1952 		double VInitPreFillY[],
1953 		double VInitPreFillC[],
1954 		unsigned int MaxNumSwathY[],
1955 		unsigned int MaxNumSwathC[],
1956 		double meta_row_bw[],
1957 		double dpte_row_bw[],
1958 		double PixelPTEBytesPerRow[],
1959 		double PDEAndMetaPTEBytesFrame[],
1960 		double MetaRowByte[],
1961 		bool use_one_row_for_frame[],
1962 		bool use_one_row_for_frame_flip[],
1963 		bool UsesMALLForStaticScreen[],
1964 		bool PTE_BUFFER_MODE[],
1965 		unsigned int BIGK_FRAGMENT_SIZE[])
1966 {
1967 	unsigned int k;
1968 	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1969 	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1970 	unsigned int PDEAndMetaPTEBytesFrameY;
1971 	unsigned int PDEAndMetaPTEBytesFrameC;
1972 	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1973 	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1974 	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1975 	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1976 	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1977 	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1978 	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1979 	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1980 	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1981 	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1982 	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1983 
1984 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1985 		if (HostVMEnable == true) {
1986 			vm_group_bytes[k] = 512;
1987 			dpte_group_bytes[k] = 512;
1988 		} else if (GPUVMEnable == true) {
1989 			vm_group_bytes[k] = 2048;
1990 			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1991 				dpte_group_bytes[k] = 512;
1992 			else
1993 				dpte_group_bytes[k] = 2048;
1994 		} else {
1995 			vm_group_bytes[k] = 0;
1996 			dpte_group_bytes[k] = 0;
1997 		}
1998 
1999 		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2000 				myPipe[k].SourcePixelFormat == dm_420_12 ||
2001 				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2002 			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2003 					!IsVertical(myPipe[k].SourceRotation)) {
2004 				PTEBufferSizeInRequestsForLuma[k] =
2005 						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2006 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2007 			} else {
2008 				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2009 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2010 			}
2011 
2012 			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2013 					myPipe[k].ViewportStationary,
2014 					myPipe[k].DCCEnable,
2015 					myPipe[k].DPPPerSurface,
2016 					myPipe[k].BlockHeight256BytesC,
2017 					myPipe[k].BlockWidth256BytesC,
2018 					myPipe[k].SourcePixelFormat,
2019 					myPipe[k].SurfaceTiling,
2020 					myPipe[k].BytePerPixelC,
2021 					myPipe[k].SourceRotation,
2022 					SwathWidthC[k],
2023 					myPipe[k].ViewportHeightChroma,
2024 					myPipe[k].ViewportXStartC,
2025 					myPipe[k].ViewportYStartC,
2026 					GPUVMEnable,
2027 					HostVMEnable,
2028 					HostVMMaxNonCachedPageTableLevels,
2029 					GPUVMMaxPageTableLevels,
2030 					GPUVMMinPageSizeKBytes[k],
2031 					HostVMMinPageSize,
2032 					PTEBufferSizeInRequestsForChroma[k],
2033 					myPipe[k].PitchC,
2034 					myPipe[k].DCCMetaPitchC,
2035 					myPipe[k].BlockWidthC,
2036 					myPipe[k].BlockHeightC,
2037 
2038 					/* Output */
2039 					&MetaRowByteC[k],
2040 					&PixelPTEBytesPerRowC[k],
2041 					&dpte_row_width_chroma_ub[k],
2042 					&dpte_row_height_chroma[k],
2043 					&dpte_row_height_linear_chroma[k],
2044 					&PixelPTEBytesPerRowC_one_row_per_frame[k],
2045 					&dpte_row_width_chroma_ub_one_row_per_frame[k],
2046 					&dpte_row_height_chroma_one_row_per_frame[k],
2047 					&meta_req_width_chroma[k],
2048 					&meta_req_height_chroma[k],
2049 					&meta_row_width_chroma[k],
2050 					&meta_row_height_chroma[k],
2051 					&PixelPTEReqWidthC[k],
2052 					&PixelPTEReqHeightC[k],
2053 					&PTERequestSizeC[k],
2054 					&dpde0_bytes_per_frame_ub_c[k],
2055 					&meta_pte_bytes_per_frame_ub_c[k]);
2056 
2057 			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2058 					myPipe[k].VRatioChroma,
2059 					myPipe[k].VTapsChroma,
2060 					myPipe[k].InterlaceEnable,
2061 					myPipe[k].ProgressiveToInterlaceUnitInOPP,
2062 					myPipe[k].SwathHeightC,
2063 					myPipe[k].SourceRotation,
2064 					myPipe[k].ViewportStationary,
2065 					SwathWidthC[k],
2066 					myPipe[k].ViewportHeightChroma,
2067 					myPipe[k].ViewportXStartC,
2068 					myPipe[k].ViewportYStartC,
2069 
2070 					/* Output */
2071 					&VInitPreFillC[k],
2072 					&MaxNumSwathC[k]);
2073 		} else {
2074 			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2075 			PTEBufferSizeInRequestsForChroma[k] = 0;
2076 			PixelPTEBytesPerRowC[k] = 0;
2077 			PDEAndMetaPTEBytesFrameC = 0;
2078 			MetaRowByteC[k] = 0;
2079 			MaxNumSwathC[k] = 0;
2080 			PrefetchSourceLinesC[k] = 0;
2081 			dpte_row_height_chroma_one_row_per_frame[k] = 0;
2082 			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2083 			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2084 		}
2085 
2086 		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2087 				myPipe[k].ViewportStationary,
2088 				myPipe[k].DCCEnable,
2089 				myPipe[k].DPPPerSurface,
2090 				myPipe[k].BlockHeight256BytesY,
2091 				myPipe[k].BlockWidth256BytesY,
2092 				myPipe[k].SourcePixelFormat,
2093 				myPipe[k].SurfaceTiling,
2094 				myPipe[k].BytePerPixelY,
2095 				myPipe[k].SourceRotation,
2096 				SwathWidthY[k],
2097 				myPipe[k].ViewportHeight,
2098 				myPipe[k].ViewportXStart,
2099 				myPipe[k].ViewportYStart,
2100 				GPUVMEnable,
2101 				HostVMEnable,
2102 				HostVMMaxNonCachedPageTableLevels,
2103 				GPUVMMaxPageTableLevels,
2104 				GPUVMMinPageSizeKBytes[k],
2105 				HostVMMinPageSize,
2106 				PTEBufferSizeInRequestsForLuma[k],
2107 				myPipe[k].PitchY,
2108 				myPipe[k].DCCMetaPitchY,
2109 				myPipe[k].BlockWidthY,
2110 				myPipe[k].BlockHeightY,
2111 
2112 				/* Output */
2113 				&MetaRowByteY[k],
2114 				&PixelPTEBytesPerRowY[k],
2115 				&dpte_row_width_luma_ub[k],
2116 				&dpte_row_height_luma[k],
2117 				&dpte_row_height_linear_luma[k],
2118 				&PixelPTEBytesPerRowY_one_row_per_frame[k],
2119 				&dpte_row_width_luma_ub_one_row_per_frame[k],
2120 				&dpte_row_height_luma_one_row_per_frame[k],
2121 				&meta_req_width[k],
2122 				&meta_req_height[k],
2123 				&meta_row_width[k],
2124 				&meta_row_height[k],
2125 				&PixelPTEReqWidthY[k],
2126 				&PixelPTEReqHeightY[k],
2127 				&PTERequestSizeY[k],
2128 				&dpde0_bytes_per_frame_ub_l[k],
2129 				&meta_pte_bytes_per_frame_ub_l[k]);
2130 
2131 		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2132 				myPipe[k].VRatio,
2133 				myPipe[k].VTaps,
2134 				myPipe[k].InterlaceEnable,
2135 				myPipe[k].ProgressiveToInterlaceUnitInOPP,
2136 				myPipe[k].SwathHeightY,
2137 				myPipe[k].SourceRotation,
2138 				myPipe[k].ViewportStationary,
2139 				SwathWidthY[k],
2140 				myPipe[k].ViewportHeight,
2141 				myPipe[k].ViewportXStart,
2142 				myPipe[k].ViewportYStart,
2143 
2144 				/* Output */
2145 				&VInitPreFillY[k],
2146 				&MaxNumSwathY[k]);
2147 
2148 		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2149 		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2150 
2151 		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2152 				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2153 			PTEBufferSizeNotExceeded[k] = true;
2154 		} else {
2155 			PTEBufferSizeNotExceeded[k] = false;
2156 		}
2157 
2158 		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2159 			PTEBufferSizeInRequestsForLuma[k] &&
2160 			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2161 	}
2162 
2163 	dml32_CalculateMALLUseForStaticScreen(
2164 			NumberOfActiveSurfaces,
2165 			MALLAllocatedForDCN,
2166 			UseMALLForStaticScreen,   // mode
2167 			SurfaceSizeInMALL,
2168 			one_row_per_frame_fits_in_buffer,
2169 			/* Output */
2170 			UsesMALLForStaticScreen); // boolen
2171 
2172 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2173 		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2174 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2175 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2176 				(GPUVMMinPageSizeKBytes[k] > 64);
2177 		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2178 	}
2179 
2180 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2181 #ifdef __DML_VBA_DEBUG__
2182 		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2183 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2184 #endif
2185 		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2186 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2187 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2188 				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2189 
2190 		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2191 				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2192 
2193 		if (use_one_row_for_frame[k]) {
2194 			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2195 			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2196 			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2197 			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2198 			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2199 			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2200 			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2201 		}
2202 
2203 		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2204 			DCCMetaBufferSizeNotExceeded[k] = true;
2205 		else
2206 			DCCMetaBufferSizeNotExceeded[k] = false;
2207 
2208 		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2209 		if (use_one_row_for_frame[k])
2210 			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2211 
2212 		dml32_CalculateRowBandwidth(
2213 				GPUVMEnable,
2214 				myPipe[k].SourcePixelFormat,
2215 				myPipe[k].VRatio,
2216 				myPipe[k].VRatioChroma,
2217 				myPipe[k].DCCEnable,
2218 				myPipe[k].HTotal / myPipe[k].PixelClock,
2219 				MetaRowByteY[k], MetaRowByteC[k],
2220 				meta_row_height[k],
2221 				meta_row_height_chroma[k],
2222 				PixelPTEBytesPerRowY[k],
2223 				PixelPTEBytesPerRowC[k],
2224 				dpte_row_height_luma[k],
2225 				dpte_row_height_chroma[k],
2226 
2227 				/* Output */
2228 				&meta_row_bw[k],
2229 				&dpte_row_bw[k]);
2230 #ifdef __DML_VBA_DEBUG__
2231 		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2232 		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2233 				__func__, k, use_one_row_for_frame_flip[k]);
2234 		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2235 				__func__, k, UseMALLForPStateChange[k]);
2236 		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2237 		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2238 				__func__, k, dpte_row_width_luma_ub[k]);
2239 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2240 		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2241 				__func__, k, dpte_row_height_chroma[k]);
2242 		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2243 				__func__, k, dpte_row_width_chroma_ub[k]);
2244 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2245 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2246 		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2247 				__func__, k, PTEBufferSizeNotExceeded[k]);
2248 		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2249 		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2250 #endif
2251 	}
2252 } // CalculateVMRowAndSwath
2253 
2254 unsigned int dml32_CalculateVMAndRowBytes(
2255 		bool ViewportStationary,
2256 		bool DCCEnable,
2257 		unsigned int NumberOfDPPs,
2258 		unsigned int BlockHeight256Bytes,
2259 		unsigned int BlockWidth256Bytes,
2260 		enum source_format_class SourcePixelFormat,
2261 		unsigned int SurfaceTiling,
2262 		unsigned int BytePerPixel,
2263 		enum dm_rotation_angle SourceRotation,
2264 		double SwathWidth,
2265 		unsigned int ViewportHeight,
2266 		unsigned int    ViewportXStart,
2267 		unsigned int    ViewportYStart,
2268 		bool GPUVMEnable,
2269 		bool HostVMEnable,
2270 		unsigned int HostVMMaxNonCachedPageTableLevels,
2271 		unsigned int GPUVMMaxPageTableLevels,
2272 		unsigned int GPUVMMinPageSizeKBytes,
2273 		unsigned int HostVMMinPageSize,
2274 		unsigned int PTEBufferSizeInRequests,
2275 		unsigned int Pitch,
2276 		unsigned int DCCMetaPitch,
2277 		unsigned int MacroTileWidth,
2278 		unsigned int MacroTileHeight,
2279 
2280 		/* Output */
2281 		unsigned int *MetaRowByte,
2282 		unsigned int *PixelPTEBytesPerRow,
2283 		unsigned int    *dpte_row_width_ub,
2284 		unsigned int *dpte_row_height,
2285 		unsigned int *dpte_row_height_linear,
2286 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2287 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2288 		unsigned int    *dpte_row_height_one_row_per_frame,
2289 		unsigned int *MetaRequestWidth,
2290 		unsigned int *MetaRequestHeight,
2291 		unsigned int *meta_row_width,
2292 		unsigned int *meta_row_height,
2293 		unsigned int *PixelPTEReqWidth,
2294 		unsigned int *PixelPTEReqHeight,
2295 		unsigned int *PTERequestSize,
2296 		unsigned int    *DPDE0BytesFrame,
2297 		unsigned int    *MetaPTEBytesFrame)
2298 {
2299 	unsigned int MPDEBytesFrame;
2300 	unsigned int DCCMetaSurfaceBytes;
2301 	unsigned int ExtraDPDEBytesFrame;
2302 	unsigned int PDEAndMetaPTEBytesFrame;
2303 	unsigned int HostVMDynamicLevels = 0;
2304 	unsigned int    MacroTileSizeBytes;
2305 	unsigned int    vp_height_meta_ub;
2306 	unsigned int    vp_height_dpte_ub;
2307 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2308 
2309 	if (GPUVMEnable == true && HostVMEnable == true) {
2310 		if (HostVMMinPageSize < 2048)
2311 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2312 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2313 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2314 		else
2315 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2316 	}
2317 
2318 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2319 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2320 	if (SurfaceTiling == dm_sw_linear) {
2321 		*meta_row_height = 32;
2322 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2323 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2324 	} else if (!IsVertical(SourceRotation)) {
2325 		*meta_row_height = *MetaRequestHeight;
2326 		if (ViewportStationary && NumberOfDPPs == 1) {
2327 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2328 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2329 		} else {
2330 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2331 		}
2332 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2333 	} else {
2334 		*meta_row_height = *MetaRequestWidth;
2335 		if (ViewportStationary && NumberOfDPPs == 1) {
2336 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2337 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2338 		} else {
2339 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2340 		}
2341 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2342 	}
2343 
2344 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2345 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2346 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2347 	} else if (!IsVertical(SourceRotation)) {
2348 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2349 	} else {
2350 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2351 	}
2352 
2353 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2354 
2355 	if (GPUVMEnable == true) {
2356 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2357 				(8 * 4.0 * 1024), 1) + 1) * 64;
2358 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2359 	} else {
2360 		*MetaPTEBytesFrame = 0;
2361 		MPDEBytesFrame = 0;
2362 	}
2363 
2364 	if (DCCEnable != true) {
2365 		*MetaPTEBytesFrame = 0;
2366 		MPDEBytesFrame = 0;
2367 		*MetaRowByte = 0;
2368 	}
2369 
2370 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2371 
2372 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2373 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2374 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2375 					MacroTileHeight - 1, MacroTileHeight) -
2376 					dml_floor(ViewportYStart, MacroTileHeight);
2377 		} else if (!IsVertical(SourceRotation)) {
2378 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2379 		} else {
2380 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2381 		}
2382 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2383 				(8 * 2097152), 1) + 1);
2384 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2385 	} else {
2386 		*DPDE0BytesFrame = 0;
2387 		ExtraDPDEBytesFrame = 0;
2388 		vp_height_dpte_ub = 0;
2389 	}
2390 
2391 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2392 
2393 #ifdef __DML_VBA_DEBUG__
2394 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2395 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2396 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2397 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2398 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2399 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2400 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2401 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2402 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2403 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2404 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2405 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2406 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2407 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2408 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2409 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2410 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2411 #endif
2412 
2413 	if (HostVMEnable == true)
2414 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2415 
2416 	if (SurfaceTiling == dm_sw_linear) {
2417 		*PixelPTEReqHeight = 1;
2418 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2419 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2420 		*PTERequestSize = 64;
2421 	} else if (GPUVMMinPageSizeKBytes == 4) {
2422 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2423 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2424 		*PTERequestSize = 128;
2425 	} else {
2426 		*PixelPTEReqHeight = MacroTileHeight;
2427 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2428 		*PTERequestSize = 64;
2429 	}
2430 #ifdef __DML_VBA_DEBUG__
2431 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2432 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2433 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2434 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2435 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2436 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2437 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2438 #endif
2439 
2440 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2441 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2442 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2443 					(double) *PixelPTEReqWidth;
2444 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2445 			*PTERequestSize;
2446 
2447 	if (SurfaceTiling == dm_sw_linear) {
2448 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2449 				*PixelPTEReqWidth / Pitch), 1));
2450 #ifdef __DML_VBA_DEBUG__
2451 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2452 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2453 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2454 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2455 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2456 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2457 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2458 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2459 						*PixelPTEReqWidth / Pitch), 1));
2460 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2461 #endif
2462 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2463 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2464 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2465 
2466 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2467 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2468 				PixelPTEReqWidth_linear / Pitch), 1);
2469 		if (*dpte_row_height_linear > 128)
2470 			*dpte_row_height_linear = 128;
2471 
2472 	} else if (!IsVertical(SourceRotation)) {
2473 		*dpte_row_height = *PixelPTEReqHeight;
2474 
2475 		if (GPUVMMinPageSizeKBytes > 64) {
2476 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2477 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2478 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2479 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2480 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2481 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2482 		} else {
2483 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2484 					*PixelPTEReqWidth;
2485 		}
2486 
2487 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2488 	} else {
2489 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2490 
2491 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2492 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2493 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2494 		} else {
2495 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2496 					* *PixelPTEReqHeight;
2497 		}
2498 
2499 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2500 	}
2501 
2502 	if (GPUVMEnable != true)
2503 		*PixelPTEBytesPerRow = 0;
2504 	if (HostVMEnable == true)
2505 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2506 
2507 #ifdef __DML_VBA_DEBUG__
2508 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2509 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2510 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2511 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2512 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2513 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2514 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2515 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2516 			__func__, *dpte_row_width_ub_one_row_per_frame);
2517 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2518 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2519 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2520 			*MetaPTEBytesFrame);
2521 #endif
2522 
2523 	return PDEAndMetaPTEBytesFrame;
2524 } // CalculateVMAndRowBytes
2525 
2526 double dml32_CalculatePrefetchSourceLines(
2527 		double VRatio,
2528 		unsigned int VTaps,
2529 		bool Interlace,
2530 		bool ProgressiveToInterlaceUnitInOPP,
2531 		unsigned int SwathHeight,
2532 		enum dm_rotation_angle SourceRotation,
2533 		bool ViewportStationary,
2534 		double SwathWidth,
2535 		unsigned int ViewportHeight,
2536 		unsigned int ViewportXStart,
2537 		unsigned int ViewportYStart,
2538 
2539 		/* Output */
2540 		double *VInitPreFill,
2541 		unsigned int *MaxNumSwath)
2542 {
2543 
2544 	unsigned int vp_start_rot;
2545 	unsigned int sw0_tmp;
2546 	unsigned int MaxPartialSwath;
2547 	double numLines;
2548 
2549 #ifdef __DML_VBA_DEBUG__
2550 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2551 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2552 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2553 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2554 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2555 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2556 #endif
2557 	if (ProgressiveToInterlaceUnitInOPP)
2558 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2559 	else
2560 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2561 
2562 	if (ViewportStationary) {
2563 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2564 			vp_start_rot = SwathHeight -
2565 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2566 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2567 			vp_start_rot = ViewportXStart;
2568 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2569 			vp_start_rot = SwathHeight -
2570 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2571 		} else {
2572 			vp_start_rot = ViewportYStart;
2573 		}
2574 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2575 		if (sw0_tmp < *VInitPreFill)
2576 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2577 		else
2578 			*MaxNumSwath = 1;
2579 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2580 	} else {
2581 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2582 		if (*VInitPreFill > 1)
2583 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2584 		else
2585 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2586 	}
2587 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2588 
2589 #ifdef __DML_VBA_DEBUG__
2590 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2591 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2592 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2593 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2594 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2595 #endif
2596 	return numLines;
2597 
2598 } // CalculatePrefetchSourceLines
2599 
2600 void dml32_CalculateMALLUseForStaticScreen(
2601 		unsigned int NumberOfActiveSurfaces,
2602 		unsigned int MALLAllocatedForDCNFinal,
2603 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2604 		unsigned int SurfaceSizeInMALL[],
2605 		bool one_row_per_frame_fits_in_buffer[],
2606 
2607 		/* output */
2608 		bool UsesMALLForStaticScreen[])
2609 {
2610 	unsigned int k;
2611 	unsigned int SurfaceToAddToMALL;
2612 	bool CanAddAnotherSurfaceToMALL;
2613 	unsigned int TotalSurfaceSizeInMALL;
2614 
2615 	TotalSurfaceSizeInMALL = 0;
2616 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2617 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2618 		if (UsesMALLForStaticScreen[k])
2619 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2620 #ifdef __DML_VBA_DEBUG__
2621 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2622 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2623 #endif
2624 	}
2625 
2626 	SurfaceToAddToMALL = 0;
2627 	CanAddAnotherSurfaceToMALL = true;
2628 	while (CanAddAnotherSurfaceToMALL) {
2629 		CanAddAnotherSurfaceToMALL = false;
2630 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2631 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2632 					!UsesMALLForStaticScreen[k] &&
2633 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2634 					one_row_per_frame_fits_in_buffer[k] &&
2635 					(!CanAddAnotherSurfaceToMALL ||
2636 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2637 				CanAddAnotherSurfaceToMALL = true;
2638 				SurfaceToAddToMALL = k;
2639 #ifdef __DML_VBA_DEBUG__
2640 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2641 						__func__, k, UseMALLForStaticScreen[k]);
2642 #endif
2643 			}
2644 		}
2645 		if (CanAddAnotherSurfaceToMALL) {
2646 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2647 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2648 
2649 #ifdef __DML_VBA_DEBUG__
2650 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2651 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2652 #endif
2653 
2654 		}
2655 	}
2656 }
2657 
2658 void dml32_CalculateRowBandwidth(
2659 		bool GPUVMEnable,
2660 		enum source_format_class SourcePixelFormat,
2661 		double VRatio,
2662 		double VRatioChroma,
2663 		bool DCCEnable,
2664 		double LineTime,
2665 		unsigned int MetaRowByteLuma,
2666 		unsigned int MetaRowByteChroma,
2667 		unsigned int meta_row_height_luma,
2668 		unsigned int meta_row_height_chroma,
2669 		unsigned int PixelPTEBytesPerRowLuma,
2670 		unsigned int PixelPTEBytesPerRowChroma,
2671 		unsigned int dpte_row_height_luma,
2672 		unsigned int dpte_row_height_chroma,
2673 		/* Output */
2674 		double *meta_row_bw,
2675 		double *dpte_row_bw)
2676 {
2677 	if (DCCEnable != true) {
2678 		*meta_row_bw = 0;
2679 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2680 			SourcePixelFormat == dm_rgbe_alpha) {
2681 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2682 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2683 	} else {
2684 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2685 	}
2686 
2687 	if (GPUVMEnable != true) {
2688 		*dpte_row_bw = 0;
2689 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2690 			SourcePixelFormat == dm_rgbe_alpha) {
2691 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2692 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2693 	} else {
2694 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2695 	}
2696 }
2697 
/*
 * dml32_CalculateUrgentLatency - worst-case urgent latency.
 *
 * Takes the largest of the three supplied latencies. When
 * DoUrgentLatencyAdjustment is set, the fabric clock component scaled by
 * (reference fabric clock / FabricClock - 1) is added on top.
 *
 * Return: the resulting urgent latency.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_latency;

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2716 
2717 void dml32_CalculateUrgentBurstFactor(
2718 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2719 		unsigned int    swath_width_luma_ub,
2720 		unsigned int    swath_width_chroma_ub,
2721 		unsigned int SwathHeightY,
2722 		unsigned int SwathHeightC,
2723 		double  LineTime,
2724 		double  UrgentLatency,
2725 		double  CursorBufferSize,
2726 		unsigned int CursorWidth,
2727 		unsigned int CursorBPP,
2728 		double  VRatio,
2729 		double  VRatioC,
2730 		double  BytePerPixelInDETY,
2731 		double  BytePerPixelInDETC,
2732 		unsigned int    DETBufferSizeY,
2733 		unsigned int    DETBufferSizeC,
2734 		/* Output */
2735 		double *UrgentBurstFactorCursor,
2736 		double *UrgentBurstFactorLuma,
2737 		double *UrgentBurstFactorChroma,
2738 		bool   *NotEnoughUrgentLatencyHiding)
2739 {
2740 	double       LinesInDETLuma;
2741 	double       LinesInDETChroma;
2742 	unsigned int LinesInCursorBuffer;
2743 	double       CursorBufferSizeInTime;
2744 	double       DETBufferSizeInTimeLuma;
2745 	double       DETBufferSizeInTimeChroma;
2746 
2747 	*NotEnoughUrgentLatencyHiding = 0;
2748 
2749 	if (CursorWidth > 0) {
2750 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2751 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2752 		if (VRatio > 0) {
2753 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2754 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2755 				*NotEnoughUrgentLatencyHiding = 1;
2756 				*UrgentBurstFactorCursor = 0;
2757 			} else {
2758 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2759 						(CursorBufferSizeInTime - UrgentLatency);
2760 			}
2761 		} else {
2762 			*UrgentBurstFactorCursor = 1;
2763 		}
2764 	}
2765 
2766 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2767 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2768 
2769 	if (VRatio > 0) {
2770 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2771 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2772 			*NotEnoughUrgentLatencyHiding = 1;
2773 			*UrgentBurstFactorLuma = 0;
2774 		} else {
2775 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2776 		}
2777 	} else {
2778 		*UrgentBurstFactorLuma = 1;
2779 	}
2780 
2781 	if (BytePerPixelInDETC > 0) {
2782 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2783 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2784 					/ swath_width_chroma_ub;
2785 
2786 		if (VRatio > 0) {
2787 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2788 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2789 				*NotEnoughUrgentLatencyHiding = 1;
2790 				*UrgentBurstFactorChroma = 0;
2791 			} else {
2792 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2793 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2794 			}
2795 		} else {
2796 			*UrgentBurstFactorChroma = 1;
2797 		}
2798 	}
2799 } // CalculateUrgentBurstFactor
2800 
2801 void dml32_CalculateDCFCLKDeepSleep(
2802 		unsigned int NumberOfActiveSurfaces,
2803 		unsigned int BytePerPixelY[],
2804 		unsigned int BytePerPixelC[],
2805 		double VRatio[],
2806 		double VRatioChroma[],
2807 		double SwathWidthY[],
2808 		double SwathWidthC[],
2809 		unsigned int DPPPerSurface[],
2810 		double HRatio[],
2811 		double HRatioChroma[],
2812 		double PixelClock[],
2813 		double PSCL_THROUGHPUT[],
2814 		double PSCL_THROUGHPUT_CHROMA[],
2815 		double Dppclk[],
2816 		double ReadBandwidthLuma[],
2817 		double ReadBandwidthChroma[],
2818 		unsigned int ReturnBusWidth,
2819 
2820 		/* Output */
2821 		double *DCFClkDeepSleep)
2822 {
2823 	unsigned int k;
2824 	double   DisplayPipeLineDeliveryTimeLuma;
2825 	double   DisplayPipeLineDeliveryTimeChroma;
2826 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2827 	double ReadBandwidth = 0.0;
2828 
2829 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2830 
2831 		if (VRatio[k] <= 1) {
2832 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2833 					/ PixelClock[k];
2834 		} else {
2835 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2836 		}
2837 		if (BytePerPixelC[k] == 0) {
2838 			DisplayPipeLineDeliveryTimeChroma = 0;
2839 		} else {
2840 			if (VRatioChroma[k] <= 1) {
2841 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2842 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2843 			} else {
2844 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2845 						/ Dppclk[k];
2846 			}
2847 		}
2848 
2849 		if (BytePerPixelC[k] > 0) {
2850 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2851 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2852 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2853 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2854 		} else {
2855 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2856 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2857 		}
2858 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2859 
2860 #ifdef __DML_VBA_DEBUG__
2861 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2862 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2863 #endif
2864 	}
2865 
2866 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2867 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2868 
2869 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2870 
2871 #ifdef __DML_VBA_DEBUG__
2872 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2873 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2874 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2875 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2876 #endif
2877 
2878 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2879 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2880 #ifdef __DML_VBA_DEBUG__
2881 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2882 #endif
2883 } // CalculateDCFCLKDeepSleep
2884 
2885 double dml32_CalculateWriteBackDelay(
2886 		enum source_format_class WritebackPixelFormat,
2887 		double WritebackHRatio,
2888 		double WritebackVRatio,
2889 		unsigned int WritebackVTaps,
2890 		unsigned int         WritebackDestinationWidth,
2891 		unsigned int         WritebackDestinationHeight,
2892 		unsigned int         WritebackSourceHeight,
2893 		unsigned int HTotal)
2894 {
2895 	double CalculateWriteBackDelay;
2896 	double Line_length;
2897 	double Output_lines_last_notclamped;
2898 	double WritebackVInit;
2899 
2900 	WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2901 	Line_length = dml_max((double) WritebackDestinationWidth,
2902 			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2903 	Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2904 			dml_ceil(((double)WritebackSourceHeight -
2905 					(double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2906 	if (Output_lines_last_notclamped < 0) {
2907 		CalculateWriteBackDelay = 0;
2908 	} else {
2909 		CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2910 				(HTotal - WritebackDestinationWidth) + 80;
2911 	}
2912 	return CalculateWriteBackDelay;
2913 }
2914 
2915 void dml32_UseMinimumDCFCLK(
2916 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2917 		bool DRRDisplay[],
2918 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2919 		unsigned int MaxInterDCNTileRepeaters,
2920 		unsigned int MaxPrefetchMode,
2921 		double DRAMClockChangeLatencyFinal,
2922 		double FCLKChangeLatency,
2923 		double SREnterPlusExitTime,
2924 		unsigned int ReturnBusWidth,
2925 		unsigned int RoundTripPingLatencyCycles,
2926 		unsigned int ReorderingBytes,
2927 		unsigned int PixelChunkSizeInKByte,
2928 		unsigned int MetaChunkSize,
2929 		bool GPUVMEnable,
2930 		unsigned int GPUVMMaxPageTableLevels,
2931 		bool HostVMEnable,
2932 		unsigned int NumberOfActiveSurfaces,
2933 		double HostVMMinPageSize,
2934 		unsigned int HostVMMaxNonCachedPageTableLevels,
2935 		bool DynamicMetadataVMEnabled,
2936 		bool ImmediateFlipRequirement,
2937 		bool ProgressiveToInterlaceUnitInOPP,
2938 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2939 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2940 		unsigned int VTotal[],
2941 		unsigned int VActive[],
2942 		unsigned int DynamicMetadataTransmittedBytes[],
2943 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2944 		bool Interlace[],
2945 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2946 		double RequiredDISPCLK[][2],
2947 		double UrgLatency[],
2948 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2949 		double ProjectedDCFClkDeepSleep[][2],
2950 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2951 		unsigned int TotalNumberOfActiveDPP[][2],
2952 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2953 		unsigned int dpte_group_bytes[],
2954 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2955 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2956 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2957 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2958 		unsigned int BytePerPixelY[],
2959 		unsigned int BytePerPixelC[],
2960 		unsigned int HTotal[],
2961 		double PixelClock[],
2962 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2963 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2964 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2965 		bool DynamicMetadataEnable[],
2966 		double ReadBandwidthLuma[],
2967 		double ReadBandwidthChroma[],
2968 		double DCFCLKPerState[],
2969 		/* Output */
2970 		double DCFCLKState[][2])
2971 {
2972 	unsigned int i, j, k;
2973 	unsigned int     dummy1;
2974 	double dummy2, dummy3;
2975 	double   NormalEfficiency;
2976 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2977 
2978 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2979 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2980 		for  (j = 0; j <= 1; ++j) {
2981 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2982 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2983 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2984 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2985 			double MinimumTWait = 0.0;
2986 			double DPTEBandwidth;
2987 			double DCFCLKRequiredForAverageBandwidth;
2988 			unsigned int ExtraLatencyBytes;
2989 			double ExtraLatencyCycles;
2990 			double DCFCLKRequiredForPeakBandwidth;
2991 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2992 			double MinimumTvmPlus2Tr0;
2993 
2994 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2995 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2996 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2997 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2998 								/ (15.75 * HTotal[k] / PixelClock[k]);
2999 			}
3000 
3001 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3002 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3003 
3004 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3005 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3006 
3007 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3008 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3009 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3010 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3011 					HostVMMaxNonCachedPageTableLevels);
3012 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3013 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3014 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3015 				double DCFCLKCyclesRequiredInPrefetch;
3016 				double PrefetchTime;
3017 
3018 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3019 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3020 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3021 								* BytePerPixelC[k]) / NormalEfficiency
3022 						/ ReturnBusWidth;
3023 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3024 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3025 								/ NormalEfficiency / ReturnBusWidth
3026 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3027 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3028 								/ ReturnBusWidth
3029 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3030 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3031 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3032 						* HTotal[k] / PixelClock[k];
3033 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3034 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3035 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3036 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3037 
3038 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3039 						UseMALLForPStateChange[k],
3040 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3041 						DRRDisplay[k],
3042 						DRAMClockChangeLatencyFinal,
3043 						FCLKChangeLatency,
3044 						UrgLatency[i],
3045 						SREnterPlusExitTime);
3046 
3047 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3048 						MinimumTWait - UrgLatency[i] *
3049 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3050 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3051 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3052 						DynamicMetadataVMExtraLatency[k];
3053 
3054 				if (PrefetchTime > 0) {
3055 					double ExpectedVRatioPrefetch;
3056 
3057 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3058 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3059 							DCFCLKCyclesRequiredInPrefetch);
3060 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3061 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3062 							PrefetchPixelLinesTime[k] *
3063 							dml_max(1.0, ExpectedVRatioPrefetch) *
3064 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3065 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3066 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3067 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3068 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3069 								NormalEfficiency / ReturnBusWidth;
3070 					}
3071 				} else {
3072 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3073 				}
3074 				if (DynamicMetadataEnable[k] == true) {
3075 					double TSetupPipe;
3076 					double TdmbfPipe;
3077 					double TdmsksPipe;
3078 					double TdmecPipe;
3079 					double AllowedTimeForUrgentExtraLatency;
3080 
3081 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3082 							MaxInterDCNTileRepeaters,
3083 							RequiredDPPCLKPerSurface[i][j][k],
3084 							RequiredDISPCLK[i][j],
3085 							ProjectedDCFClkDeepSleep[i][j],
3086 							PixelClock[k],
3087 							HTotal[k],
3088 							VTotal[k] - VActive[k],
3089 							DynamicMetadataTransmittedBytes[k],
3090 							DynamicMetadataLinesBeforeActiveRequired[k],
3091 							Interlace[k],
3092 							ProgressiveToInterlaceUnitInOPP,
3093 
3094 							/* output */
3095 							&TSetupPipe,
3096 							&TdmbfPipe,
3097 							&TdmecPipe,
3098 							&TdmsksPipe,
3099 							&dummy1,
3100 							&dummy2,
3101 							&dummy3);
3102 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3103 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3104 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3105 					if (AllowedTimeForUrgentExtraLatency > 0)
3106 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3107 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3108 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3109 					else
3110 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3111 				}
3112 			}
3113 			DCFCLKRequiredForPeakBandwidth = 0;
3114 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3115 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3116 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3117 			}
3118 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3119 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3120 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3121 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3122 				double MaximumTvmPlus2Tr0PlusTsw;
3123 
3124 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3125 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3126 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3127 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3128 				} else {
3129 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3130 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3131 								MinimumTvmPlus2Tr0 -
3132 								PrefetchPixelLinesTime[k] / 4),
3133 							(2 * ExtraLatencyCycles +
3134 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3135 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3136 				}
3137 			}
3138 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3139 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3140 		}
3141 	}
3142 }
3143 
/*
 * dml32_CalculateExtraLatencyBytes - byte count used for the extra latency term.
 *
 * The result is ReorderingBytes plus the pixel and meta chunk sizes (given in
 * KByte units, hence the * 1024) of all active / DCC-active DPPs. When GPUVM
 * is enabled, each surface additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * HostVMInefficiencyFactor,
 * where "levels" is the number of dynamic host-VM page table levels:
 *   - 0 unless both GPUVM and HostVM are enabled;
 *   - HostVMMaxNonCachedPageTableLevels for HostVMMinPageSize < 2048;
 *   - one level fewer for HostVMMinPageSize < 1048576, two fewer otherwise,
 *     never going below 0.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int dyn_levels = 0;
	unsigned int group_multiplier;
	unsigned int surf;
	double total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			dyn_levels = HostVMMaxNonCachedPageTableLevels;
		} else {
			/* Larger minimum pages drop one or two levels, clamped at 0. */
			int dropped = (HostVMMinPageSize < 1048576) ? 1 : 2;

			dyn_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - dropped);
		}
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return total_bytes;

	group_multiplier = 1 + 8 * dyn_levels;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		total_bytes += NumberOfDPP[surf] * dpte_group_bytes[surf] *
				group_multiplier * HostVMInefficiencyFactor;
	}

	return total_bytes;
}
3184 
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - derive the VUPDATE /
 * VREADY pixel values and the dynamic metadata timing terms for one pipe.
 *
 * Inputs are the clocks (Dppclk, Dispclk, DCFClkDeepSleep, PixelClock), the
 * horizontal total, the vertical blank line count and the dynamic metadata
 * settings. No input is validated; a zero clock leads to a division by zero.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                        repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                        20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - the three pixel values above, summed, / PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - one line time (HTotal / PixelClock)
 *   *Tdmsks           - half of the vertical blank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved when
 *                       InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	/* Each repeater costs 2 DPPCLK periods plus 3 DISPCLK periods. */
	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3239 
3240 double dml32_CalculateTWait(
3241 		unsigned int PrefetchMode,
3242 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3243 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3244 		bool DRRDisplay,
3245 		double DRAMClockChangeLatency,
3246 		double FCLKChangeLatency,
3247 		double UrgentLatency,
3248 		double SREnterPlusExitTime)
3249 {
3250 	double TWait = 0.0;
3251 
3252 	if (PrefetchMode == 0 &&
3253 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3254 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3255 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3256 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3257 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3258 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3259 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3260 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3261 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3262 	} else {
3263 		TWait = UrgentLatency;
3264 	}
3265 
3266 #ifdef __DML_VBA_DEBUG__
3267 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3268 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3269 #endif
3270 	return TWait;
3271 } // CalculateTWait
3272 
3273 // Function: get_return_bw_mbps
3274 // Megabyte per second
3275 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3276 		const int VoltageLevel,
3277 		const bool HostVMEnable,
3278 		const double DCFCLK,
3279 		const double FabricClock,
3280 		const double DRAMSpeed)
3281 {
3282 	double ReturnBW = 0.;
3283 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3284 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3285 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3286 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3287 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3288 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3289 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3290 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3291 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3292 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3293 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3294 
3295 	if (HostVMEnable != true)
3296 		ReturnBW = PixelDataOnlyReturnBW;
3297 	else
3298 		ReturnBW = PixelMixedWithVMDataReturnBW;
3299 
3300 #ifdef __DML_VBA_DEBUG__
3301 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3302 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3303 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3304 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3305 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3306 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3307 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3308 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3309 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3310 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3311 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3312 #endif
3313 	return ReturnBW;
3314 }
3315 
3316 // Function: get_return_bw_mbps_vm_only
3317 // Megabyte per second
3318 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3319 		const int VoltageLevel,
3320 		const double DCFCLK,
3321 		const double FabricClock,
3322 		const double DRAMSpeed)
3323 {
3324 	double VMDataOnlyReturnBW = dml_min3(
3325 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3326 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3327 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3328 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3329 					* (VoltageLevel < 2 ?
3330 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3331 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3332 #ifdef __DML_VBA_DEBUG__
3333 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3334 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3335 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3336 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3337 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3338 #endif
3339 	return VMDataOnlyReturnBW;
3340 }
3341 
3342 double dml32_CalculateExtraLatency(
3343 		unsigned int RoundTripPingLatencyCycles,
3344 		unsigned int ReorderingBytes,
3345 		double DCFCLK,
3346 		unsigned int TotalNumberOfActiveDPP,
3347 		unsigned int PixelChunkSizeInKByte,
3348 		unsigned int TotalNumberOfDCCActiveDPP,
3349 		unsigned int MetaChunkSize,
3350 		double ReturnBW,
3351 		bool GPUVMEnable,
3352 		bool HostVMEnable,
3353 		unsigned int NumberOfActiveSurfaces,
3354 		unsigned int NumberOfDPP[],
3355 		unsigned int dpte_group_bytes[],
3356 		double HostVMInefficiencyFactor,
3357 		double HostVMMinPageSize,
3358 		unsigned int HostVMMaxNonCachedPageTableLevels)
3359 {
3360 	double ExtraLatencyBytes;
3361 	double ExtraLatency;
3362 
3363 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3364 			ReorderingBytes,
3365 			TotalNumberOfActiveDPP,
3366 			PixelChunkSizeInKByte,
3367 			TotalNumberOfDCCActiveDPP,
3368 			MetaChunkSize,
3369 			GPUVMEnable,
3370 			HostVMEnable,
3371 			NumberOfActiveSurfaces,
3372 			NumberOfDPP,
3373 			dpte_group_bytes,
3374 			HostVMInefficiencyFactor,
3375 			HostVMMinPageSize,
3376 			HostVMMaxNonCachedPageTableLevels);
3377 
3378 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3379 
3380 #ifdef __DML_VBA_DEBUG__
3381 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3382 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3383 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3384 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3385 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3386 #endif
3387 
3388 	return ExtraLatency;
3389 } // CalculateExtraLatency
3390 
3391 bool dml32_CalculatePrefetchSchedule(
3392 		struct vba_vars_st *v,
3393 		unsigned int k,
3394 		double HostVMInefficiencyFactor,
3395 		DmlPipe *myPipe,
3396 		unsigned int DSCDelay,
3397 		unsigned int DPP_RECOUT_WIDTH,
3398 		unsigned int VStartup,
3399 		unsigned int MaxVStartup,
3400 		double UrgentLatency,
3401 		double UrgentExtraLatency,
3402 		double TCalc,
3403 		unsigned int PDEAndMetaPTEBytesFrame,
3404 		unsigned int MetaRowByte,
3405 		unsigned int PixelPTEBytesPerRow,
3406 		double PrefetchSourceLinesY,
3407 		unsigned int SwathWidthY,
3408 		unsigned int VInitPreFillY,
3409 		unsigned int MaxNumSwathY,
3410 		double PrefetchSourceLinesC,
3411 		unsigned int SwathWidthC,
3412 		unsigned int VInitPreFillC,
3413 		unsigned int MaxNumSwathC,
3414 		unsigned int swath_width_luma_ub,
3415 		unsigned int swath_width_chroma_ub,
3416 		unsigned int SwathHeightY,
3417 		unsigned int SwathHeightC,
3418 		double TWait,
3419 		/* Output */
3420 		double   *DSTXAfterScaler,
3421 		double   *DSTYAfterScaler,
3422 		double *DestinationLinesForPrefetch,
3423 		double *PrefetchBandwidth,
3424 		double *DestinationLinesToRequestVMInVBlank,
3425 		double *DestinationLinesToRequestRowInVBlank,
3426 		double *VRatioPrefetchY,
3427 		double *VRatioPrefetchC,
3428 		double *RequiredPrefetchPixDataBWLuma,
3429 		double *RequiredPrefetchPixDataBWChroma,
3430 		bool   *NotEnoughTimeForDynamicMetadata,
3431 		double *Tno_bw,
3432 		double *prefetch_vmrow_bw,
3433 		double *Tdmdl_vm,
3434 		double *Tdmdl,
3435 		double *TSetup,
3436 		unsigned int   *VUpdateOffsetPix,
3437 		double   *VUpdateWidthPix,
3438 		double   *VReadyOffsetPix)
3439 {
3440 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3441 	bool MyError = false;
3442 	unsigned int DPPCycles, DISPCLKCycles;
3443 	double DSTTotalPixelsAfterScaler;
3444 	double LineTime;
3445 	double dst_y_prefetch_equ;
3446 	double prefetch_bw_oto;
3447 	double Tvm_oto;
3448 	double Tr0_oto;
3449 	double Tvm_oto_lines;
3450 	double Tr0_oto_lines;
3451 	double dst_y_prefetch_oto;
3452 	double TimeForFetchingMetaPTE = 0;
3453 	double TimeForFetchingRowInVBlank = 0;
3454 	double LinesToRequestPrefetchPixelData = 0;
3455 	unsigned int HostVMDynamicLevelsTrips;
3456 	double  trip_to_mem;
3457 	double  Tvm_trips;
3458 	double  Tr0_trips;
3459 	double  Tvm_trips_rounded;
3460 	double  Tr0_trips_rounded;
3461 	double  Lsw_oto;
3462 	double  Tpre_rounded;
3463 	double  prefetch_bw_equ;
3464 	double  Tvm_equ;
3465 	double  Tr0_equ;
3466 	double  Tdmbf;
3467 	double  Tdmec;
3468 	double  Tdmsks;
3469 	double  prefetch_sw_bytes;
3470 	double  bytes_pp;
3471 	double  dep_bytes;
3472 	unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3473 	double  min_Lsw;
3474 	double  Tsw_est1 = 0;
3475 	double  Tsw_est3 = 0;
3476 
3477 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3478 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3479 	else
3480 		HostVMDynamicLevelsTrips = 0;
3481 #ifdef __DML_VBA_DEBUG__
3482 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3483 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3484 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3485 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3486 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3487 #endif
3488 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3489 			v->MaxInterDCNTileRepeaters,
3490 			myPipe->Dppclk,
3491 			myPipe->Dispclk,
3492 			myPipe->DCFClkDeepSleep,
3493 			myPipe->PixelClock,
3494 			myPipe->HTotal,
3495 			myPipe->VBlank,
3496 			v->DynamicMetadataTransmittedBytes[k],
3497 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3498 			myPipe->InterlaceEnable,
3499 			myPipe->ProgressiveToInterlaceUnitInOPP,
3500 			TSetup,
3501 
3502 			/* output */
3503 			&Tdmbf,
3504 			&Tdmec,
3505 			&Tdmsks,
3506 			VUpdateOffsetPix,
3507 			VUpdateWidthPix,
3508 			VReadyOffsetPix);
3509 
3510 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3511 	trip_to_mem = UrgentLatency;
3512 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3513 
3514 	if (v->DynamicMetadataVMEnabled == true)
3515 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3516 	else
3517 		*Tdmdl = TWait + UrgentExtraLatency;
3518 
3519 #ifdef __DML_VBA_ALLOW_DELTA__
3520 	if (v->DynamicMetadataEnable[k] == false)
3521 		*Tdmdl = 0.0;
3522 #endif
3523 
3524 	if (v->DynamicMetadataEnable[k] == true) {
3525 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3526 			*NotEnoughTimeForDynamicMetadata = true;
3527 #ifdef __DML_VBA_DEBUG__
3528 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3529 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3530 					__func__, Tdmbf);
3531 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3532 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3533 					__func__, Tdmsks);
3534 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3535 					__func__, *Tdmdl);
3536 #endif
3537 		} else {
3538 			*NotEnoughTimeForDynamicMetadata = false;
3539 		}
3540 	} else {
3541 		*NotEnoughTimeForDynamicMetadata = false;
3542 	}
3543 
3544 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3545 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3546 
3547 	if (myPipe->ScalerEnabled)
3548 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3549 	else
3550 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3551 
3552 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3553 
3554 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3555 
3556 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3557 		return true;
3558 
3559 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3560 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3561 
3562 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3563 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3564 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3565 					myPipe->HActive / 2 : 0)
3566 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3567 
3568 #ifdef __DML_VBA_DEBUG__
3569 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3570 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3571 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3572 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3573 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3574 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3575 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3576 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3577 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3578 #endif
3579 
3580 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3581 		*DSTYAfterScaler = 1;
3582 	else
3583 		*DSTYAfterScaler = 0;
3584 
3585 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3586 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3587 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3588 #ifdef __DML_VBA_DEBUG__
3589 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3590 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3591 #endif
3592 
3593 	MyError = false;
3594 
3595 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3596 
3597 	if (v->GPUVMEnable == true) {
3598 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3599 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3600 		if (v->GPUVMMaxPageTableLevels >= 3) {
3601 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3602 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3603 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3604 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3605 					4.0 * LineTime; // VBA_ERROR
3606 			*Tno_bw = UrgentExtraLatency;
3607 		} else {
3608 			*Tno_bw = 0;
3609 		}
3610 	} else if (myPipe->DCCEnable == true) {
3611 		Tvm_trips_rounded = LineTime / 4.0;
3612 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3613 		*Tno_bw = 0;
3614 	} else {
3615 		Tvm_trips_rounded = LineTime / 4.0;
3616 		Tr0_trips_rounded = LineTime / 2.0;
3617 		*Tno_bw = 0;
3618 	}
3619 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3620 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3621 
3622 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3623 			|| myPipe->SourcePixelFormat == dm_420_12) {
3624 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3625 	} else {
3626 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3627 	}
3628 
3629 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3630 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3631 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3632 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3633 
3634 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3635 	min_Lsw = dml_max(min_Lsw, 1.0);
3636 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3637 
3638 	if (v->GPUVMEnable == true) {
3639 		Tvm_oto = dml_max3(
3640 				Tvm_trips,
3641 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3642 				LineTime / 4.0);
3643 	} else
3644 		Tvm_oto = LineTime / 4.0;
3645 
3646 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3647 		Tr0_oto = dml_max4(
3648 				Tr0_trips,
3649 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3650 				(LineTime - Tvm_oto)/2.0,
3651 				LineTime / 4.0);
3652 #ifdef __DML_VBA_DEBUG__
3653 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3654 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3655 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3656 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3657 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3658 #endif
3659 	} else
3660 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3661 
3662 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3663 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3664 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3665 
3666 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3667 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3668 
3669 #ifdef __DML_VBA_DEBUG__
3670 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3671 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3672 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3673 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3674 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3675 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3676 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3677 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3678 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3679 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3680 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3681 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3682 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3683 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3684 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3685 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3686 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3687 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3688 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3689 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3690 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3691 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3692 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3693 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3694 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3695 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3696 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3697 #endif
3698 
3699 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3700 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3701 #ifdef __DML_VBA_DEBUG__
3702 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3703 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3704 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3705 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3706 			__func__, VStartup * LineTime);
3707 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3708 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3709 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3710 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3711 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3712 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3713 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3714 			__func__, *DSTYAfterScaler);
3715 #endif
3716 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3717 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3718 
3719 	if (prefetch_sw_bytes < dep_bytes)
3720 		prefetch_sw_bytes = 2 * dep_bytes;
3721 
3722 	*PrefetchBandwidth = 0;
3723 	*DestinationLinesToRequestVMInVBlank = 0;
3724 	*DestinationLinesToRequestRowInVBlank = 0;
3725 	*VRatioPrefetchY = 0;
3726 	*VRatioPrefetchC = 0;
3727 	*RequiredPrefetchPixDataBWLuma = 0;
3728 	if (dst_y_prefetch_equ > 1) {
3729 		double PrefetchBandwidth1;
3730 		double PrefetchBandwidth2;
3731 		double PrefetchBandwidth3;
3732 		double PrefetchBandwidth4;
3733 
3734 		if (Tpre_rounded - *Tno_bw > 0) {
3735 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3736 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3737 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3738 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3739 		} else
3740 			PrefetchBandwidth1 = 0;
3741 
3742 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3743 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3744 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3745 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3746 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3747 		}
3748 
3749 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3750 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3751 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3752 		else
3753 			PrefetchBandwidth2 = 0;
3754 
3755 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3756 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3757 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3758 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3759 		} else
3760 			PrefetchBandwidth3 = 0;
3761 
3762 
3763 		if (VStartup == MaxVStartup &&
3764 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3765 				LineTime - Tvm_trips_rounded > 0) {
3766 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3767 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3768 		}
3769 
3770 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3771 			PrefetchBandwidth4 = prefetch_sw_bytes /
3772 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3773 		} else {
3774 			PrefetchBandwidth4 = 0;
3775 		}
3776 
3777 #ifdef __DML_VBA_DEBUG__
3778 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3779 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3780 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3781 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3782 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3783 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3784 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3785 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3786 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3787 #endif
3788 		{
3789 			bool Case1OK;
3790 			bool Case2OK;
3791 			bool Case3OK;
3792 
3793 			if (PrefetchBandwidth1 > 0) {
3794 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3795 						>= Tvm_trips_rounded
3796 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3797 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3798 					Case1OK = true;
3799 				} else {
3800 					Case1OK = false;
3801 				}
3802 			} else {
3803 				Case1OK = false;
3804 			}
3805 
3806 			if (PrefetchBandwidth2 > 0) {
3807 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3808 						>= Tvm_trips_rounded
3809 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3810 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3811 					Case2OK = true;
3812 				} else {
3813 					Case2OK = false;
3814 				}
3815 			} else {
3816 				Case2OK = false;
3817 			}
3818 
3819 			if (PrefetchBandwidth3 > 0) {
3820 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3821 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3822 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3823 								Tr0_trips_rounded) {
3824 					Case3OK = true;
3825 				} else {
3826 					Case3OK = false;
3827 				}
3828 			} else {
3829 				Case3OK = false;
3830 			}
3831 
3832 			if (Case1OK)
3833 				prefetch_bw_equ = PrefetchBandwidth1;
3834 			else if (Case2OK)
3835 				prefetch_bw_equ = PrefetchBandwidth2;
3836 			else if (Case3OK)
3837 				prefetch_bw_equ = PrefetchBandwidth3;
3838 			else
3839 				prefetch_bw_equ = PrefetchBandwidth4;
3840 
3841 #ifdef __DML_VBA_DEBUG__
3842 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3843 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3844 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3845 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3846 #endif
3847 
3848 			if (prefetch_bw_equ > 0) {
3849 				if (v->GPUVMEnable == true) {
3850 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3851 							HostVMInefficiencyFactor / prefetch_bw_equ,
3852 							Tvm_trips, LineTime / 4);
3853 				} else {
3854 					Tvm_equ = LineTime / 4;
3855 				}
3856 
3857 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3858 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3859 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3860 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3861 				} else {
3862 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3863 				}
3864 			} else {
3865 				Tvm_equ = 0;
3866 				Tr0_equ = 0;
3867 #ifdef __DML_VBA_DEBUG__
3868 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3869 #endif
3870 			}
3871 		}
3872 
3873 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3874 			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3875 			TimeForFetchingMetaPTE = Tvm_oto;
3876 			TimeForFetchingRowInVBlank = Tr0_oto;
3877 			*PrefetchBandwidth = prefetch_bw_oto;
3878 		} else {
3879 			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3880 			TimeForFetchingMetaPTE = Tvm_equ;
3881 			TimeForFetchingRowInVBlank = Tr0_equ;
3882 			*PrefetchBandwidth = prefetch_bw_equ;
3883 		}
3884 
3885 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3886 
3887 		*DestinationLinesToRequestRowInVBlank =
3888 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3889 
3890 		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3891 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3892 
3893 #ifdef __DML_VBA_DEBUG__
3894 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3895 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3896 				__func__, *DestinationLinesToRequestVMInVBlank);
3897 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3898 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3899 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3900 				__func__, *DestinationLinesToRequestRowInVBlank);
3901 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3902 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3903 #endif
3904 
3905 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3906 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3907 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3908 #ifdef __DML_VBA_DEBUG__
3909 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3910 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3911 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3912 #endif
3913 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3914 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3915 					*VRatioPrefetchY =
3916 							dml_max((double) PrefetchSourceLinesY /
3917 									LinesToRequestPrefetchPixelData,
3918 									(double) MaxNumSwathY * SwathHeightY /
3919 									(LinesToRequestPrefetchPixelData -
3920 									(VInitPreFillY - 3.0) / 2.0));
3921 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3922 				} else {
3923 					MyError = true;
3924 					*VRatioPrefetchY = 0;
3925 				}
3926 #ifdef __DML_VBA_DEBUG__
3927 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3928 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3929 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3930 #endif
3931 			}
3932 
3933 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3934 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3935 
3936 #ifdef __DML_VBA_DEBUG__
3937 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3938 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3939 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3940 #endif
3941 			if ((SwathHeightC > 4)) {
3942 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3943 					*VRatioPrefetchC =
3944 						dml_max(*VRatioPrefetchC,
3945 							(double) MaxNumSwathC * SwathHeightC /
3946 							(LinesToRequestPrefetchPixelData -
3947 							(VInitPreFillC - 3.0) / 2.0));
3948 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3949 				} else {
3950 					MyError = true;
3951 					*VRatioPrefetchC = 0;
3952 				}
3953 #ifdef __DML_VBA_DEBUG__
3954 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3955 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3956 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3957 #endif
3958 			}
3959 
3960 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3961 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3962 					/ LineTime;
3963 
3964 #ifdef __DML_VBA_DEBUG__
3965 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3966 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3967 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3968 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3969 					__func__, *RequiredPrefetchPixDataBWLuma);
3970 #endif
3971 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3972 					LinesToRequestPrefetchPixelData
3973 					* myPipe->BytePerPixelC
3974 					* swath_width_chroma_ub / LineTime;
3975 		} else {
3976 			MyError = true;
3977 #ifdef __DML_VBA_DEBUG__
3978 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3979 					__func__, LinesToRequestPrefetchPixelData);
3980 #endif
3981 			*VRatioPrefetchY = 0;
3982 			*VRatioPrefetchC = 0;
3983 			*RequiredPrefetchPixDataBWLuma = 0;
3984 			*RequiredPrefetchPixDataBWChroma = 0;
3985 		}
3986 #ifdef __DML_VBA_DEBUG__
3987 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3988 			(double)LinesToRequestPrefetchPixelData * LineTime +
3989 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3990 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3991 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3992 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3993 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3994 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3995 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3996 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3997 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3998 				PixelPTEBytesPerRow);
3999 #endif
4000 	} else {
4001 		MyError = true;
4002 #ifdef __DML_VBA_DEBUG__
4003 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4004 				__func__, dst_y_prefetch_equ);
4005 #endif
4006 	}
4007 
4008 	{
4009 		double prefetch_vm_bw;
4010 		double prefetch_row_bw;
4011 
4012 		if (PDEAndMetaPTEBytesFrame == 0) {
4013 			prefetch_vm_bw = 0;
4014 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4015 #ifdef __DML_VBA_DEBUG__
4016 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4017 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4018 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4019 					__func__, *DestinationLinesToRequestVMInVBlank);
4020 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4021 #endif
4022 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4023 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4024 #ifdef __DML_VBA_DEBUG__
4025 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4026 #endif
4027 		} else {
4028 			prefetch_vm_bw = 0;
4029 			MyError = true;
4030 #ifdef __DML_VBA_DEBUG__
4031 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4032 					__func__, *DestinationLinesToRequestVMInVBlank);
4033 #endif
4034 		}
4035 
4036 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4037 			prefetch_row_bw = 0;
4038 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4039 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4040 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4041 
4042 #ifdef __DML_VBA_DEBUG__
4043 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4044 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4045 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4046 					__func__, *DestinationLinesToRequestRowInVBlank);
4047 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4048 #endif
4049 		} else {
4050 			prefetch_row_bw = 0;
4051 			MyError = true;
4052 #ifdef __DML_VBA_DEBUG__
4053 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4054 					__func__, *DestinationLinesToRequestRowInVBlank);
4055 #endif
4056 		}
4057 
4058 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4059 	}
4060 
4061 	if (MyError) {
4062 		*PrefetchBandwidth = 0;
4063 		TimeForFetchingMetaPTE = 0;
4064 		TimeForFetchingRowInVBlank = 0;
4065 		*DestinationLinesToRequestVMInVBlank = 0;
4066 		*DestinationLinesToRequestRowInVBlank = 0;
4067 		*DestinationLinesForPrefetch = 0;
4068 		LinesToRequestPrefetchPixelData = 0;
4069 		*VRatioPrefetchY = 0;
4070 		*VRatioPrefetchC = 0;
4071 		*RequiredPrefetchPixDataBWLuma = 0;
4072 		*RequiredPrefetchPixDataBWChroma = 0;
4073 	}
4074 
4075 	return MyError;
4076 } // CalculatePrefetchSchedule
4077 
4078 void dml32_CalculateFlipSchedule(
4079 		double HostVMInefficiencyFactor,
4080 		double UrgentExtraLatency,
4081 		double UrgentLatency,
4082 		unsigned int GPUVMMaxPageTableLevels,
4083 		bool HostVMEnable,
4084 		unsigned int HostVMMaxNonCachedPageTableLevels,
4085 		bool GPUVMEnable,
4086 		double HostVMMinPageSize,
4087 		double PDEAndMetaPTEBytesPerFrame,
4088 		double MetaRowBytes,
4089 		double DPTEBytesPerRow,
4090 		double BandwidthAvailableForImmediateFlip,
4091 		unsigned int TotImmediateFlipBytes,
4092 		enum source_format_class SourcePixelFormat,
4093 		double LineTime,
4094 		double VRatio,
4095 		double VRatioChroma,
4096 		double Tno_bw,
4097 		bool DCCEnable,
4098 		unsigned int dpte_row_height,
4099 		unsigned int meta_row_height,
4100 		unsigned int dpte_row_height_chroma,
4101 		unsigned int meta_row_height_chroma,
4102 		bool    use_one_row_for_frame_flip,
4103 
4104 		/* Output */
4105 		double *DestinationLinesToRequestVMInImmediateFlip,
4106 		double *DestinationLinesToRequestRowInImmediateFlip,
4107 		double *final_flip_bw,
4108 		bool *ImmediateFlipSupportedForPipe)
4109 {
4110 	double min_row_time = 0.0;
4111 	unsigned int HostVMDynamicLevelsTrips;
4112 	double TimeForFetchingMetaPTEImmediateFlip;
4113 	double TimeForFetchingRowInVBlankImmediateFlip;
4114 	double ImmediateFlipBW;
4115 
4116 	if (GPUVMEnable == true && HostVMEnable == true)
4117 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4118 	else
4119 		HostVMDynamicLevelsTrips = 0;
4120 
4121 #ifdef __DML_VBA_DEBUG__
4122 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4123 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4124 #endif
4125 
4126 	if (TotImmediateFlipBytes > 0) {
4127 		if (use_one_row_for_frame_flip) {
4128 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4129 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4130 		} else {
4131 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4132 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4133 		}
4134 		if (GPUVMEnable == true) {
4135 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4136 					HostVMInefficiencyFactor / ImmediateFlipBW,
4137 					UrgentExtraLatency + UrgentLatency *
4138 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4139 					LineTime / 4.0);
4140 		} else {
4141 			TimeForFetchingMetaPTEImmediateFlip = 0;
4142 		}
4143 		if ((GPUVMEnable == true || DCCEnable == true)) {
4144 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4145 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4146 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4147 		} else {
4148 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4149 		}
4150 
4151 		*DestinationLinesToRequestVMInImmediateFlip =
4152 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4153 		*DestinationLinesToRequestRowInImmediateFlip =
4154 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4155 
4156 		if (GPUVMEnable == true) {
4157 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4158 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4159 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4160 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4161 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4162 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4163 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4164 		} else {
4165 			*final_flip_bw = 0;
4166 		}
4167 	} else {
4168 		TimeForFetchingMetaPTEImmediateFlip = 0;
4169 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4170 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4171 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4172 		*final_flip_bw = 0;
4173 	}
4174 
4175 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4176 		if (GPUVMEnable == true && DCCEnable != true) {
4177 			min_row_time = dml_min(dpte_row_height *
4178 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4179 		} else if (GPUVMEnable != true && DCCEnable == true) {
4180 			min_row_time = dml_min(meta_row_height *
4181 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4182 		} else {
4183 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4184 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4185 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4186 		}
4187 	} else {
4188 		if (GPUVMEnable == true && DCCEnable != true) {
4189 			min_row_time = dpte_row_height * LineTime / VRatio;
4190 		} else if (GPUVMEnable != true && DCCEnable == true) {
4191 			min_row_time = meta_row_height * LineTime / VRatio;
4192 		} else {
4193 			min_row_time =
4194 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4195 		}
4196 	}
4197 
4198 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4199 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4200 					> min_row_time) {
4201 		*ImmediateFlipSupportedForPipe = false;
4202 	} else {
4203 		*ImmediateFlipSupportedForPipe = true;
4204 	}
4205 
4206 #ifdef __DML_VBA_DEBUG__
4207 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4208 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4209 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4210 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4211 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4212 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4213 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4214 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4215 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4216 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4217 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4218 #endif
4219 } // CalculateFlipSchedule
4220 
4221 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4222 		struct vba_vars_st *v,
4223 		unsigned int PrefetchMode,
4224 		double DCFCLK,
4225 		double ReturnBW,
4226 		SOCParametersList mmSOCParameters,
4227 		double SOCCLK,
4228 		double DCFClkDeepSleep,
4229 		unsigned int DETBufferSizeY[],
4230 		unsigned int DETBufferSizeC[],
4231 		unsigned int SwathHeightY[],
4232 		unsigned int SwathHeightC[],
4233 		double SwathWidthY[],
4234 		double SwathWidthC[],
4235 		unsigned int DPPPerSurface[],
4236 		double BytePerPixelDETY[],
4237 		double BytePerPixelDETC[],
4238 		double DSTXAfterScaler[],
4239 		double DSTYAfterScaler[],
4240 		bool UnboundedRequestEnabled,
4241 		unsigned int CompressedBufferSizeInkByte,
4242 
4243 		/* Output */
4244 		enum clock_change_support *DRAMClockChangeSupport,
4245 		double MaxActiveDRAMClockChangeLatencySupported[],
4246 		unsigned int SubViewportLinesNeededInMALL[],
4247 		enum dm_fclock_change_support *FCLKChangeSupport,
4248 		double *MinActiveFCLKChangeLatencySupported,
4249 		bool *USRRetrainingSupport,
4250 		double ActiveDRAMClockChangeLatencyMargin[])
4251 {
4252 	unsigned int i, j, k;
4253 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4254 	unsigned int DRAMClockChangeSupportNumber = 0;
4255 	unsigned int LastSurfaceWithoutMargin;
4256 	unsigned int DRAMClockChangeMethod = 0;
4257 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4258 	double MinActiveFCLKChangeMargin = 0.;
4259 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4260 	double ActiveClockChangeLatencyHidingY;
4261 	double ActiveClockChangeLatencyHidingC;
4262 	double ActiveClockChangeLatencyHiding;
4263 	double EffectiveDETBufferSizeY;
4264 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4265 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4266 	double TotalPixelBW = 0.0;
4267 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4268 	double     EffectiveLBLatencyHidingY;
4269 	double     EffectiveLBLatencyHidingC;
4270 	double     LinesInDETY[DC__NUM_DPP__MAX];
4271 	double     LinesInDETC[DC__NUM_DPP__MAX];
4272 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4273 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4274 	double     FullDETBufferingTimeY;
4275 	double     FullDETBufferingTimeC;
4276 	double     WritebackDRAMClockChangeLatencyMargin;
4277 	double     WritebackFCLKChangeLatencyMargin;
4278 	double     WritebackLatencyHiding;
4279 	bool    SameTimingForFCLKChange;
4280 
4281 	unsigned int    TotalActiveWriteback = 0;
4282 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4283 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4284 
4285 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4286 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4287 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4288 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4289 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4290 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4291 			+ 10 / DCFClkDeepSleep;
4292 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4293 			+ 10 / DCFClkDeepSleep;
4294 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4295 			+ 10 / DCFClkDeepSleep;
4296 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4297 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4298 
4299 #ifdef __DML_VBA_DEBUG__
4300 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4301 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4302 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4303 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4304 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4305 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4306 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4307 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4308 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4309 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4310 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4311 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4312 #endif
4313 
4314 
4315 	TotalActiveWriteback = 0;
4316 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4317 		if (v->WritebackEnable[k] == true)
4318 			TotalActiveWriteback = TotalActiveWriteback + 1;
4319 	}
4320 
4321 	if (TotalActiveWriteback <= 1) {
4322 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4323 	} else {
4324 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4325 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4326 	}
4327 	if (v->USRRetrainingRequiredFinal)
4328 		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
4329 				+ mmSOCParameters.USRRetrainingLatency;
4330 
4331 	if (TotalActiveWriteback <= 1) {
4332 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4333 				+ mmSOCParameters.WritebackLatency;
4334 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4335 				+ mmSOCParameters.WritebackLatency;
4336 	} else {
4337 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4338 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4339 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4340 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4341 	}
4342 
4343 	if (v->USRRetrainingRequiredFinal)
4344 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4345 				+ mmSOCParameters.USRRetrainingLatency;
4346 
4347 	if (v->USRRetrainingRequiredFinal)
4348 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4349 				+ mmSOCParameters.USRRetrainingLatency;
4350 
4351 #ifdef __DML_VBA_DEBUG__
4352 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4353 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4354 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4355 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4356 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4357 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4358 #endif
4359 
4360 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4361 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4362 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4363 	}
4364 
4365 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4366 
4367 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4368 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4369 
4370 
4371 #ifdef __DML_VBA_DEBUG__
4372 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4373 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4374 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4375 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4376 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4377 #endif
4378 
4379 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4380 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4381 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4382 
4383 		if (UnboundedRequestEnabled) {
4384 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4385 					+ CompressedBufferSizeInkByte * 1024
4386 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4387 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4388 		}
4389 
4390 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4391 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4392 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4393 
4394 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4395 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4396 
4397 		if (v->NumberOfActiveSurfaces > 1) {
4398 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4399 					- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4400 							/ v->PixelClock[k] / v->VRatio[k];
4401 		}
4402 
4403 		if (BytePerPixelDETC[k] > 0) {
4404 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4405 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4406 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4407 					/ v->VRatioChroma[k];
4408 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4409 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4410 							/ v->PixelClock[k];
4411 			if (v->NumberOfActiveSurfaces > 1) {
4412 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4413 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4414 								/ v->PixelClock[k] / v->VRatioChroma[k];
4415 			}
4416 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4417 					ActiveClockChangeLatencyHidingC);
4418 		} else {
4419 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4420 		}
4421 
4422 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4423 				- v->Watermark.DRAMClockChangeWatermark;
4424 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4425 				- v->Watermark.FCLKChangeWatermark;
4426 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4427 
4428 		if (v->WritebackEnable[k]) {
4429 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4430 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4431 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4432 			if (v->WritebackPixelFormat[k] == dm_444_64)
4433 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4434 
4435 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4436 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4437 
4438 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4439 					- v->Watermark.WritebackFCLKChangeWatermark;
4440 
4441 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4442 					WritebackFCLKChangeLatencyMargin);
4443 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4444 					WritebackDRAMClockChangeLatencyMargin);
4445 		}
4446 		MaxActiveDRAMClockChangeLatencySupported[k] =
4447 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4448 						0 :
4449 						(ActiveDRAMClockChangeLatencyMargin[k]
4450 								+ mmSOCParameters.DRAMClockChangeLatency);
4451 	}
4452 
4453 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4454 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4455 			if (i == j ||
4456 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4457 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4458 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4459 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4460 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4461 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4462 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4463 				SynchronizedSurfaces[i][j] = true;
4464 			} else {
4465 				SynchronizedSurfaces[i][j] = false;
4466 			}
4467 		}
4468 	}
4469 
4470 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4471 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4472 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4473 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4474 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4475 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4476 			SurfaceWithMinActiveFCLKChangeMargin = k;
4477 		}
4478 	}
4479 
4480 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4481 
4482 	SameTimingForFCLKChange = true;
4483 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4484 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4485 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4486 					(SameTimingForFCLKChange ||
4487 					ActiveFCLKChangeLatencyMargin[k] <
4488 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4489 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4490 			}
4491 			SameTimingForFCLKChange = false;
4492 		}
4493 	}
4494 
4495 	if (MinActiveFCLKChangeMargin > 0) {
4496 		*FCLKChangeSupport = dm_fclock_change_vactive;
4497 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4498 			(PrefetchMode <= 1)) {
4499 		*FCLKChangeSupport = dm_fclock_change_vblank;
4500 	} else {
4501 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4502 	}
4503 
4504 	*USRRetrainingSupport = true;
4505 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4506 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4507 				(USRRetrainingLatencyMargin[k] < 0)) {
4508 			*USRRetrainingSupport = false;
4509 		}
4510 	}
4511 
4512 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4513 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4514 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4515 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4516 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4517 			if (PrefetchMode > 0) {
4518 				DRAMClockChangeSupportNumber = 2;
4519 			} else if (DRAMClockChangeSupportNumber == 0) {
4520 				DRAMClockChangeSupportNumber = 1;
4521 				LastSurfaceWithoutMargin = k;
4522 			} else if (DRAMClockChangeSupportNumber == 1 &&
4523 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4524 				DRAMClockChangeSupportNumber = 2;
4525 			}
4526 		}
4527 	}
4528 
4529 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4530 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4531 			DRAMClockChangeMethod = 1;
4532 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4533 			DRAMClockChangeMethod = 2;
4534 	}
4535 
4536 	if (DRAMClockChangeMethod == 0) {
4537 		if (DRAMClockChangeSupportNumber == 0)
4538 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4539 		else if (DRAMClockChangeSupportNumber == 1)
4540 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4541 		else
4542 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4543 	} else if (DRAMClockChangeMethod == 1) {
4544 		if (DRAMClockChangeSupportNumber == 0)
4545 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4546 		else if (DRAMClockChangeSupportNumber == 1)
4547 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4548 		else
4549 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4550 	} else {
4551 		if (DRAMClockChangeSupportNumber == 0)
4552 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4553 		else if (DRAMClockChangeSupportNumber == 1)
4554 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4555 		else
4556 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4557 	}
4558 
4559 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4560 		unsigned int dst_y_pstate;
4561 		unsigned int src_y_pstate_l;
4562 		unsigned int src_y_pstate_c;
4563 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4564 
4565 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4566 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4567 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4568 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4569 
4570 #ifdef __DML_VBA_DEBUG__
4571 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4572 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4573 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4574 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4575 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4576 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4577 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4578 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4579 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4580 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4581 #endif
4582 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4583 
4584 		if (BytePerPixelDETC[k] > 0) {
4585 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4586 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4587 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4588 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4589 
4590 #ifdef __DML_VBA_DEBUG__
4591 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4592 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4593 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4594 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4595 #endif
4596 		}
4597 	}
4598 #ifdef __DML_VBA_DEBUG__
4599 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4600 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4601 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4602 			__func__, *MinActiveFCLKChangeLatencySupported);
4603 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4604 #endif
4605 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4606 
4607 double dml32_CalculateWriteBackDISPCLK(
4608 		enum source_format_class WritebackPixelFormat,
4609 		double PixelClock,
4610 		double WritebackHRatio,
4611 		double WritebackVRatio,
4612 		unsigned int WritebackHTaps,
4613 		unsigned int WritebackVTaps,
4614 		unsigned int   WritebackSourceWidth,
4615 		unsigned int   WritebackDestinationWidth,
4616 		unsigned int HTotal,
4617 		unsigned int WritebackLineBufferSize,
4618 		double DISPCLKDPPCLKVCOSpeed)
4619 {
4620 	double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4621 
4622 	DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4623 	DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4624 	DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4625 			WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4626 	return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4627 }
4628 
4629 void dml32_CalculateMinAndMaxPrefetchMode(
4630 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4631 		unsigned int             *MinPrefetchMode,
4632 		unsigned int             *MaxPrefetchMode)
4633 {
4634 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4635 		*MinPrefetchMode = 3;
4636 		*MaxPrefetchMode = 3;
4637 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4638 		*MinPrefetchMode = 2;
4639 		*MaxPrefetchMode = 2;
4640 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4641 		*MinPrefetchMode = 1;
4642 		*MaxPrefetchMode = 1;
4643 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4644 		*MinPrefetchMode = 0;
4645 		*MaxPrefetchMode = 0;
4646 	} else {
4647 		*MinPrefetchMode = 0;
4648 		*MaxPrefetchMode = 3;
4649 	}
4650 } // CalculateMinAndMaxPrefetchMode
4651 
4652 void dml32_CalculatePixelDeliveryTimes(
4653 		unsigned int             NumberOfActiveSurfaces,
4654 		double              VRatio[],
4655 		double              VRatioChroma[],
4656 		double              VRatioPrefetchY[],
4657 		double              VRatioPrefetchC[],
4658 		unsigned int             swath_width_luma_ub[],
4659 		unsigned int             swath_width_chroma_ub[],
4660 		unsigned int             DPPPerSurface[],
4661 		double              HRatio[],
4662 		double              HRatioChroma[],
4663 		double              PixelClock[],
4664 		double              PSCL_THROUGHPUT[],
4665 		double              PSCL_THROUGHPUT_CHROMA[],
4666 		double              Dppclk[],
4667 		unsigned int             BytePerPixelC[],
4668 		enum dm_rotation_angle   SourceRotation[],
4669 		unsigned int             NumberOfCursors[],
4670 		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4671 		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4672 		unsigned int             BlockWidth256BytesY[],
4673 		unsigned int             BlockHeight256BytesY[],
4674 		unsigned int             BlockWidth256BytesC[],
4675 		unsigned int             BlockHeight256BytesC[],
4676 
4677 		/* Output */
4678 		double              DisplayPipeLineDeliveryTimeLuma[],
4679 		double              DisplayPipeLineDeliveryTimeChroma[],
4680 		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4681 		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4682 		double              DisplayPipeRequestDeliveryTimeLuma[],
4683 		double              DisplayPipeRequestDeliveryTimeChroma[],
4684 		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4685 		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4686 		double              CursorRequestDeliveryTime[],
4687 		double              CursorRequestDeliveryTimePrefetch[])
4688 {
4689 	double   req_per_swath_ub;
4690 	unsigned int k;
4691 
4692 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4693 
4694 #ifdef __DML_VBA_DEBUG__
4695 		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4696 		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4697 		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4698 		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4699 		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4700 		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4701 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4702 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4703 		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4704 		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4705 		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4706 #endif
4707 
4708 		if (VRatio[k] <= 1) {
4709 			DisplayPipeLineDeliveryTimeLuma[k] =
4710 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4711 		} else {
4712 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4713 		}
4714 
4715 		if (BytePerPixelC[k] == 0) {
4716 			DisplayPipeLineDeliveryTimeChroma[k] = 0;
4717 		} else {
4718 			if (VRatioChroma[k] <= 1) {
4719 				DisplayPipeLineDeliveryTimeChroma[k] =
4720 					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4721 			} else {
4722 				DisplayPipeLineDeliveryTimeChroma[k] =
4723 					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4724 			}
4725 		}
4726 
4727 		if (VRatioPrefetchY[k] <= 1) {
4728 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4729 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4730 		} else {
4731 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4732 					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4733 		}
4734 
4735 		if (BytePerPixelC[k] == 0) {
4736 			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4737 		} else {
4738 			if (VRatioPrefetchC[k] <= 1) {
4739 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4740 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4741 			} else {
4742 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4743 						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4744 			}
4745 		}
4746 #ifdef __DML_VBA_DEBUG__
4747 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4748 				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4749 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4750 				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4751 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4752 				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4753 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4754 				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4755 #endif
4756 	}
4757 
4758 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4759 		if (!IsVertical(SourceRotation[k]))
4760 			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4761 		else
4762 			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4763 #ifdef __DML_VBA_DEBUG__
4764 		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4765 #endif
4766 
4767 		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4768 		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4769 				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4770 		if (BytePerPixelC[k] == 0) {
4771 			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4772 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4773 		} else {
4774 			if (!IsVertical(SourceRotation[k]))
4775 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4776 			else
4777 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4778 #ifdef __DML_VBA_DEBUG__
4779 			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4780 #endif
4781 			DisplayPipeRequestDeliveryTimeChroma[k] =
4782 					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4783 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4784 					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4785 		}
4786 #ifdef __DML_VBA_DEBUG__
4787 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4788 				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4789 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4790 				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4791 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4792 				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4793 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4794 				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4795 #endif
4796 	}
4797 
4798 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4799 		unsigned int cursor_req_per_width;
4800 
4801 		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4802 				256.0 / 8.0, 1.0);
4803 		if (NumberOfCursors[k] > 0) {
4804 			if (VRatio[k] <= 1) {
4805 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4806 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4807 			} else {
4808 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4809 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4810 			}
4811 			if (VRatioPrefetchY[k] <= 1) {
4812 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4813 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4814 			} else {
4815 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4816 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4817 			}
4818 		} else {
4819 			CursorRequestDeliveryTime[k] = 0;
4820 			CursorRequestDeliveryTimePrefetch[k] = 0;
4821 		}
4822 #ifdef __DML_VBA_DEBUG__
4823 		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4824 				__func__, k, NumberOfCursors[k]);
4825 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4826 				__func__, k, CursorRequestDeliveryTime[k]);
4827 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4828 				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
4829 #endif
4830 	}
4831 } // CalculatePixelDeliveryTimes
4832 
4833 void dml32_CalculateMetaAndPTETimes(
4834 		bool use_one_row_for_frame[],
4835 		unsigned int NumberOfActiveSurfaces,
4836 		bool GPUVMEnable,
4837 		unsigned int MetaChunkSize,
4838 		unsigned int MinMetaChunkSizeBytes,
4839 		unsigned int    HTotal[],
4840 		double  VRatio[],
4841 		double  VRatioChroma[],
4842 		double  DestinationLinesToRequestRowInVBlank[],
4843 		double  DestinationLinesToRequestRowInImmediateFlip[],
4844 		bool DCCEnable[],
4845 		double  PixelClock[],
4846 		unsigned int BytePerPixelY[],
4847 		unsigned int BytePerPixelC[],
4848 		enum dm_rotation_angle SourceRotation[],
4849 		unsigned int dpte_row_height[],
4850 		unsigned int dpte_row_height_chroma[],
4851 		unsigned int meta_row_width[],
4852 		unsigned int meta_row_width_chroma[],
4853 		unsigned int meta_row_height[],
4854 		unsigned int meta_row_height_chroma[],
4855 		unsigned int meta_req_width[],
4856 		unsigned int meta_req_width_chroma[],
4857 		unsigned int meta_req_height[],
4858 		unsigned int meta_req_height_chroma[],
4859 		unsigned int dpte_group_bytes[],
4860 		unsigned int    PTERequestSizeY[],
4861 		unsigned int    PTERequestSizeC[],
4862 		unsigned int    PixelPTEReqWidthY[],
4863 		unsigned int    PixelPTEReqHeightY[],
4864 		unsigned int    PixelPTEReqWidthC[],
4865 		unsigned int    PixelPTEReqHeightC[],
4866 		unsigned int    dpte_row_width_luma_ub[],
4867 		unsigned int    dpte_row_width_chroma_ub[],
4868 
4869 		/* Output */
4870 		double DST_Y_PER_PTE_ROW_NOM_L[],
4871 		double DST_Y_PER_PTE_ROW_NOM_C[],
4872 		double DST_Y_PER_META_ROW_NOM_L[],
4873 		double DST_Y_PER_META_ROW_NOM_C[],
4874 		double TimePerMetaChunkNominal[],
4875 		double TimePerChromaMetaChunkNominal[],
4876 		double TimePerMetaChunkVBlank[],
4877 		double TimePerChromaMetaChunkVBlank[],
4878 		double TimePerMetaChunkFlip[],
4879 		double TimePerChromaMetaChunkFlip[],
4880 		double time_per_pte_group_nom_luma[],
4881 		double time_per_pte_group_vblank_luma[],
4882 		double time_per_pte_group_flip_luma[],
4883 		double time_per_pte_group_nom_chroma[],
4884 		double time_per_pte_group_vblank_chroma[],
4885 		double time_per_pte_group_flip_chroma[])
4886 {
4887 	unsigned int   meta_chunk_width;
4888 	unsigned int   min_meta_chunk_width;
4889 	unsigned int   meta_chunk_per_row_int;
4890 	unsigned int   meta_row_remainder;
4891 	unsigned int   meta_chunk_threshold;
4892 	unsigned int   meta_chunks_per_row_ub;
4893 	unsigned int   meta_chunk_width_chroma;
4894 	unsigned int   min_meta_chunk_width_chroma;
4895 	unsigned int   meta_chunk_per_row_int_chroma;
4896 	unsigned int   meta_row_remainder_chroma;
4897 	unsigned int   meta_chunk_threshold_chroma;
4898 	unsigned int   meta_chunks_per_row_ub_chroma;
4899 	unsigned int   dpte_group_width_luma;
4900 	unsigned int   dpte_groups_per_row_luma_ub;
4901 	unsigned int   dpte_group_width_chroma;
4902 	unsigned int   dpte_groups_per_row_chroma_ub;
4903 	unsigned int k;
4904 
4905 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4906 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4907 		if (BytePerPixelC[k] == 0)
4908 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4909 		else
4910 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4911 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4912 		if (BytePerPixelC[k] == 0)
4913 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4914 		else
4915 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4916 	}
4917 
4918 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4919 		if (DCCEnable[k] == true) {
4920 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4921 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4922 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4923 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4924 			if (!IsVertical(SourceRotation[k]))
4925 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4926 			else
4927 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4928 
4929 			if (meta_row_remainder <= meta_chunk_threshold)
4930 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4931 			else
4932 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4933 
4934 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4935 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4936 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4937 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4938 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4939 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4940 			if (BytePerPixelC[k] == 0) {
4941 				TimePerChromaMetaChunkNominal[k] = 0;
4942 				TimePerChromaMetaChunkVBlank[k] = 0;
4943 				TimePerChromaMetaChunkFlip[k] = 0;
4944 			} else {
4945 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4946 						meta_row_height_chroma[k];
4947 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4948 						meta_row_height_chroma[k];
4949 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4950 						meta_chunk_width_chroma;
4951 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4952 				if (!IsVertical(SourceRotation[k])) {
4953 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4954 							meta_req_width_chroma[k];
4955 				} else {
4956 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4957 							meta_req_height_chroma[k];
4958 				}
4959 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4960 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4961 				else
4962 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4963 
4964 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4965 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4966 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4967 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4968 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4969 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4970 			}
4971 		} else {
4972 			TimePerMetaChunkNominal[k] = 0;
4973 			TimePerMetaChunkVBlank[k] = 0;
4974 			TimePerMetaChunkFlip[k] = 0;
4975 			TimePerChromaMetaChunkNominal[k] = 0;
4976 			TimePerChromaMetaChunkVBlank[k] = 0;
4977 			TimePerChromaMetaChunkFlip[k] = 0;
4978 		}
4979 	}
4980 
4981 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4982 		if (GPUVMEnable == true) {
4983 			if (!IsVertical(SourceRotation[k])) {
4984 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
4985 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4986 			} else {
4987 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
4988 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4989 			}
4990 
4991 			if (use_one_row_for_frame[k]) {
4992 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4993 						(double) dpte_group_width_luma / 2.0, 1.0);
4994 			} else {
4995 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
4996 						(double) dpte_group_width_luma, 1.0);
4997 			}
4998 #ifdef __DML_VBA_DEBUG__
4999 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5000 					__func__, k, use_one_row_for_frame[k]);
5001 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5002 					__func__, k, dpte_group_bytes[k]);
5003 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5004 					__func__, k, PTERequestSizeY[k]);
5005 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5006 					__func__, k, PixelPTEReqWidthY[k]);
5007 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5008 					__func__, k, PixelPTEReqHeightY[k]);
5009 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5010 					__func__, k, dpte_row_width_luma_ub[k]);
5011 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5012 					__func__, k, dpte_group_width_luma);
5013 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5014 					__func__, k, dpte_groups_per_row_luma_ub);
5015 #endif
5016 
5017 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5018 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5019 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5020 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5021 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5022 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5023 			if (BytePerPixelC[k] == 0) {
5024 				time_per_pte_group_nom_chroma[k] = 0;
5025 				time_per_pte_group_vblank_chroma[k] = 0;
5026 				time_per_pte_group_flip_chroma[k] = 0;
5027 			} else {
5028 				if (!IsVertical(SourceRotation[k])) {
5029 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5030 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5031 				} else {
5032 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5033 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5034 				}
5035 
5036 				if (use_one_row_for_frame[k]) {
5037 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5038 							(double) dpte_group_width_chroma / 2.0, 1.0);
5039 				} else {
5040 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5041 							(double) dpte_group_width_chroma, 1.0);
5042 				}
5043 #ifdef __DML_VBA_DEBUG__
5044 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5045 						__func__, k, dpte_row_width_chroma_ub[k]);
5046 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5047 						__func__, k, dpte_group_width_chroma);
5048 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5049 						__func__, k, dpte_groups_per_row_chroma_ub);
5050 #endif
5051 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5052 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5053 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5054 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5055 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5056 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5057 			}
5058 		} else {
5059 			time_per_pte_group_nom_luma[k] = 0;
5060 			time_per_pte_group_vblank_luma[k] = 0;
5061 			time_per_pte_group_flip_luma[k] = 0;
5062 			time_per_pte_group_nom_chroma[k] = 0;
5063 			time_per_pte_group_vblank_chroma[k] = 0;
5064 			time_per_pte_group_flip_chroma[k] = 0;
5065 		}
5066 #ifdef __DML_VBA_DEBUG__
5067 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5068 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5069 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5070 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5071 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5072 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5073 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5074 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5075 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5076 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5077 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5078 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5079 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5080 				__func__, k, TimePerMetaChunkNominal[k]);
5081 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5082 				__func__, k, TimePerMetaChunkVBlank[k]);
5083 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5084 				__func__, k, TimePerMetaChunkFlip[k]);
5085 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5086 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5087 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5088 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5089 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5090 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5091 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5092 				__func__, k, time_per_pte_group_nom_luma[k]);
5093 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5094 				__func__, k, time_per_pte_group_vblank_luma[k]);
5095 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5096 				__func__, k, time_per_pte_group_flip_luma[k]);
5097 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5098 				__func__, k, time_per_pte_group_nom_chroma[k]);
5099 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5100 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5101 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5102 				__func__, k, time_per_pte_group_flip_chroma[k]);
5103 #endif
5104 	}
5105 } // CalculateMetaAndPTETimes
5106 
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface time available for each
 * VM group and for each 64-byte VM request, in the vblank and immediate-flip
 * cases.
 *
 * For every surface k in [0, NumberOfActiveSurfaces):
 *
 *   - When GPUVMEnable is false, or the surface has DCC disabled and
 *     GPUVMMaxPageTableLevels <= 1, all four outputs for k are set to 0.
 *
 *   - Otherwise a group count and a request count are accumulated from:
 *       DCC disabled                         : dpde0 byte counts only
 *       DCC enabled, exactly 1 table level   : meta PTE byte counts only
 *       DCC enabled, any other level count   : dpde0 + meta PTE byte counts,
 *                                              plus 1 extra group (2 when the
 *                                              surface has a chroma plane)
 *     Chroma byte counts are included only when BytePerPixelC[k] > 0.
 *     Groups are ceil(bytes / vm_group_bytes[k]); requests are bytes / 64
 *     (integer division).
 *
 *     Each output is then
 *       DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] * HTotal[k] /
 *       PixelClock[k] / count
 *     and is halved once more when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;
	unsigned int num_group_per_lower_vm_stage;
	unsigned int num_req_per_lower_vm_stage;
	bool has_chroma;
	bool count_pde;
	bool count_meta_pte;
	double group_bytes;
	double group_sum;
	double vblank_window;
	double flip_window;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing is budgeted for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			has_chroma = BytePerPixelC[k] > 0;
			/* Which byte counts feed the group/request totals. */
			count_meta_pte = DCCEnable[k];
			count_pde = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
			group_bytes = (double) vm_group_bytes[k];

			/*
			 * When both kinds contribute, one extra group per
			 * plane is added up front; otherwise start from zero.
			 */
			if (count_pde && count_meta_pte)
				group_sum = has_chroma ? 2 : 1;
			else
				group_sum = 0;
			num_req_per_lower_vm_stage = 0;

			if (count_pde) {
				group_sum += dml_ceil(
						(double) dpde0_bytes_per_frame_ub_l[k] / group_bytes, 1.0);
				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += dml_ceil(
							(double) dpde0_bytes_per_frame_ub_c[k] / group_bytes,
							1.0);
					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta_pte) {
				group_sum += dml_ceil(
						(double) meta_pte_bytes_per_frame_ub_l[k] / group_bytes, 1.0);
				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += dml_ceil(
							(double) meta_pte_bytes_per_frame_ub_c[k] / group_bytes,
							1.0);
					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			num_group_per_lower_vm_stage = group_sum;

			/* Total time for the whole VM fetch in each scenario. */
			vblank_window = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k];
			flip_window = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];

			TimePerVMGroupVBlank[k] = vblank_window / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = flip_window / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = vblank_window / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = flip_window / num_req_per_lower_vm_stage;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5260 
5261 void dml32_CalculateDCCConfiguration(
5262 		bool             DCCEnabled,
5263 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5264 		enum source_format_class SourcePixelFormat,
5265 		unsigned int             SurfaceWidthLuma,
5266 		unsigned int             SurfaceWidthChroma,
5267 		unsigned int             SurfaceHeightLuma,
5268 		unsigned int             SurfaceHeightChroma,
5269 		unsigned int                nomDETInKByte,
5270 		unsigned int             RequestHeight256ByteLuma,
5271 		unsigned int             RequestHeight256ByteChroma,
5272 		enum dm_swizzle_mode     TilingFormat,
5273 		unsigned int             BytePerPixelY,
5274 		unsigned int             BytePerPixelC,
5275 		double              BytePerPixelDETY,
5276 		double              BytePerPixelDETC,
5277 		enum dm_rotation_angle   SourceRotation,
5278 		/* Output */
5279 		unsigned int        *MaxUncompressedBlockLuma,
5280 		unsigned int        *MaxUncompressedBlockChroma,
5281 		unsigned int        *MaxCompressedBlockLuma,
5282 		unsigned int        *MaxCompressedBlockChroma,
5283 		unsigned int        *IndependentBlockLuma,
5284 		unsigned int        *IndependentBlockChroma)
5285 {
5286 	typedef enum {
5287 		REQ_256Bytes,
5288 		REQ_128BytesNonContiguous,
5289 		REQ_128BytesContiguous,
5290 		REQ_NA
5291 	} RequestType;
5292 
5293 	RequestType   RequestLuma;
5294 	RequestType   RequestChroma;
5295 
5296 	unsigned int   segment_order_horz_contiguous_luma;
5297 	unsigned int   segment_order_horz_contiguous_chroma;
5298 	unsigned int   segment_order_vert_contiguous_luma;
5299 	unsigned int   segment_order_vert_contiguous_chroma;
5300 	unsigned int req128_horz_wc_l;
5301 	unsigned int req128_horz_wc_c;
5302 	unsigned int req128_vert_wc_l;
5303 	unsigned int req128_vert_wc_c;
5304 	unsigned int MAS_vp_horz_limit;
5305 	unsigned int MAS_vp_vert_limit;
5306 	unsigned int max_vp_horz_width;
5307 	unsigned int max_vp_vert_height;
5308 	unsigned int eff_surf_width_l;
5309 	unsigned int eff_surf_width_c;
5310 	unsigned int eff_surf_height_l;
5311 	unsigned int eff_surf_height_c;
5312 	unsigned int full_swath_bytes_horz_wc_l;
5313 	unsigned int full_swath_bytes_horz_wc_c;
5314 	unsigned int full_swath_bytes_vert_wc_l;
5315 	unsigned int full_swath_bytes_vert_wc_c;
5316 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5317 
5318 	unsigned int   yuv420;
5319 	unsigned int   horz_div_l;
5320 	unsigned int   horz_div_c;
5321 	unsigned int   vert_div_l;
5322 	unsigned int   vert_div_c;
5323 
5324 	unsigned int     swath_buf_size;
5325 	double   detile_buf_vp_horz_limit;
5326 	double   detile_buf_vp_vert_limit;
5327 
5328 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5329 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5330 	horz_div_l = 1;
5331 	horz_div_c = 1;
5332 	vert_div_l = 1;
5333 	vert_div_c = 1;
5334 
5335 	if (BytePerPixelY == 1)
5336 		vert_div_l = 0;
5337 	if (BytePerPixelC == 1)
5338 		vert_div_c = 0;
5339 
5340 	if (BytePerPixelC == 0) {
5341 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5342 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5343 				BytePerPixelY / (1 + horz_div_l));
5344 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5345 				(1 + vert_div_l));
5346 	} else {
5347 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5348 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5349 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5350 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5351 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5352 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5353 				(1 + vert_div_c) / (1 + yuv420));
5354 	}
5355 
5356 	if (SourcePixelFormat == dm_420_10) {
5357 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5358 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5359 	}
5360 
5361 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5362 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5363 
5364 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5365 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5366 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5367 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5368 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5369 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5370 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5371 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5372 
5373 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5374 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5375 	if (BytePerPixelC > 0) {
5376 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5377 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5378 	} else {
5379 		full_swath_bytes_horz_wc_c = 0;
5380 		full_swath_bytes_vert_wc_c = 0;
5381 	}
5382 
5383 	if (SourcePixelFormat == dm_420_10) {
5384 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5385 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5386 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5387 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5388 	}
5389 
5390 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5391 		req128_horz_wc_l = 0;
5392 		req128_horz_wc_c = 0;
5393 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5394 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5395 		req128_horz_wc_l = 0;
5396 		req128_horz_wc_c = 1;
5397 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5398 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5399 		req128_horz_wc_l = 1;
5400 		req128_horz_wc_c = 0;
5401 	} else {
5402 		req128_horz_wc_l = 1;
5403 		req128_horz_wc_c = 1;
5404 	}
5405 
5406 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5407 		req128_vert_wc_l = 0;
5408 		req128_vert_wc_c = 0;
5409 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5410 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5411 		req128_vert_wc_l = 0;
5412 		req128_vert_wc_c = 1;
5413 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5414 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5415 		req128_vert_wc_l = 1;
5416 		req128_vert_wc_c = 0;
5417 	} else {
5418 		req128_vert_wc_l = 1;
5419 		req128_vert_wc_c = 1;
5420 	}
5421 
5422 	if (BytePerPixelY == 2) {
5423 		segment_order_horz_contiguous_luma = 0;
5424 		segment_order_vert_contiguous_luma = 1;
5425 	} else {
5426 		segment_order_horz_contiguous_luma = 1;
5427 		segment_order_vert_contiguous_luma = 0;
5428 	}
5429 
5430 	if (BytePerPixelC == 2) {
5431 		segment_order_horz_contiguous_chroma = 0;
5432 		segment_order_vert_contiguous_chroma = 1;
5433 	} else {
5434 		segment_order_horz_contiguous_chroma = 1;
5435 		segment_order_vert_contiguous_chroma = 0;
5436 	}
5437 #ifdef __DML_VBA_DEBUG__
5438 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5439 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5440 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5441 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5442 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5443 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5444 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5445 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5446 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5447 			__func__, segment_order_horz_contiguous_chroma);
5448 #endif
5449 
5450 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5451 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5452 			RequestLuma = REQ_256Bytes;
5453 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5454 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5455 			RequestLuma = REQ_128BytesNonContiguous;
5456 		else
5457 			RequestLuma = REQ_128BytesContiguous;
5458 
5459 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5460 			RequestChroma = REQ_256Bytes;
5461 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5462 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5463 			RequestChroma = REQ_128BytesNonContiguous;
5464 		else
5465 			RequestChroma = REQ_128BytesContiguous;
5466 
5467 	} else if (!IsVertical(SourceRotation)) {
5468 		if (req128_horz_wc_l == 0)
5469 			RequestLuma = REQ_256Bytes;
5470 		else if (segment_order_horz_contiguous_luma == 0)
5471 			RequestLuma = REQ_128BytesNonContiguous;
5472 		else
5473 			RequestLuma = REQ_128BytesContiguous;
5474 
5475 		if (req128_horz_wc_c == 0)
5476 			RequestChroma = REQ_256Bytes;
5477 		else if (segment_order_horz_contiguous_chroma == 0)
5478 			RequestChroma = REQ_128BytesNonContiguous;
5479 		else
5480 			RequestChroma = REQ_128BytesContiguous;
5481 
5482 	} else {
5483 		if (req128_vert_wc_l == 0)
5484 			RequestLuma = REQ_256Bytes;
5485 		else if (segment_order_vert_contiguous_luma == 0)
5486 			RequestLuma = REQ_128BytesNonContiguous;
5487 		else
5488 			RequestLuma = REQ_128BytesContiguous;
5489 
5490 		if (req128_vert_wc_c == 0)
5491 			RequestChroma = REQ_256Bytes;
5492 		else if (segment_order_vert_contiguous_chroma == 0)
5493 			RequestChroma = REQ_128BytesNonContiguous;
5494 		else
5495 			RequestChroma = REQ_128BytesContiguous;
5496 	}
5497 
5498 	if (RequestLuma == REQ_256Bytes) {
5499 		*MaxUncompressedBlockLuma = 256;
5500 		*MaxCompressedBlockLuma = 256;
5501 		*IndependentBlockLuma = 0;
5502 	} else if (RequestLuma == REQ_128BytesContiguous) {
5503 		*MaxUncompressedBlockLuma = 256;
5504 		*MaxCompressedBlockLuma = 128;
5505 		*IndependentBlockLuma = 128;
5506 	} else {
5507 		*MaxUncompressedBlockLuma = 256;
5508 		*MaxCompressedBlockLuma = 64;
5509 		*IndependentBlockLuma = 64;
5510 	}
5511 
5512 	if (RequestChroma == REQ_256Bytes) {
5513 		*MaxUncompressedBlockChroma = 256;
5514 		*MaxCompressedBlockChroma = 256;
5515 		*IndependentBlockChroma = 0;
5516 	} else if (RequestChroma == REQ_128BytesContiguous) {
5517 		*MaxUncompressedBlockChroma = 256;
5518 		*MaxCompressedBlockChroma = 128;
5519 		*IndependentBlockChroma = 128;
5520 	} else {
5521 		*MaxUncompressedBlockChroma = 256;
5522 		*MaxCompressedBlockChroma = 64;
5523 		*IndependentBlockChroma = 64;
5524 	}
5525 
5526 	if (DCCEnabled != true || BytePerPixelC == 0) {
5527 		*MaxUncompressedBlockChroma = 0;
5528 		*MaxCompressedBlockChroma = 0;
5529 		*IndependentBlockChroma = 0;
5530 	}
5531 
5532 	if (DCCEnabled != true) {
5533 		*MaxUncompressedBlockLuma = 0;
5534 		*MaxCompressedBlockLuma = 0;
5535 		*IndependentBlockLuma = 0;
5536 	}
5537 
5538 #ifdef __DML_VBA_DEBUG__
5539 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5540 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5541 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5542 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5543 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5544 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5545 #endif
5546 
5547 } // CalculateDCCConfiguration
5548 
5549 void dml32_CalculateStutterEfficiency(
5550 		unsigned int      CompressedBufferSizeInkByte,
5551 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5552 		bool   UnboundedRequestEnabled,
5553 		unsigned int      MetaFIFOSizeInKEntries,
5554 		unsigned int      ZeroSizeBufferEntries,
5555 		unsigned int      PixelChunkSizeInKByte,
5556 		unsigned int   NumberOfActiveSurfaces,
5557 		unsigned int      ROBBufferSizeInKByte,
5558 		double    TotalDataReadBandwidth,
5559 		double    DCFCLK,
5560 		double    ReturnBW,
5561 		unsigned int      CompbufReservedSpace64B,
5562 		unsigned int      CompbufReservedSpaceZs,
5563 		double    SRExitTime,
5564 		double    SRExitZ8Time,
5565 		bool   SynchronizeTimingsFinal,
5566 		unsigned int   BlendingAndTiming[],
5567 		double    StutterEnterPlusExitWatermark,
5568 		double    Z8StutterEnterPlusExitWatermark,
5569 		bool   ProgressiveToInterlaceUnitInOPP,
5570 		bool   Interlace[],
5571 		double    MinTTUVBlank[],
5572 		unsigned int   DPPPerSurface[],
5573 		unsigned int      DETBufferSizeY[],
5574 		unsigned int   BytePerPixelY[],
5575 		double    BytePerPixelDETY[],
5576 		double      SwathWidthY[],
5577 		unsigned int   SwathHeightY[],
5578 		unsigned int   SwathHeightC[],
5579 		double    NetDCCRateLuma[],
5580 		double    NetDCCRateChroma[],
5581 		double    DCCFractionOfZeroSizeRequestsLuma[],
5582 		double    DCCFractionOfZeroSizeRequestsChroma[],
5583 		unsigned int      HTotal[],
5584 		unsigned int      VTotal[],
5585 		double    PixelClock[],
5586 		double    VRatio[],
5587 		enum dm_rotation_angle SourceRotation[],
5588 		unsigned int   BlockHeight256BytesY[],
5589 		unsigned int   BlockWidth256BytesY[],
5590 		unsigned int   BlockHeight256BytesC[],
5591 		unsigned int   BlockWidth256BytesC[],
5592 		unsigned int   DCCYMaxUncompressedBlock[],
5593 		unsigned int   DCCCMaxUncompressedBlock[],
5594 		unsigned int      VActive[],
5595 		bool   DCCEnable[],
5596 		bool   WritebackEnable[],
5597 		double    ReadBandwidthSurfaceLuma[],
5598 		double    ReadBandwidthSurfaceChroma[],
5599 		double    meta_row_bw[],
5600 		double    dpte_row_bw[],
5601 
5602 		/* Output */
5603 		double   *StutterEfficiencyNotIncludingVBlank,
5604 		double   *StutterEfficiency,
5605 		unsigned int     *NumberOfStutterBurstsPerFrame,
5606 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5607 		double   *Z8StutterEfficiency,
5608 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5609 		double   *StutterPeriod,
5610 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5611 {
5612 
5613 	bool FoundCriticalSurface = false;
5614 	unsigned int SwathSizeCriticalSurface = 0;
5615 	unsigned int LastChunkOfSwathSize;
5616 	unsigned int MissingPartOfLastSwathOfDETSize;
5617 	double LastZ8StutterPeriod = 0.0;
5618 	double LastStutterPeriod = 0.0;
5619 	unsigned int TotalNumberOfActiveOTG = 0;
5620 	double doublePixelClock;
5621 	unsigned int doubleHTotal;
5622 	unsigned int doubleVTotal;
5623 	bool SameTiming = true;
5624 	double DETBufferingTimeY;
5625 	double SwathWidthYCriticalSurface = 0.0;
5626 	double SwathHeightYCriticalSurface = 0.0;
5627 	double VActiveTimeCriticalSurface = 0.0;
5628 	double FrameTimeCriticalSurface = 0.0;
5629 	unsigned int BytePerPixelYCriticalSurface = 0;
5630 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5631 	unsigned int DETBufferSizeYCriticalSurface = 0;
5632 	double MinTTUVBlankCriticalSurface = 0.0;
5633 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5634 	bool doublePlaneCriticalSurface = 0;
5635 	bool doublePipeCriticalSurface = 0;
5636 	double TotalCompressedReadBandwidth;
5637 	double TotalRowReadBandwidth;
5638 	double AverageDCCCompressionRate;
5639 	double EffectiveCompressedBufferSize;
5640 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5641 	double StutterBurstTime;
5642 	unsigned int TotalActiveWriteback;
5643 	double LinesInDETY;
5644 	double LinesInDETYRoundedDownToSwath;
5645 	double MaximumEffectiveCompressionLuma;
5646 	double MaximumEffectiveCompressionChroma;
5647 	double TotalZeroSizeRequestReadBandwidth;
5648 	double TotalZeroSizeCompressedReadBandwidth;
5649 	double AverageDCCZeroSizeFraction;
5650 	double AverageZeroSizeCompressionRate;
5651 	unsigned int k;
5652 
5653 	TotalZeroSizeRequestReadBandwidth = 0;
5654 	TotalZeroSizeCompressedReadBandwidth = 0;
5655 	TotalRowReadBandwidth = 0;
5656 	TotalCompressedReadBandwidth = 0;
5657 
5658 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5659 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5660 			if (DCCEnable[k] == true) {
5661 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5662 						|| (!IsVertical(SourceRotation[k])
5663 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5664 						|| DCCYMaxUncompressedBlock[k] < 256) {
5665 					MaximumEffectiveCompressionLuma = 2;
5666 				} else {
5667 					MaximumEffectiveCompressionLuma = 4;
5668 				}
5669 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5670 						+ ReadBandwidthSurfaceLuma[k]
5671 								/ dml_min(NetDCCRateLuma[k],
5672 										MaximumEffectiveCompressionLuma);
5673 #ifdef __DML_VBA_DEBUG__
5674 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5675 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5676 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5677 						__func__, k, NetDCCRateLuma[k]);
5678 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5679 						__func__, k, MaximumEffectiveCompressionLuma);
5680 #endif
5681 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5682 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5683 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5684 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5685 								/ MaximumEffectiveCompressionLuma;
5686 
5687 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5688 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5689 							|| (!IsVertical(SourceRotation[k])
5690 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5691 							|| DCCCMaxUncompressedBlock[k] < 256) {
5692 						MaximumEffectiveCompressionChroma = 2;
5693 					} else {
5694 						MaximumEffectiveCompressionChroma = 4;
5695 					}
5696 					TotalCompressedReadBandwidth =
5697 							TotalCompressedReadBandwidth
5698 							+ ReadBandwidthSurfaceChroma[k]
5699 							/ dml_min(NetDCCRateChroma[k],
5700 							MaximumEffectiveCompressionChroma);
5701 #ifdef __DML_VBA_DEBUG__
5702 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5703 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5704 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5705 							__func__, k, NetDCCRateChroma[k]);
5706 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5707 							__func__, k, MaximumEffectiveCompressionChroma);
5708 #endif
5709 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5710 							+ ReadBandwidthSurfaceChroma[k]
5711 									* DCCFractionOfZeroSizeRequestsChroma[k];
5712 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5713 							+ ReadBandwidthSurfaceChroma[k]
5714 									* DCCFractionOfZeroSizeRequestsChroma[k]
5715 									/ MaximumEffectiveCompressionChroma;
5716 				}
5717 			} else {
5718 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5719 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5720 			}
5721 			TotalRowReadBandwidth = TotalRowReadBandwidth
5722 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5723 		}
5724 	}
5725 
5726 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5727 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5728 
5729 #ifdef __DML_VBA_DEBUG__
5730 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5731 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5732 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5733 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5734 			__func__, TotalZeroSizeCompressedReadBandwidth);
5735 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5736 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5737 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5738 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5739 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5740 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5741 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5742 #endif
5743 	if (AverageDCCZeroSizeFraction == 1) {
5744 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5745 				/ TotalZeroSizeCompressedReadBandwidth;
5746 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5747 				* AverageZeroSizeCompressionRate
5748 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5749 						* AverageZeroSizeCompressionRate;
5750 	} else if (AverageDCCZeroSizeFraction > 0) {
5751 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5752 				/ TotalZeroSizeCompressedReadBandwidth;
5753 		EffectiveCompressedBufferSize = dml_min(
5754 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5755 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5756 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5757 					+ 1 / AverageDCCCompressionRate))
5758 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5759 					* AverageDCCCompressionRate,
5760 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5761 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5762 
5763 #ifdef __DML_VBA_DEBUG__
5764 		dml_print("DML::%s: min 1 = %f\n", __func__,
5765 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5766 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5767 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5768 						AverageDCCCompressionRate));
5769 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5770 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5771 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5772 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5773 #endif
5774 	} else {
5775 		EffectiveCompressedBufferSize = dml_min(
5776 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5777 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5778 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5779 						* AverageDCCCompressionRate;
5780 
5781 #ifdef __DML_VBA_DEBUG__
5782 		dml_print("DML::%s: min 1 = %f\n", __func__,
5783 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5784 		dml_print("DML::%s: min 2 = %f\n", __func__,
5785 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5786 #endif
5787 	}
5788 
5789 #ifdef __DML_VBA_DEBUG__
5790 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5791 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5792 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5793 #endif
5794 
5795 	*StutterPeriod = 0;
5796 
5797 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5798 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5799 			LinesInDETY = ((double) DETBufferSizeY[k]
5800 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5801 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5802 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5803 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5804 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5805 					/ VRatio[k];
5806 #ifdef __DML_VBA_DEBUG__
5807 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5808 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5809 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5810 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5811 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5812 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5813 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5814 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5815 					__func__, k, LinesInDETYRoundedDownToSwath);
5816 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5817 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5818 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5819 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5820 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5821 #endif
5822 
5823 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5824 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5825 
5826 				FoundCriticalSurface = true;
5827 				*StutterPeriod = DETBufferingTimeY;
5828 				FrameTimeCriticalSurface = (
5829 						isInterlaceTiming ?
5830 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5831 						* (double) HTotal[k] / PixelClock[k];
5832 				VActiveTimeCriticalSurface = (
5833 						isInterlaceTiming ?
5834 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5835 						* (double) HTotal[k] / PixelClock[k];
5836 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5837 				SwathWidthYCriticalSurface = SwathWidthY[k];
5838 				SwathHeightYCriticalSurface = SwathHeightY[k];
5839 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5840 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5841 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5842 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5843 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5844 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5845 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5846 
5847 #ifdef __DML_VBA_DEBUG__
5848 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5849 						__func__, k, FoundCriticalSurface);
5850 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5851 						__func__, k, *StutterPeriod);
5852 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5853 						__func__, k, MinTTUVBlankCriticalSurface);
5854 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5855 						__func__, k, FrameTimeCriticalSurface);
5856 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5857 						__func__, k, VActiveTimeCriticalSurface);
5858 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5859 						__func__, k, BytePerPixelYCriticalSurface);
5860 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5861 						__func__, k, SwathWidthYCriticalSurface);
5862 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5863 						__func__, k, SwathHeightYCriticalSurface);
5864 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5865 						__func__, k, BlockWidth256BytesYCriticalSurface);
5866 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5867 						__func__, k, doublePlaneCriticalSurface);
5868 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5869 						__func__, k, doublePipeCriticalSurface);
5870 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5871 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5872 #endif
5873 			}
5874 		}
5875 	}
5876 
5877 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5878 			EffectiveCompressedBufferSize);
5879 #ifdef __DML_VBA_DEBUG__
5880 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5881 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5882 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5883 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5884 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5885 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5886 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5887 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5888 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5889 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5890 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5891 #endif
5892 
5893 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5894 			/ ReturnBW
5895 			+ (*StutterPeriod * TotalDataReadBandwidth
5896 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5897 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5898 #ifdef __DML_VBA_DEBUG__
5899 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5900 			AverageDCCCompressionRate / ReturnBW);
5901 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5902 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5903 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5904 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5905 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5906 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5907 #endif
5908 	StutterBurstTime = dml_max(StutterBurstTime,
5909 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5910 					* SwathWidthYCriticalSurface / ReturnBW);
5911 
5912 #ifdef __DML_VBA_DEBUG__
5913 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5914 			__func__,
5915 			LinesToFinishSwathTransferStutterCriticalSurface *
5916 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5917 #endif
5918 
5919 	TotalActiveWriteback = 0;
5920 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5921 		if (WritebackEnable[k])
5922 			TotalActiveWriteback = TotalActiveWriteback + 1;
5923 	}
5924 
5925 	if (TotalActiveWriteback == 0) {
5926 #ifdef __DML_VBA_DEBUG__
5927 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5928 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5929 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5930 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5931 #endif
5932 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5933 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5934 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5935 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5936 		*NumberOfStutterBurstsPerFrame = (
5937 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5938 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5939 		*Z8NumberOfStutterBurstsPerFrame = (
5940 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5941 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5942 	} else {
5943 		*StutterEfficiencyNotIncludingVBlank = 0.;
5944 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5945 		*NumberOfStutterBurstsPerFrame = 0;
5946 		*Z8NumberOfStutterBurstsPerFrame = 0;
5947 	}
5948 #ifdef __DML_VBA_DEBUG__
5949 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5950 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5951 			__func__, *StutterEfficiencyNotIncludingVBlank);
5952 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5953 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5954 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5955 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5956 #endif
5957 
5958 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5959 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5960 			if (BlendingAndTiming[k] == k) {
5961 				if (TotalNumberOfActiveOTG == 0) {
5962 					doublePixelClock = PixelClock[k];
5963 					doubleHTotal = HTotal[k];
5964 					doubleVTotal = VTotal[k];
5965 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5966 						|| doubleVTotal != VTotal[k]) {
5967 					SameTiming = false;
5968 				}
5969 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5970 			}
5971 		}
5972 	}
5973 
5974 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
5975 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5976 
5977 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5978 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5979 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5980 						+ StutterBurstTime * VActiveTimeCriticalSurface
5981 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5982 		} else {
5983 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5984 		}
5985 	} else {
5986 		*StutterEfficiency = 0;
5987 	}
5988 
5989 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5990 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
5991 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5992 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
5993 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
5994 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
5995 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5996 		} else {
5997 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
5998 		}
5999 	} else {
6000 		*Z8StutterEfficiency = 0.;
6001 	}
6002 
6003 #ifdef __DML_VBA_DEBUG__
6004 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6005 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6006 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6007 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6008 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6009 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6010 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6011 			__func__, *StutterEfficiencyNotIncludingVBlank);
6012 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6013 #endif
6014 
6015 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6016 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6017 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6018 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6019 			- DETBufferSizeYCriticalSurface;
6020 
6021 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6022 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6023 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6024 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6025 
6026 #ifdef __DML_VBA_DEBUG__
6027 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6028 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6029 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6030 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6031 #endif
6032 } // CalculateStutterEfficiency
6033 
/*
 * Derive the total DET budget, the nominal per-DPP DET size and the
 * compressed-buffer size that remains, from the return-buffer configuration.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                    ROBBufferSizeInKByte), passed through
 *                                    dml_ceil(..., 64)
 *   nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed
 *                                    through dml_floor(..., 64); replaced by
 *                                    nomDETInKByteOverrideValue when
 *                                    nomDETInKByteOverrideEnable is set
 *   MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                    MaxTotalDETInKByte
 *
 * NOTE(review): the final subtraction is done in unsigned arithmetic and
 * MaxNumDPP is used as a divisor; presumably callers only pass values for
 * which both are well behaved - confirm against the IP parameters.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal DET size stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6070 
/*
 * Check whether the combined VActive bandwidth demand of all active surfaces
 * fits within the available return bandwidth.
 *
 * The demand of one surface is its luma read, chroma read and cursor
 * bandwidth, each multiplied by the matching urgent burst factor, plus the
 * meta-row and DPTE-row bandwidth each multiplied by the surface's DPP count.
 *
 * Returns true only when the summed demand is <= ReturnBW and no surface has
 * its NotUrgentLatencyHiding flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_failed = false;
	double demand = 0;
	bool fits;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_failed = true;

		/* Kept as one left-to-right sum so the rounding of the total is fixed. */
		demand = demand
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	fits = (demand <= ReturnBW) && !latency_hiding_failed;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_failed);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, demand);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, fits);
#endif
	return fits;
}
6109 
/*
 * Sum the per-surface bandwidth demand during prefetch and compare it with
 * the available return bandwidth.
 *
 * For every active surface the demand is dml_max3() of:
 *   - NumberOfDPP * prefetch_vmrow_bw
 *   - luma, chroma and cursor read bandwidth scaled by their urgent burst
 *     factors, plus NumberOfDPP * (meta_row_bandwidth + dpte_row_bandwidth)
 *   - NumberOfDPP * (luma and chroma prefetch bandwidth scaled by their "Pre"
 *     burst factors), plus cursor_bw_pre scaled by its "Pre" burst factor
 *
 * Outputs:
 *   PrefetchBandwidth         - sum of the per-surface demands
 *   PrefetchBandwidthSupport  - true when that sum is <= ReturnBW and no
 *                               surface has NotUrgentLatencyHiding set
 *   FractionOfUrgentBandwidth - PrefetchBandwidth / ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *PrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int surf;
	bool latency_hiding_failed = false;

	*PrefetchBandwidth = 0;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double vm_row_demand = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];
		const double active_demand =
				ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);
		const double prefetch_demand =
				NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		if (NotUrgentLatencyHiding[surf])
			latency_hiding_failed = true;

		*PrefetchBandwidth += dml_max3(vm_row_demand, active_demand, prefetch_demand);
	}

	*PrefetchBandwidthSupport = !latency_hiding_failed && (*PrefetchBandwidth <= ReturnBW);
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
6153 
/*
 * Compute the return bandwidth left over for immediate flip.
 *
 * Starting from ReturnBW, each active surface subtracts dml_max() of:
 *   - its luma, chroma and cursor read bandwidth scaled by their urgent burst
 *     factors
 *   - NumberOfDPP * (luma and chroma prefetch bandwidth scaled by their "Pre"
 *     burst factors), plus cursor_bw_pre scaled by its "Pre" burst factor
 *
 * Returns the remainder; nothing in this function clamps it, so it can be
 * negative when the surfaces ask for more than ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double active_demand =
				ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		const double prefetch_demand =
				NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_demand, prefetch_demand);
	}

	return remaining_bw;
}
6180 
6181 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6182 		double ReturnBW,
6183 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6184 		double final_flip_bw[],
6185 		double ReadBandwidthLuma[],
6186 		double ReadBandwidthChroma[],
6187 		double PrefetchBandwidthLuma[],
6188 		double PrefetchBandwidthChroma[],
6189 		double cursor_bw[],
6190 		double meta_row_bandwidth[],
6191 		double dpte_row_bandwidth[],
6192 		double cursor_bw_pre[],
6193 		double prefetch_vmrow_bw[],
6194 		unsigned int NumberOfDPP[],
6195 		double UrgentBurstFactorLuma[],
6196 		double UrgentBurstFactorChroma[],
6197 		double UrgentBurstFactorCursor[],
6198 		double UrgentBurstFactorLumaPre[],
6199 		double UrgentBurstFactorChromaPre[],
6200 		double UrgentBurstFactorCursorPre[],
6201 
6202 		/* output */
6203 		double  *TotalBandwidth,
6204 		double  *FractionOfUrgentBandwidth,
6205 		bool *ImmediateFlipBandwidthSupport)
6206 {
6207 	unsigned int k;
6208 	*TotalBandwidth = 0;
6209 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6210 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6211 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6212 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6213 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6214 		} else {
6215 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6216 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6217 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6218 		}
6219 	}
6220 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6221 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6222 }
6223