1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #include "dc.h"
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_314.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096

// For DML-C changes that haven't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__

// Move these to ip parameters/constant

// The vstartup value at which DML starts trying to determine whether the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__ 9

// Delay in DCFCLK from ARB to DET (1st number is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15
59
/*
 * Pipe - bundle of per-pipe parameters handed to CalculatePrefetchSchedule()
 * through its myPipe argument.
 *
 * The comments below describe only what this file visibly does with each
 * field. Fields without a comment are not read in the part of the file
 * reviewed here; their meaning is presumably what the name suggests, but
 * that should be confirmed against the code that fills the structure in.
 */
typedef struct {
	/* Clocks forwarded to CalculateVupdateAndDynamicMetadataParameters(). */
	double DPPCLK;
	double DISPCLK;
	/* Also the divisor in LineTime = HTotal / PixelClock. */
	double PixelClock;
	double DCFCLKDeepSleep;
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* Forwarded to CalculateVupdateAndDynamicMetadataParameters(). */
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	/* Forwarded to CalculateVupdateAndDynamicMetadataParameters(). */
	unsigned int VBlank;
	/* Numerator of LineTime = HTotal / PixelClock; also forwarded as above. */
	unsigned int HTotal;
	unsigned int DCCEnable;
	bool ODMCombineIsEnabled;
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	/* Forwarded to CalculateVupdateAndDynamicMetadataParameters(). */
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
85
/*
 * NOTE(review): these two macros repeat, with identical values, the
 * BPP_INVALID and BPP_BLENDED_PIPE definitions that already appear with the
 * other constants near the top of this file.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
88
89 static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102 static unsigned int dscceComputeDelay(
103 unsigned int bpc,
104 double BPP,
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110 static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
113 Pipe *myPipe,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
126 bool GPUVMEnable,
127 bool HostVMEnable,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
136 double TCalc,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
152 double TWait,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
164 double *Tno_bw,
165 double *prefetch_vmrow_bw,
166 double *Tdmdl_vm,
167 double *Tdmdl,
168 double *TSetup,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174 static void CalculateDCCConfiguration(
175 bool DCCEnabled,
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197 static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
199 double VRatio,
200 double vtaps,
201 bool Interlace,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207 static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
209 bool DCCEnable,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
218 bool GPUVMEnable,
219 bool HostVMEnable,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
224 unsigned int Pitch,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
236 int *vm_group_bytes,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244 static void CalculateRowBandwidth(
245 bool GPUVMEnable,
246 enum source_format_class SourcePixelFormat,
247 double VRatio,
248 double VRatioChroma,
249 bool DCCEnable,
250 double LineTime,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
259 double *meta_row_bw,
260 double *dpte_row_bw);
261
262 static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
264 unsigned int k,
265 double HostVMInefficiencyFactor,
266 double UrgentExtraLatency,
267 double UrgentLatency,
268 double PDEAndMetaPTEBytesPerFrame,
269 double MetaRowBytes,
270 double DPTEBytesPerRow);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 int WritebackDestinationWidth,
277 int WritebackDestinationHeight,
278 int WritebackSourceHeight,
279 unsigned int HTotal);
280
281 static void CalculateVupdateAndDynamicMetadataParameters(
282 int MaxInterDCNTileRepeaters,
283 double DPPCLK,
284 double DISPCLK,
285 double DCFClkDeepSleep,
286 double PixelClock,
287 int HTotal,
288 int VBlank,
289 int DynamicMetadataTransmittedBytes,
290 int DynamicMetadataLinesBeforeActiveRequired,
291 int InterlaceEnable,
292 bool ProgressiveToInterlaceUnitInOPP,
293 double *TSetup,
294 double *Tdmbf,
295 double *Tdmec,
296 double *Tdmsks,
297 int *VUpdateOffsetPix,
298 double *VUpdateWidthPix,
299 double *VReadyOffsetPix);
300
301 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
302 struct display_mode_lib *mode_lib,
303 unsigned int PrefetchMode,
304 double DCFCLK,
305 double ReturnBW,
306 double UrgentLatency,
307 double ExtraLatency,
308 double SOCCLK,
309 double DCFCLKDeepSleep,
310 unsigned int DETBufferSizeY[],
311 unsigned int DETBufferSizeC[],
312 unsigned int SwathHeightY[],
313 unsigned int SwathHeightC[],
314 double SwathWidthY[],
315 double SwathWidthC[],
316 unsigned int DPPPerPlane[],
317 double BytePerPixelDETY[],
318 double BytePerPixelDETC[],
319 bool UnboundedRequestEnabled,
320 unsigned int CompressedBufferSizeInkByte,
321 enum clock_change_support *DRAMClockChangeSupport,
322 double *StutterExitWatermark,
323 double *StutterEnterPlusExitWatermark,
324 double *Z8StutterExitWatermark,
325 double *Z8StutterEnterPlusExitWatermark);
326
327 static void CalculateDCFCLKDeepSleep(
328 struct display_mode_lib *mode_lib,
329 unsigned int NumberOfActivePlanes,
330 int BytePerPixelY[],
331 int BytePerPixelC[],
332 double VRatio[],
333 double VRatioChroma[],
334 double SwathWidthY[],
335 double SwathWidthC[],
336 unsigned int DPPPerPlane[],
337 double HRatio[],
338 double HRatioChroma[],
339 double PixelClock[],
340 double PSCL_THROUGHPUT[],
341 double PSCL_THROUGHPUT_CHROMA[],
342 double DPPCLK[],
343 double ReadBandwidthLuma[],
344 double ReadBandwidthChroma[],
345 int ReturnBusWidth,
346 double *DCFCLKDeepSleep);
347
348 static void CalculateUrgentBurstFactor(
349 int swath_width_luma_ub,
350 int swath_width_chroma_ub,
351 unsigned int SwathHeightY,
352 unsigned int SwathHeightC,
353 double LineTime,
354 double UrgentLatency,
355 double CursorBufferSize,
356 unsigned int CursorWidth,
357 unsigned int CursorBPP,
358 double VRatio,
359 double VRatioC,
360 double BytePerPixelInDETY,
361 double BytePerPixelInDETC,
362 double DETBufferSizeY,
363 double DETBufferSizeC,
364 double *UrgentBurstFactorCursor,
365 double *UrgentBurstFactorLuma,
366 double *UrgentBurstFactorChroma,
367 bool *NotEnoughUrgentLatencyHiding);
368
369 static void UseMinimumDCFCLK(
370 struct display_mode_lib *mode_lib,
371 int MaxPrefetchMode,
372 int ReorderingBytes);
373
374 static void CalculatePixelDeliveryTimes(
375 unsigned int NumberOfActivePlanes,
376 double VRatio[],
377 double VRatioChroma[],
378 double VRatioPrefetchY[],
379 double VRatioPrefetchC[],
380 unsigned int swath_width_luma_ub[],
381 unsigned int swath_width_chroma_ub[],
382 unsigned int DPPPerPlane[],
383 double HRatio[],
384 double HRatioChroma[],
385 double PixelClock[],
386 double PSCL_THROUGHPUT[],
387 double PSCL_THROUGHPUT_CHROMA[],
388 double DPPCLK[],
389 int BytePerPixelC[],
390 enum scan_direction_class SourceScan[],
391 unsigned int NumberOfCursors[],
392 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
393 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
394 unsigned int BlockWidth256BytesY[],
395 unsigned int BlockHeight256BytesY[],
396 unsigned int BlockWidth256BytesC[],
397 unsigned int BlockHeight256BytesC[],
398 double DisplayPipeLineDeliveryTimeLuma[],
399 double DisplayPipeLineDeliveryTimeChroma[],
400 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
401 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
402 double DisplayPipeRequestDeliveryTimeLuma[],
403 double DisplayPipeRequestDeliveryTimeChroma[],
404 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
406 double CursorRequestDeliveryTime[],
407 double CursorRequestDeliveryTimePrefetch[]);
408
409 static void CalculateMetaAndPTETimes(
410 int NumberOfActivePlanes,
411 bool GPUVMEnable,
412 int MetaChunkSize,
413 int MinMetaChunkSizeBytes,
414 int HTotal[],
415 double VRatio[],
416 double VRatioChroma[],
417 double DestinationLinesToRequestRowInVBlank[],
418 double DestinationLinesToRequestRowInImmediateFlip[],
419 bool DCCEnable[],
420 double PixelClock[],
421 int BytePerPixelY[],
422 int BytePerPixelC[],
423 enum scan_direction_class SourceScan[],
424 int dpte_row_height[],
425 int dpte_row_height_chroma[],
426 int meta_row_width[],
427 int meta_row_width_chroma[],
428 int meta_row_height[],
429 int meta_row_height_chroma[],
430 int meta_req_width[],
431 int meta_req_width_chroma[],
432 int meta_req_height[],
433 int meta_req_height_chroma[],
434 int dpte_group_bytes[],
435 int PTERequestSizeY[],
436 int PTERequestSizeC[],
437 int PixelPTEReqWidthY[],
438 int PixelPTEReqHeightY[],
439 int PixelPTEReqWidthC[],
440 int PixelPTEReqHeightC[],
441 int dpte_row_width_luma_ub[],
442 int dpte_row_width_chroma_ub[],
443 double DST_Y_PER_PTE_ROW_NOM_L[],
444 double DST_Y_PER_PTE_ROW_NOM_C[],
445 double DST_Y_PER_META_ROW_NOM_L[],
446 double DST_Y_PER_META_ROW_NOM_C[],
447 double TimePerMetaChunkNominal[],
448 double TimePerChromaMetaChunkNominal[],
449 double TimePerMetaChunkVBlank[],
450 double TimePerChromaMetaChunkVBlank[],
451 double TimePerMetaChunkFlip[],
452 double TimePerChromaMetaChunkFlip[],
453 double time_per_pte_group_nom_luma[],
454 double time_per_pte_group_vblank_luma[],
455 double time_per_pte_group_flip_luma[],
456 double time_per_pte_group_nom_chroma[],
457 double time_per_pte_group_vblank_chroma[],
458 double time_per_pte_group_flip_chroma[]);
459
460 static void CalculateVMGroupAndRequestTimes(
461 unsigned int NumberOfActivePlanes,
462 bool GPUVMEnable,
463 unsigned int GPUVMMaxPageTableLevels,
464 unsigned int HTotal[],
465 int BytePerPixelC[],
466 double DestinationLinesToRequestVMInVBlank[],
467 double DestinationLinesToRequestVMInImmediateFlip[],
468 bool DCCEnable[],
469 double PixelClock[],
470 int dpte_row_width_luma_ub[],
471 int dpte_row_width_chroma_ub[],
472 int vm_group_bytes[],
473 unsigned int dpde0_bytes_per_frame_ub_l[],
474 unsigned int dpde0_bytes_per_frame_ub_c[],
475 int meta_pte_bytes_per_frame_ub_l[],
476 int meta_pte_bytes_per_frame_ub_c[],
477 double TimePerVMGroupVBlank[],
478 double TimePerVMGroupFlip[],
479 double TimePerVMRequestVBlank[],
480 double TimePerVMRequestFlip[]);
481
482 static void CalculateStutterEfficiency(
483 struct display_mode_lib *mode_lib,
484 int CompressedBufferSizeInkByte,
485 bool UnboundedRequestEnabled,
486 int ConfigReturnBufferSizeInKByte,
487 int MetaFIFOSizeInKEntries,
488 int ZeroSizeBufferEntries,
489 int NumberOfActivePlanes,
490 int ROBBufferSizeInKByte,
491 double TotalDataReadBandwidth,
492 double DCFCLK,
493 double ReturnBW,
494 double COMPBUF_RESERVED_SPACE_64B,
495 double COMPBUF_RESERVED_SPACE_ZS,
496 double SRExitTime,
497 double SRExitZ8Time,
498 bool SynchronizedVBlank,
499 double Z8StutterEnterPlusExitWatermark,
500 double StutterEnterPlusExitWatermark,
501 bool ProgressiveToInterlaceUnitInOPP,
502 bool Interlace[],
503 double MinTTUVBlank[],
504 int DPPPerPlane[],
505 unsigned int DETBufferSizeY[],
506 int BytePerPixelY[],
507 double BytePerPixelDETY[],
508 double SwathWidthY[],
509 int SwathHeightY[],
510 int SwathHeightC[],
511 double NetDCCRateLuma[],
512 double NetDCCRateChroma[],
513 double DCCFractionOfZeroSizeRequestsLuma[],
514 double DCCFractionOfZeroSizeRequestsChroma[],
515 int HTotal[],
516 int VTotal[],
517 double PixelClock[],
518 double VRatio[],
519 enum scan_direction_class SourceScan[],
520 int BlockHeight256BytesY[],
521 int BlockWidth256BytesY[],
522 int BlockHeight256BytesC[],
523 int BlockWidth256BytesC[],
524 int DCCYMaxUncompressedBlock[],
525 int DCCCMaxUncompressedBlock[],
526 int VActive[],
527 bool DCCEnable[],
528 bool WritebackEnable[],
529 double ReadBandwidthPlaneLuma[],
530 double ReadBandwidthPlaneChroma[],
531 double meta_row_bw[],
532 double dpte_row_bw[],
533 double *StutterEfficiencyNotIncludingVBlank,
534 double *StutterEfficiency,
535 int *NumberOfStutterBurstsPerFrame,
536 double *Z8StutterEfficiencyNotIncludingVBlank,
537 double *Z8StutterEfficiency,
538 int *Z8NumberOfStutterBurstsPerFrame,
539 double *StutterPeriod);
540
541 static void CalculateSwathAndDETConfiguration(
542 bool ForceSingleDPP,
543 int NumberOfActivePlanes,
544 unsigned int DETBufferSizeInKByte,
545 double MaximumSwathWidthLuma[],
546 double MaximumSwathWidthChroma[],
547 enum scan_direction_class SourceScan[],
548 enum source_format_class SourcePixelFormat[],
549 enum dm_swizzle_mode SurfaceTiling[],
550 int ViewportWidth[],
551 int ViewportHeight[],
552 int SurfaceWidthY[],
553 int SurfaceWidthC[],
554 int SurfaceHeightY[],
555 int SurfaceHeightC[],
556 int Read256BytesBlockHeightY[],
557 int Read256BytesBlockHeightC[],
558 int Read256BytesBlockWidthY[],
559 int Read256BytesBlockWidthC[],
560 enum odm_combine_mode ODMCombineEnabled[],
561 int BlendingAndTiming[],
562 int BytePerPixY[],
563 int BytePerPixC[],
564 double BytePerPixDETY[],
565 double BytePerPixDETC[],
566 int HActive[],
567 double HRatio[],
568 double HRatioChroma[],
569 int DPPPerPlane[],
570 int swath_width_luma_ub[],
571 int swath_width_chroma_ub[],
572 double SwathWidth[],
573 double SwathWidthChroma[],
574 int SwathHeightY[],
575 int SwathHeightC[],
576 unsigned int DETBufferSizeY[],
577 unsigned int DETBufferSizeC[],
578 bool ViewportSizeSupportPerPlane[],
579 bool *ViewportSizeSupport);
580 static void CalculateSwathWidth(
581 bool ForceSingleDPP,
582 int NumberOfActivePlanes,
583 enum source_format_class SourcePixelFormat[],
584 enum scan_direction_class SourceScan[],
585 int ViewportWidth[],
586 int ViewportHeight[],
587 int SurfaceWidthY[],
588 int SurfaceWidthC[],
589 int SurfaceHeightY[],
590 int SurfaceHeightC[],
591 enum odm_combine_mode ODMCombineEnabled[],
592 int BytePerPixY[],
593 int BytePerPixC[],
594 int Read256BytesBlockHeightY[],
595 int Read256BytesBlockHeightC[],
596 int Read256BytesBlockWidthY[],
597 int Read256BytesBlockWidthC[],
598 int BlendingAndTiming[],
599 int HActive[],
600 double HRatio[],
601 int DPPPerPlane[],
602 double SwathWidthSingleDPPY[],
603 double SwathWidthSingleDPPC[],
604 double SwathWidthY[],
605 double SwathWidthC[],
606 int MaximumSwathHeightY[],
607 int MaximumSwathHeightC[],
608 int swath_width_luma_ub[],
609 int swath_width_chroma_ub[]);
610
611 static double CalculateExtraLatency(
612 int RoundTripPingLatencyCycles,
613 int ReorderingBytes,
614 double DCFCLK,
615 int TotalNumberOfActiveDPP,
616 int PixelChunkSizeInKByte,
617 int TotalNumberOfDCCActiveDPP,
618 int MetaChunkSize,
619 double ReturnBW,
620 bool GPUVMEnable,
621 bool HostVMEnable,
622 int NumberOfActivePlanes,
623 int NumberOfDPP[],
624 int dpte_group_bytes[],
625 double HostVMInefficiencyFactor,
626 double HostVMMinPageSize,
627 int HostVMMaxNonCachedPageTableLevels);
628
629 static double CalculateExtraLatencyBytes(
630 int ReorderingBytes,
631 int TotalNumberOfActiveDPP,
632 int PixelChunkSizeInKByte,
633 int TotalNumberOfDCCActiveDPP,
634 int MetaChunkSize,
635 bool GPUVMEnable,
636 bool HostVMEnable,
637 int NumberOfActivePlanes,
638 int NumberOfDPP[],
639 int dpte_group_bytes[],
640 double HostVMInefficiencyFactor,
641 double HostVMMinPageSize,
642 int HostVMMaxNonCachedPageTableLevels);
643
644 static double CalculateUrgentLatency(
645 double UrgentLatencyPixelDataOnly,
646 double UrgentLatencyPixelMixedWithVMData,
647 double UrgentLatencyVMDataOnly,
648 bool DoUrgentLatencyAdjustment,
649 double UrgentLatencyAdjustmentFabricClockComponent,
650 double UrgentLatencyAdjustmentFabricClockReference,
651 double FabricClockSingle);
652
653 static void CalculateUnboundedRequestAndCompressedBufferSize(
654 unsigned int DETBufferSizeInKByte,
655 int ConfigReturnBufferSizeInKByte,
656 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
657 int TotalActiveDPP,
658 bool NoChromaPlanes,
659 int MaxNumDPP,
660 int CompressedBufferSegmentSizeInkByteFinal,
661 enum output_encoder_class *Output,
662 bool *UnboundedRequestEnabled,
663 int *CompressedBufferSizeInkByte);
664
665 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
666 static unsigned int CalculateMaxVStartup(
667 unsigned int VTotal,
668 unsigned int VActive,
669 unsigned int VBlankNom,
670 unsigned int HTotal,
671 double PixelClock,
672 bool ProgressiveTointerlaceUnitinOPP,
673 bool Interlace,
674 unsigned int VBlankNomDefaultUS,
675 double WritebackDelayTime);
676
/*
 * dml314_recalculate - run the full recalculation sequence on @mode_lib.
 *
 * Calls, in this fixed order and each with the same @mode_lib pointer:
 * ModeSupportAndSystemConfiguration(),
 * PixelClockAdjustmentForProgressiveToInterlaceUnit(),
 * DisplayPipeConfiguration() and finally
 * DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation().
 * A trace line is printed before the last step when __DML_VBA_DEBUG__ is
 * defined.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
687
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)688 static unsigned int dscceComputeDelay(
689 unsigned int bpc,
690 double BPP,
691 unsigned int sliceWidth,
692 unsigned int numSlices,
693 enum output_format_class pixelFormat,
694 enum output_encoder_class Output)
695 {
696 // valid bpc = source bits per component in the set of {8, 10, 12}
697 // valid bpp = increments of 1/16 of a bit
698 // min = 6/7/8 in N420/N422/444, respectively
699 // max = such that compression is 1:1
700 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
701 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
702 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
703
704 // fixed value
705 unsigned int rcModelSize = 8192;
706
707 // N422/N420 operate at 2 pixels per clock
708 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
709
710 if (pixelFormat == dm_420)
711 pixelsPerClock = 2;
712 else if (pixelFormat == dm_444)
713 pixelsPerClock = 1;
714 else if (pixelFormat == dm_n422)
715 pixelsPerClock = 2;
716 // #all other modes operate at 1 pixel per clock
717 else
718 pixelsPerClock = 1;
719
720 //initial transmit delay as per PPS
721 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
722
723 //compute ssm delay
724 if (bpc == 8)
725 D = 81;
726 else if (bpc == 10)
727 D = 89;
728 else
729 D = 113;
730
731 //divide by pixel per cycle to compute slice width as seen by DSC
732 w = sliceWidth / pixelsPerClock;
733
734 //422 mode has an additional cycle of delay
735 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
736 s = 0;
737 else
738 s = 1;
739
740 //main calculation for the dscce
741 ix = initalXmitDelay + 45;
742 wx = (w + 2) / 3;
743 P = 3 * wx - w;
744 l0 = ix / w;
745 a = ix + P * l0;
746 ax = (a + 2) / 3 + D + 6 + 1;
747 L = (ax + wx - 1) / wx;
748 if ((ix % w) == 0 && P != 0)
749 lstall = 1;
750 else
751 lstall = 0;
752 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
753
754 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
755 pixels = Delay * 3 * pixelsPerClock;
756 return pixels;
757 }
758
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)759 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
760 {
761 unsigned int Delay = 0;
762
763 if (pixelFormat == dm_420) {
764 // sfr
765 Delay = Delay + 2;
766 // dsccif
767 Delay = Delay + 0;
768 // dscc - input deserializer
769 Delay = Delay + 3;
770 // dscc gets pixels every other cycle
771 Delay = Delay + 2;
772 // dscc - input cdc fifo
773 Delay = Delay + 12;
774 // dscc gets pixels every other cycle
775 Delay = Delay + 13;
776 // dscc - cdc uncertainty
777 Delay = Delay + 2;
778 // dscc - output cdc fifo
779 Delay = Delay + 7;
780 // dscc gets pixels every other cycle
781 Delay = Delay + 3;
782 // dscc - cdc uncertainty
783 Delay = Delay + 2;
784 // dscc - output serializer
785 Delay = Delay + 1;
786 // sft
787 Delay = Delay + 1;
788 } else if (pixelFormat == dm_n422) {
789 // sfr
790 Delay = Delay + 2;
791 // dsccif
792 Delay = Delay + 1;
793 // dscc - input deserializer
794 Delay = Delay + 5;
795 // dscc - input cdc fifo
796 Delay = Delay + 25;
797 // dscc - cdc uncertainty
798 Delay = Delay + 2;
799 // dscc - output cdc fifo
800 Delay = Delay + 10;
801 // dscc - cdc uncertainty
802 Delay = Delay + 2;
803 // dscc - output serializer
804 Delay = Delay + 1;
805 // sft
806 Delay = Delay + 1;
807 } else {
808 // sfr
809 Delay = Delay + 2;
810 // dsccif
811 Delay = Delay + 0;
812 // dscc - input deserializer
813 Delay = Delay + 3;
814 // dscc - input cdc fifo
815 Delay = Delay + 12;
816 // dscc - cdc uncertainty
817 Delay = Delay + 2;
818 // dscc - output cdc fifo
819 Delay = Delay + 7;
820 // dscc - output serializer
821 Delay = Delay + 1;
822 // dscc - cdc uncertainty
823 Delay = Delay + 2;
824 // sft
825 Delay = Delay + 1;
826 }
827
828 return Delay;
829 }
830
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)831 static bool CalculatePrefetchSchedule(
832 struct display_mode_lib *mode_lib,
833 double HostVMInefficiencyFactor,
834 Pipe *myPipe,
835 unsigned int DSCDelay,
836 double DPPCLKDelaySubtotalPlusCNVCFormater,
837 double DPPCLKDelaySCL,
838 double DPPCLKDelaySCLLBOnly,
839 double DPPCLKDelayCNVCCursor,
840 double DISPCLKDelaySubtotal,
841 unsigned int DPP_RECOUT_WIDTH,
842 enum output_format_class OutputFormat,
843 unsigned int MaxInterDCNTileRepeaters,
844 unsigned int VStartup,
845 unsigned int MaxVStartup,
846 unsigned int GPUVMPageTableLevels,
847 bool GPUVMEnable,
848 bool HostVMEnable,
849 unsigned int HostVMMaxNonCachedPageTableLevels,
850 double HostVMMinPageSize,
851 bool DynamicMetadataEnable,
852 bool DynamicMetadataVMEnabled,
853 int DynamicMetadataLinesBeforeActiveRequired,
854 unsigned int DynamicMetadataTransmittedBytes,
855 double UrgentLatency,
856 double UrgentExtraLatency,
857 double TCalc,
858 unsigned int PDEAndMetaPTEBytesFrame,
859 unsigned int MetaRowByte,
860 unsigned int PixelPTEBytesPerRow,
861 double PrefetchSourceLinesY,
862 unsigned int SwathWidthY,
863 double VInitPreFillY,
864 unsigned int MaxNumSwathY,
865 double PrefetchSourceLinesC,
866 unsigned int SwathWidthC,
867 double VInitPreFillC,
868 unsigned int MaxNumSwathC,
869 int swath_width_luma_ub,
870 int swath_width_chroma_ub,
871 unsigned int SwathHeightY,
872 unsigned int SwathHeightC,
873 double TWait,
874 double *DSTXAfterScaler,
875 double *DSTYAfterScaler,
876 double *DestinationLinesForPrefetch,
877 double *PrefetchBandwidth,
878 double *DestinationLinesToRequestVMInVBlank,
879 double *DestinationLinesToRequestRowInVBlank,
880 double *VRatioPrefetchY,
881 double *VRatioPrefetchC,
882 double *RequiredPrefetchPixDataBWLuma,
883 double *RequiredPrefetchPixDataBWChroma,
884 bool *NotEnoughTimeForDynamicMetadata,
885 double *Tno_bw,
886 double *prefetch_vmrow_bw,
887 double *Tdmdl_vm,
888 double *Tdmdl,
889 double *TSetup,
890 int *VUpdateOffsetPix,
891 double *VUpdateWidthPix,
892 double *VReadyOffsetPix)
893 {
894 bool MyError = false;
895 unsigned int DPPCycles, DISPCLKCycles;
896 double DSTTotalPixelsAfterScaler;
897 double LineTime;
898 double dst_y_prefetch_equ;
899 #ifdef __DML_VBA_DEBUG__
900 double Tsw_oto;
901 #endif
902 double prefetch_bw_oto;
903 double prefetch_bw_pr;
904 double Tvm_oto;
905 double Tr0_oto;
906 double Tvm_oto_lines;
907 double Tr0_oto_lines;
908 double dst_y_prefetch_oto;
909 double TimeForFetchingMetaPTE = 0;
910 double TimeForFetchingRowInVBlank = 0;
911 double LinesToRequestPrefetchPixelData = 0;
912 unsigned int HostVMDynamicLevelsTrips;
913 double trip_to_mem;
914 double Tvm_trips;
915 double Tr0_trips;
916 double Tvm_trips_rounded;
917 double Tr0_trips_rounded;
918 double Lsw_oto;
919 double Tpre_rounded;
920 double prefetch_bw_equ;
921 double Tvm_equ;
922 double Tr0_equ;
923 double Tdmbf;
924 double Tdmec;
925 double Tdmsks;
926 double prefetch_sw_bytes;
927 double bytes_pp;
928 double dep_bytes;
929 int max_vratio_pre = 4;
930 double min_Lsw;
931 double Tsw_est1 = 0;
932 double Tsw_est3 = 0;
933 double max_Tsw = 0;
934
935 if (GPUVMEnable == true && HostVMEnable == true) {
936 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
937 } else {
938 HostVMDynamicLevelsTrips = 0;
939 }
940 #ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
942 #endif
943 CalculateVupdateAndDynamicMetadataParameters(
944 MaxInterDCNTileRepeaters,
945 myPipe->DPPCLK,
946 myPipe->DISPCLK,
947 myPipe->DCFCLKDeepSleep,
948 myPipe->PixelClock,
949 myPipe->HTotal,
950 myPipe->VBlank,
951 DynamicMetadataTransmittedBytes,
952 DynamicMetadataLinesBeforeActiveRequired,
953 myPipe->InterlaceEnable,
954 myPipe->ProgressiveToInterlaceUnitInOPP,
955 TSetup,
956 &Tdmbf,
957 &Tdmec,
958 &Tdmsks,
959 VUpdateOffsetPix,
960 VUpdateWidthPix,
961 VReadyOffsetPix);
962
963 LineTime = myPipe->HTotal / myPipe->PixelClock;
964 trip_to_mem = UrgentLatency;
965 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
966
967 #ifdef __DML_VBA_ALLOW_DELTA__
968 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
969 #else
970 if (DynamicMetadataVMEnabled == true) {
971 #endif
972 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
973 } else {
974 *Tdmdl = TWait + UrgentExtraLatency;
975 }
976
977 #ifdef __DML_VBA_ALLOW_DELTA__
978 if (DynamicMetadataEnable == false) {
979 *Tdmdl = 0.0;
980 }
981 #endif
982
983 if (DynamicMetadataEnable == true) {
984 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
985 *NotEnoughTimeForDynamicMetadata = true;
986 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
987 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
988 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
989 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
990 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
991 } else {
992 *NotEnoughTimeForDynamicMetadata = false;
993 }
994 } else {
995 *NotEnoughTimeForDynamicMetadata = false;
996 }
997
998 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
999
1000 if (myPipe->ScalerEnabled)
1001 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1002 else
1003 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1004
1005 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1006
1007 DISPCLKCycles = DISPCLKDelaySubtotal;
1008
1009 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1010 return true;
1011
1012 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1013
1014 #ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1016 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1017 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1018 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1019 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1020 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1021 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1022 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1023 #endif
1024
1025 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1026
1027 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1028 *DSTYAfterScaler = 1;
1029 else
1030 *DSTYAfterScaler = 0;
1031
1032 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1033 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1034 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1035
1036 #ifdef __DML_VBA_DEBUG__
1037 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1038 #endif
1039
1040 MyError = false;
1041
1042 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1043 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1044 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1045
1046 #ifdef __DML_VBA_ALLOW_DELTA__
1047 if (!myPipe->DCCEnable) {
1048 Tr0_trips = 0.0;
1049 Tr0_trips_rounded = 0.0;
1050 }
1051 #endif
1052
1053 if (!GPUVMEnable) {
1054 Tvm_trips = 0.0;
1055 Tvm_trips_rounded = 0.0;
1056 }
1057
1058 if (GPUVMEnable) {
1059 if (GPUVMPageTableLevels >= 3) {
1060 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1061 } else {
1062 *Tno_bw = 0;
1063 }
1064 } else if (!myPipe->DCCEnable) {
1065 *Tno_bw = LineTime;
1066 } else {
1067 *Tno_bw = LineTime / 4;
1068 }
1069
1070 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1071 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1072 else
1073 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1074 /*rev 99*/
1075 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1076 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1077 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1078 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1079 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1080
1081 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1082 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1083 #ifdef __DML_VBA_DEBUG__
1084 Tsw_oto = Lsw_oto * LineTime;
1085 #endif
1086
1087
1088 #ifdef __DML_VBA_DEBUG__
1089 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1090 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1091 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1092 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1093 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1094 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1095 #endif
1096
1097 if (GPUVMEnable == true)
1098 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1099 else
1100 Tvm_oto = LineTime / 4.0;
1101
1102 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1103 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1104 LineTime - Tvm_oto,
1105 LineTime / 4);
1106 } else {
1107 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1108 }
1109
1110 #ifdef __DML_VBA_DEBUG__
1111 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1112 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1113 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1114 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1115 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1116 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1117 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1118 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1119 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1120 #endif
1121
1122 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1123 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1124 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1125 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1126 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1127 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1128
1129 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1130
1131 if (prefetch_sw_bytes < dep_bytes)
1132 prefetch_sw_bytes = 2 * dep_bytes;
1133
1134 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1135 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1136 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1137 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1138 dml_print("DML: LineTime: %f\n", LineTime);
1139 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1140
1141 dml_print("DML: LineTime: %f\n", LineTime);
1142 dml_print("DML: VStartup: %d\n", VStartup);
1143 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1144 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1145 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1146 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1147 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1148 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1149 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1150 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1151 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1152 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1153 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1154
1155 *PrefetchBandwidth = 0;
1156 *DestinationLinesToRequestVMInVBlank = 0;
1157 *DestinationLinesToRequestRowInVBlank = 0;
1158 *VRatioPrefetchY = 0;
1159 *VRatioPrefetchC = 0;
1160 *RequiredPrefetchPixDataBWLuma = 0;
1161 if (dst_y_prefetch_equ > 1) {
1162 double PrefetchBandwidth1;
1163 double PrefetchBandwidth2;
1164 double PrefetchBandwidth3;
1165 double PrefetchBandwidth4;
1166
1167 if (Tpre_rounded - *Tno_bw > 0) {
1168 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1169 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1170 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1171 } else {
1172 PrefetchBandwidth1 = 0;
1173 }
1174
1175 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1176 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1177 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1178 }
1179
1180 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1181 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1182 else
1183 PrefetchBandwidth2 = 0;
1184
1185 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1186 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1187 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1188 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1189 } else {
1190 PrefetchBandwidth3 = 0;
1191 }
1192
1193 #ifdef __DML_VBA_DEBUG__
1194 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1195 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1196 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1197 #endif
1198 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1199 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1200 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1201 }
1202
1203 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1204 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1205 else
1206 PrefetchBandwidth4 = 0;
1207
1208 {
1209 bool Case1OK;
1210 bool Case2OK;
1211 bool Case3OK;
1212
1213 if (PrefetchBandwidth1 > 0) {
1214 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1215 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1216 Case1OK = true;
1217 } else {
1218 Case1OK = false;
1219 }
1220 } else {
1221 Case1OK = false;
1222 }
1223
1224 if (PrefetchBandwidth2 > 0) {
1225 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1226 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1227 Case2OK = true;
1228 } else {
1229 Case2OK = false;
1230 }
1231 } else {
1232 Case2OK = false;
1233 }
1234
1235 if (PrefetchBandwidth3 > 0) {
1236 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1237 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1238 Case3OK = true;
1239 } else {
1240 Case3OK = false;
1241 }
1242 } else {
1243 Case3OK = false;
1244 }
1245
1246 if (Case1OK) {
1247 prefetch_bw_equ = PrefetchBandwidth1;
1248 } else if (Case2OK) {
1249 prefetch_bw_equ = PrefetchBandwidth2;
1250 } else if (Case3OK) {
1251 prefetch_bw_equ = PrefetchBandwidth3;
1252 } else {
1253 prefetch_bw_equ = PrefetchBandwidth4;
1254 }
1255
1256 #ifdef __DML_VBA_DEBUG__
1257 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1258 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1259 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1260 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1261 #endif
1262
1263 if (prefetch_bw_equ > 0) {
1264 if (GPUVMEnable == true) {
1265 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1266 } else {
1267 Tvm_equ = LineTime / 4;
1268 }
1269
1270 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1271 Tr0_equ = dml_max4(
1272 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1273 Tr0_trips,
1274 (LineTime - Tvm_equ) / 2,
1275 LineTime / 4);
1276 } else {
1277 Tr0_equ = (LineTime - Tvm_equ) / 2;
1278 }
1279 } else {
1280 Tvm_equ = 0;
1281 Tr0_equ = 0;
1282 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1283 }
1284 }
1285
1286 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1287 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1288 TimeForFetchingMetaPTE = Tvm_oto;
1289 TimeForFetchingRowInVBlank = Tr0_oto;
1290 *PrefetchBandwidth = prefetch_bw_oto;
1291 } else {
1292 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1293 TimeForFetchingMetaPTE = Tvm_equ;
1294 TimeForFetchingRowInVBlank = Tr0_equ;
1295 *PrefetchBandwidth = prefetch_bw_equ;
1296 }
1297
1298 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1299
1300 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1301
1302 #ifdef __DML_VBA_ALLOW_DELTA__
1303 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1304 // See note above dated 5/30/2018
1305 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1306 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1307 #else
1308 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1309 #endif
1310
1311 #ifdef __DML_VBA_DEBUG__
1312 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1313 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1314 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1315 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1316 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1317 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1318 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1319 #endif
1320
1321 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1322
1323 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1324 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1325 #ifdef __DML_VBA_DEBUG__
1326 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1327 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1328 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1329 #endif
1330 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1331 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1332 *VRatioPrefetchY = dml_max(
1333 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1334 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1335 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1336 } else {
1337 MyError = true;
1338 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1339 *VRatioPrefetchY = 0;
1340 }
1341 #ifdef __DML_VBA_DEBUG__
1342 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1343 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1344 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1345 #endif
1346 }
1347
1348 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1349 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1350
1351 #ifdef __DML_VBA_DEBUG__
1352 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1353 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1354 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1355 #endif
1356 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1357 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1358 *VRatioPrefetchC = dml_max(
1359 *VRatioPrefetchC,
1360 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1361 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1362 } else {
1363 MyError = true;
1364 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1365 *VRatioPrefetchC = 0;
1366 }
1367 #ifdef __DML_VBA_DEBUG__
1368 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1369 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1370 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1371 #endif
1372 }
1373
1374 #ifdef __DML_VBA_DEBUG__
1375 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1376 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1377 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1378 #endif
1379
1380 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1381
1382 #ifdef __DML_VBA_DEBUG__
1383 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1384 #endif
1385
1386 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1387 / LineTime;
1388 } else {
1389 MyError = true;
1390 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1391 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1392 *VRatioPrefetchY = 0;
1393 *VRatioPrefetchC = 0;
1394 *RequiredPrefetchPixDataBWLuma = 0;
1395 *RequiredPrefetchPixDataBWChroma = 0;
1396 }
1397
1398 dml_print(
1399 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1400 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1401 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1402 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1403 dml_print(
1404 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1405 (double) LinesToRequestPrefetchPixelData * LineTime);
1406 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1407 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1408 dml_print(
1409 "DML: Tslack(pre): %fus - time left over in schedule\n",
1410 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1411 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1412 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1413
1414 } else {
1415 MyError = true;
1416 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1417 }
1418
1419 {
1420 double prefetch_vm_bw;
1421 double prefetch_row_bw;
1422
1423 if (PDEAndMetaPTEBytesFrame == 0) {
1424 prefetch_vm_bw = 0;
1425 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1426 #ifdef __DML_VBA_DEBUG__
1427 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1428 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1429 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1430 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1431 #endif
1432 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1433 #ifdef __DML_VBA_DEBUG__
1434 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1435 #endif
1436 } else {
1437 prefetch_vm_bw = 0;
1438 MyError = true;
1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1440 }
1441
1442 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1443 prefetch_row_bw = 0;
1444 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1445 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1446
1447 #ifdef __DML_VBA_DEBUG__
1448 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1449 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1450 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1451 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1452 #endif
1453 } else {
1454 prefetch_row_bw = 0;
1455 MyError = true;
1456 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1457 }
1458
1459 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1460 }
1461
1462 if (MyError) {
1463 *PrefetchBandwidth = 0;
1464 TimeForFetchingMetaPTE = 0;
1465 TimeForFetchingRowInVBlank = 0;
1466 *DestinationLinesToRequestVMInVBlank = 0;
1467 *DestinationLinesToRequestRowInVBlank = 0;
1468 *DestinationLinesForPrefetch = 0;
1469 LinesToRequestPrefetchPixelData = 0;
1470 *VRatioPrefetchY = 0;
1471 *VRatioPrefetchC = 0;
1472 *RequiredPrefetchPixDataBWLuma = 0;
1473 *RequiredPrefetchPixDataBWChroma = 0;
1474 }
1475
1476 return MyError;
1477 }
1478
/*
 * Snap Clock onto the grid of values expressible as (4 * VCOSpeed) / divider
 * and return the grid value obtained with the rounded-down divider.
 *
 * The divider is dml_floor(4 * VCOSpeed / Clock, 1); presumably that call
 * rounds down to a whole number (granularity argument 1) - confirm against
 * dml_inline_defs.h. With a divider no larger than the exact ratio, the
 * returned clock is not below Clock for positive inputs.
 *
 * No guarding is done here: Clock == 0 or a divider of 0 is passed straight
 * through to the floating-point division.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1483
/*
 * Snap Clock onto the grid of values expressible as (4 * VCOSpeed) / divider
 * and return the grid value obtained with the rounded-up divider.
 *
 * The divider is dml_ceil(4 * VCOSpeed / Clock, 1); presumably that call
 * rounds up to a whole number (granularity argument 1) - confirm against
 * dml_inline_defs.h. With a divider no smaller than the exact ratio, the
 * returned clock is not above Clock for positive inputs.
 *
 * No guarding is done here: Clock == 0 is passed straight through to the
 * floating-point division.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return vco_times_four / divider;
}
1488
1489 static void CalculateDCCConfiguration(
1490 bool DCCEnabled,
1491 bool DCCProgrammingAssumesScanDirectionUnknown,
1492 enum source_format_class SourcePixelFormat,
1493 unsigned int SurfaceWidthLuma,
1494 unsigned int SurfaceWidthChroma,
1495 unsigned int SurfaceHeightLuma,
1496 unsigned int SurfaceHeightChroma,
1497 double DETBufferSize,
1498 unsigned int RequestHeight256ByteLuma,
1499 unsigned int RequestHeight256ByteChroma,
1500 enum dm_swizzle_mode TilingFormat,
1501 unsigned int BytePerPixelY,
1502 unsigned int BytePerPixelC,
1503 double BytePerPixelDETY,
1504 double BytePerPixelDETC,
1505 enum scan_direction_class ScanOrientation,
1506 unsigned int *MaxUncompressedBlockLuma,
1507 unsigned int *MaxUncompressedBlockChroma,
1508 unsigned int *MaxCompressedBlockLuma,
1509 unsigned int *MaxCompressedBlockChroma,
1510 unsigned int *IndependentBlockLuma,
1511 unsigned int *IndependentBlockChroma)
1512 {
1513 int yuv420;
1514 int horz_div_l;
1515 int horz_div_c;
1516 int vert_div_l;
1517 int vert_div_c;
1518
1519 int swath_buf_size;
1520 double detile_buf_vp_horz_limit;
1521 double detile_buf_vp_vert_limit;
1522
1523 int MAS_vp_horz_limit;
1524 int MAS_vp_vert_limit;
1525 int max_vp_horz_width;
1526 int max_vp_vert_height;
1527 int eff_surf_width_l;
1528 int eff_surf_width_c;
1529 int eff_surf_height_l;
1530 int eff_surf_height_c;
1531
1532 int full_swath_bytes_horz_wc_l;
1533 int full_swath_bytes_horz_wc_c;
1534 int full_swath_bytes_vert_wc_l;
1535 int full_swath_bytes_vert_wc_c;
1536 int req128_horz_wc_l;
1537 int req128_horz_wc_c;
1538 int req128_vert_wc_l;
1539 int req128_vert_wc_c;
1540 int segment_order_horz_contiguous_luma;
1541 int segment_order_horz_contiguous_chroma;
1542 int segment_order_vert_contiguous_luma;
1543 int segment_order_vert_contiguous_chroma;
1544
1545 typedef enum {
1546 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1547 } RequestType;
1548 RequestType RequestLuma;
1549 RequestType RequestChroma;
1550
1551 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1552 horz_div_l = 1;
1553 horz_div_c = 1;
1554 vert_div_l = 1;
1555 vert_div_c = 1;
1556
1557 if (BytePerPixelY == 1)
1558 vert_div_l = 0;
1559 if (BytePerPixelC == 1)
1560 vert_div_c = 0;
1561 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1562 horz_div_l = 0;
1563 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1564 horz_div_c = 0;
1565
1566 if (BytePerPixelC == 0) {
1567 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1568 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1569 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1570 } else {
1571 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1572 detile_buf_vp_horz_limit = (double) swath_buf_size
1573 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1574 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1575 detile_buf_vp_vert_limit = (double) swath_buf_size
1576 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1577 }
1578
1579 if (SourcePixelFormat == dm_420_10) {
1580 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1581 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1582 }
1583
1584 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1585 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1586
1587 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1588 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1589 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1590 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1591 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1592 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1593 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1594 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1595
1596 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1597 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1598 if (BytePerPixelC > 0) {
1599 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1600 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1601 } else {
1602 full_swath_bytes_horz_wc_c = 0;
1603 full_swath_bytes_vert_wc_c = 0;
1604 }
1605
1606 if (SourcePixelFormat == dm_420_10) {
1607 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1608 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1609 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1610 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1611 }
1612
1613 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1614 req128_horz_wc_l = 0;
1615 req128_horz_wc_c = 0;
1616 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1617 req128_horz_wc_l = 0;
1618 req128_horz_wc_c = 1;
1619 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1620 req128_horz_wc_l = 1;
1621 req128_horz_wc_c = 0;
1622 } else {
1623 req128_horz_wc_l = 1;
1624 req128_horz_wc_c = 1;
1625 }
1626
1627 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1628 req128_vert_wc_l = 0;
1629 req128_vert_wc_c = 0;
1630 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1631 req128_vert_wc_l = 0;
1632 req128_vert_wc_c = 1;
1633 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1634 req128_vert_wc_l = 1;
1635 req128_vert_wc_c = 0;
1636 } else {
1637 req128_vert_wc_l = 1;
1638 req128_vert_wc_c = 1;
1639 }
1640
1641 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1642 segment_order_horz_contiguous_luma = 0;
1643 } else {
1644 segment_order_horz_contiguous_luma = 1;
1645 }
1646 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1647 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1648 segment_order_vert_contiguous_luma = 0;
1649 } else {
1650 segment_order_vert_contiguous_luma = 1;
1651 }
1652 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1653 segment_order_horz_contiguous_chroma = 0;
1654 } else {
1655 segment_order_horz_contiguous_chroma = 1;
1656 }
1657 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1658 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1659 segment_order_vert_contiguous_chroma = 0;
1660 } else {
1661 segment_order_vert_contiguous_chroma = 1;
1662 }
1663
1664 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1665 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1666 RequestLuma = REQ_256Bytes;
1667 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1668 RequestLuma = REQ_128BytesNonContiguous;
1669 } else {
1670 RequestLuma = REQ_128BytesContiguous;
1671 }
1672 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1673 RequestChroma = REQ_256Bytes;
1674 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1675 RequestChroma = REQ_128BytesNonContiguous;
1676 } else {
1677 RequestChroma = REQ_128BytesContiguous;
1678 }
1679 } else if (ScanOrientation != dm_vert) {
1680 if (req128_horz_wc_l == 0) {
1681 RequestLuma = REQ_256Bytes;
1682 } else if (segment_order_horz_contiguous_luma == 0) {
1683 RequestLuma = REQ_128BytesNonContiguous;
1684 } else {
1685 RequestLuma = REQ_128BytesContiguous;
1686 }
1687 if (req128_horz_wc_c == 0) {
1688 RequestChroma = REQ_256Bytes;
1689 } else if (segment_order_horz_contiguous_chroma == 0) {
1690 RequestChroma = REQ_128BytesNonContiguous;
1691 } else {
1692 RequestChroma = REQ_128BytesContiguous;
1693 }
1694 } else {
1695 if (req128_vert_wc_l == 0) {
1696 RequestLuma = REQ_256Bytes;
1697 } else if (segment_order_vert_contiguous_luma == 0) {
1698 RequestLuma = REQ_128BytesNonContiguous;
1699 } else {
1700 RequestLuma = REQ_128BytesContiguous;
1701 }
1702 if (req128_vert_wc_c == 0) {
1703 RequestChroma = REQ_256Bytes;
1704 } else if (segment_order_vert_contiguous_chroma == 0) {
1705 RequestChroma = REQ_128BytesNonContiguous;
1706 } else {
1707 RequestChroma = REQ_128BytesContiguous;
1708 }
1709 }
1710
1711 if (RequestLuma == REQ_256Bytes) {
1712 *MaxUncompressedBlockLuma = 256;
1713 *MaxCompressedBlockLuma = 256;
1714 *IndependentBlockLuma = 0;
1715 } else if (RequestLuma == REQ_128BytesContiguous) {
1716 *MaxUncompressedBlockLuma = 256;
1717 *MaxCompressedBlockLuma = 128;
1718 *IndependentBlockLuma = 128;
1719 } else {
1720 *MaxUncompressedBlockLuma = 256;
1721 *MaxCompressedBlockLuma = 64;
1722 *IndependentBlockLuma = 64;
1723 }
1724
1725 if (RequestChroma == REQ_256Bytes) {
1726 *MaxUncompressedBlockChroma = 256;
1727 *MaxCompressedBlockChroma = 256;
1728 *IndependentBlockChroma = 0;
1729 } else if (RequestChroma == REQ_128BytesContiguous) {
1730 *MaxUncompressedBlockChroma = 256;
1731 *MaxCompressedBlockChroma = 128;
1732 *IndependentBlockChroma = 128;
1733 } else {
1734 *MaxUncompressedBlockChroma = 256;
1735 *MaxCompressedBlockChroma = 64;
1736 *IndependentBlockChroma = 64;
1737 }
1738
1739 if (DCCEnabled != true || BytePerPixelC == 0) {
1740 *MaxUncompressedBlockChroma = 0;
1741 *MaxCompressedBlockChroma = 0;
1742 *IndependentBlockChroma = 0;
1743 }
1744
1745 if (DCCEnabled != true) {
1746 *MaxUncompressedBlockLuma = 0;
1747 *MaxCompressedBlockLuma = 0;
1748 *IndependentBlockLuma = 0;
1749 }
1750 }
1751
1752 static double CalculatePrefetchSourceLines(
1753 struct display_mode_lib *mode_lib,
1754 double VRatio,
1755 double vtaps,
1756 bool Interlace,
1757 bool ProgressiveToInterlaceUnitInOPP,
1758 unsigned int SwathHeight,
1759 unsigned int ViewportYStart,
1760 double *VInitPreFill,
1761 unsigned int *MaxNumSwath)
1762 {
1763 struct vba_vars_st *v = &mode_lib->vba;
1764 unsigned int MaxPartialSwath;
1765
1766 if (ProgressiveToInterlaceUnitInOPP)
1767 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1768 else
1769 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1770
1771 if (!v->IgnoreViewportPositioning) {
1772
1773 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1774
1775 if (*VInitPreFill > 1.0)
1776 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1777 else
1778 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1779 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1780
1781 } else {
1782
1783 if (ViewportYStart != 0)
1784 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1785
1786 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1787
1788 if (*VInitPreFill > 1.0)
1789 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1790 else
1791 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1792 }
1793
1794 #ifdef __DML_VBA_DEBUG__
1795 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1796 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1797 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1798 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1799 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1800 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1801 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1802 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1803 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1804 #endif
1805 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1806 }
1807
1808 static unsigned int CalculateVMAndRowBytes(
1809 struct display_mode_lib *mode_lib,
1810 bool DCCEnable,
1811 unsigned int BlockHeight256Bytes,
1812 unsigned int BlockWidth256Bytes,
1813 enum source_format_class SourcePixelFormat,
1814 unsigned int SurfaceTiling,
1815 unsigned int BytePerPixel,
1816 enum scan_direction_class ScanDirection,
1817 unsigned int SwathWidth,
1818 unsigned int ViewportHeight,
1819 bool GPUVMEnable,
1820 bool HostVMEnable,
1821 unsigned int HostVMMaxNonCachedPageTableLevels,
1822 unsigned int GPUVMMinPageSize,
1823 unsigned int HostVMMinPageSize,
1824 unsigned int PTEBufferSizeInRequests,
1825 unsigned int Pitch,
1826 unsigned int DCCMetaPitch,
1827 unsigned int *MacroTileWidth,
1828 unsigned int *MetaRowByte,
1829 unsigned int *PixelPTEBytesPerRow,
1830 bool *PTEBufferSizeNotExceeded,
1831 int *dpte_row_width_ub,
1832 unsigned int *dpte_row_height,
1833 unsigned int *MetaRequestWidth,
1834 unsigned int *MetaRequestHeight,
1835 unsigned int *meta_row_width,
1836 unsigned int *meta_row_height,
1837 int *vm_group_bytes,
1838 unsigned int *dpte_group_bytes,
1839 unsigned int *PixelPTEReqWidth,
1840 unsigned int *PixelPTEReqHeight,
1841 unsigned int *PTERequestSize,
1842 int *DPDE0BytesFrame,
1843 int *MetaPTEBytesFrame)
1844 {
1845 struct vba_vars_st *v = &mode_lib->vba;
1846 unsigned int MPDEBytesFrame;
1847 unsigned int DCCMetaSurfaceBytes;
1848 unsigned int MacroTileSizeBytes;
1849 unsigned int MacroTileHeight;
1850 unsigned int ExtraDPDEBytesFrame;
1851 unsigned int PDEAndMetaPTEBytesFrame;
1852 unsigned int PixelPTEReqHeightPTEs = 0;
1853 unsigned int HostVMDynamicLevels = 0;
1854 double FractionOfPTEReturnDrop;
1855
1856 if (GPUVMEnable == true && HostVMEnable == true) {
1857 if (HostVMMinPageSize < 2048) {
1858 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1859 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1860 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1861 } else {
1862 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1863 }
1864 }
1865
1866 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1867 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1868 if (ScanDirection != dm_vert) {
1869 *meta_row_height = *MetaRequestHeight;
1870 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1871 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1872 } else {
1873 *meta_row_height = *MetaRequestWidth;
1874 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1875 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1876 }
1877 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1878 if (GPUVMEnable == true) {
1879 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1880 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1881 } else {
1882 *MetaPTEBytesFrame = 0;
1883 MPDEBytesFrame = 0;
1884 }
1885
1886 if (DCCEnable != true) {
1887 *MetaPTEBytesFrame = 0;
1888 MPDEBytesFrame = 0;
1889 *MetaRowByte = 0;
1890 }
1891
1892 if (SurfaceTiling == dm_sw_linear) {
1893 MacroTileSizeBytes = 256;
1894 MacroTileHeight = BlockHeight256Bytes;
1895 } else {
1896 MacroTileSizeBytes = 65536;
1897 MacroTileHeight = 16 * BlockHeight256Bytes;
1898 }
1899 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1900
1901 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1902 if (ScanDirection != dm_vert) {
1903 *DPDE0BytesFrame = 64
1904 * (dml_ceil(
1905 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1906 / (8 * 2097152),
1907 1) + 1);
1908 } else {
1909 *DPDE0BytesFrame = 64
1910 * (dml_ceil(
1911 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1912 / (8 * 2097152),
1913 1) + 1);
1914 }
1915 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1916 } else {
1917 *DPDE0BytesFrame = 0;
1918 ExtraDPDEBytesFrame = 0;
1919 }
1920
1921 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1922
1923 #ifdef __DML_VBA_DEBUG__
1924 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1925 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1926 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1927 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1928 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1929 #endif
1930
1931 if (HostVMEnable == true) {
1932 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1933 }
1934 #ifdef __DML_VBA_DEBUG__
1935 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1936 #endif
1937
1938 if (SurfaceTiling == dm_sw_linear) {
1939 PixelPTEReqHeightPTEs = 1;
1940 *PixelPTEReqHeight = 1;
1941 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1942 *PTERequestSize = 64;
1943 FractionOfPTEReturnDrop = 0;
1944 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1945 PixelPTEReqHeightPTEs = 16;
1946 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1947 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1948 *PTERequestSize = 128;
1949 FractionOfPTEReturnDrop = 0;
1950 } else {
1951 PixelPTEReqHeightPTEs = 1;
1952 *PixelPTEReqHeight = MacroTileHeight;
1953 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1954 *PTERequestSize = 64;
1955 FractionOfPTEReturnDrop = 0;
1956 }
1957
1958 if (SurfaceTiling == dm_sw_linear) {
1959 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1960 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1961 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1962 } else if (ScanDirection != dm_vert) {
1963 *dpte_row_height = *PixelPTEReqHeight;
1964 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1965 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1966 } else {
1967 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1968 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1969 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1970 }
1971
1972 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1973 *PTEBufferSizeNotExceeded = true;
1974 } else {
1975 *PTEBufferSizeNotExceeded = false;
1976 }
1977
1978 if (GPUVMEnable != true) {
1979 *PixelPTEBytesPerRow = 0;
1980 *PTEBufferSizeNotExceeded = true;
1981 }
1982
1983 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1984
1985 if (HostVMEnable == true) {
1986 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1987 }
1988
1989 if (HostVMEnable == true) {
1990 *vm_group_bytes = 512;
1991 *dpte_group_bytes = 512;
1992 } else if (GPUVMEnable == true) {
1993 *vm_group_bytes = 2048;
1994 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1995 *dpte_group_bytes = 512;
1996 } else {
1997 *dpte_group_bytes = 2048;
1998 }
1999 } else {
2000 *vm_group_bytes = 0;
2001 *dpte_group_bytes = 0;
2002 }
2003 return PDEAndMetaPTEBytesFrame;
2004 }
2005
2006 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2007 {
2008 struct vba_vars_st *v = &mode_lib->vba;
2009 unsigned int j, k;
2010 double HostVMInefficiencyFactor = 1.0;
2011 bool NoChromaPlanes = true;
2012 int ReorderBytes;
2013 double VMDataOnlyReturnBW;
2014 double MaxTotalRDBandwidth = 0;
2015 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2016
2017 v->WritebackDISPCLK = 0.0;
2018 v->DISPCLKWithRamping = 0;
2019 v->DISPCLKWithoutRamping = 0;
2020 v->GlobalDPPCLK = 0.0;
2021 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2022 {
2023 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2024 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2025 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2026 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2027
2028 if (v->HostVMEnable != true) {
2029 v->ReturnBW = dml_min(
2030 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2031 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2032 } else {
2033 v->ReturnBW = dml_min(
2034 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2035 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2036 }
2037 }
2038 /* End DAL custom code */
2039
2040 // DISPCLK and DPPCLK Calculation
2041 //
2042 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2043 if (v->WritebackEnable[k]) {
2044 v->WritebackDISPCLK = dml_max(
2045 v->WritebackDISPCLK,
2046 dml314_CalculateWriteBackDISPCLK(
2047 v->WritebackPixelFormat[k],
2048 v->PixelClock[k],
2049 v->WritebackHRatio[k],
2050 v->WritebackVRatio[k],
2051 v->WritebackHTaps[k],
2052 v->WritebackVTaps[k],
2053 v->WritebackSourceWidth[k],
2054 v->WritebackDestinationWidth[k],
2055 v->HTotal[k],
2056 v->WritebackLineBufferSize));
2057 }
2058 }
2059
2060 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2061 if (v->HRatio[k] > 1) {
2062 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2063 v->MaxDCHUBToPSCLThroughput,
2064 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2065 } else {
2066 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2067 }
2068
2069 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2070 * dml_max(
2071 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2072 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2073
2074 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2075 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2076 }
2077
2078 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2079 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2080 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2081 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2082 } else {
2083 if (v->HRatioChroma[k] > 1) {
2084 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2085 v->MaxDCHUBToPSCLThroughput,
2086 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2087 } else {
2088 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2089 }
2090 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2091 * dml_max3(
2092 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2093 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2094 1.0);
2095
2096 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2097 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2098 }
2099
2100 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2101 }
2102 }
2103
2104 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2105 if (v->BlendingAndTiming[k] != k)
2106 continue;
2107 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2108 v->DISPCLKWithRamping = dml_max(
2109 v->DISPCLKWithRamping,
2110 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2111 * (1 + v->DISPCLKRampingMargin / 100));
2112 v->DISPCLKWithoutRamping = dml_max(
2113 v->DISPCLKWithoutRamping,
2114 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2115 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2116 v->DISPCLKWithRamping = dml_max(
2117 v->DISPCLKWithRamping,
2118 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2119 * (1 + v->DISPCLKRampingMargin / 100));
2120 v->DISPCLKWithoutRamping = dml_max(
2121 v->DISPCLKWithoutRamping,
2122 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2123 } else {
2124 v->DISPCLKWithRamping = dml_max(
2125 v->DISPCLKWithRamping,
2126 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2127 v->DISPCLKWithoutRamping = dml_max(
2128 v->DISPCLKWithoutRamping,
2129 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2130 }
2131 }
2132
2133 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2134 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2135
2136 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2137 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2138 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2139 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2140 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2141 v->DISPCLKDPPCLKVCOSpeed);
2142 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2143 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2144 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2145 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2146 } else {
2147 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2148 }
2149 v->DISPCLK = v->DISPCLK_calculated;
2150 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2151
2152 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2153 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2154 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2155 }
2156 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2157 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2158 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2159 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2160 }
2161
2162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2163 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2164 }
2165
2166 // Urgent and B P-State/DRAM Clock Change Watermark
2167 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2168 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2169
2170 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2171 CalculateBytePerPixelAnd256BBlockSizes(
2172 v->SourcePixelFormat[k],
2173 v->SurfaceTiling[k],
2174 &v->BytePerPixelY[k],
2175 &v->BytePerPixelC[k],
2176 &v->BytePerPixelDETY[k],
2177 &v->BytePerPixelDETC[k],
2178 &v->BlockHeight256BytesY[k],
2179 &v->BlockHeight256BytesC[k],
2180 &v->BlockWidth256BytesY[k],
2181 &v->BlockWidth256BytesC[k]);
2182 }
2183
2184 CalculateSwathWidth(
2185 false,
2186 v->NumberOfActivePlanes,
2187 v->SourcePixelFormat,
2188 v->SourceScan,
2189 v->ViewportWidth,
2190 v->ViewportHeight,
2191 v->SurfaceWidthY,
2192 v->SurfaceWidthC,
2193 v->SurfaceHeightY,
2194 v->SurfaceHeightC,
2195 v->ODMCombineEnabled,
2196 v->BytePerPixelY,
2197 v->BytePerPixelC,
2198 v->BlockHeight256BytesY,
2199 v->BlockHeight256BytesC,
2200 v->BlockWidth256BytesY,
2201 v->BlockWidth256BytesC,
2202 v->BlendingAndTiming,
2203 v->HActive,
2204 v->HRatio,
2205 v->DPPPerPlane,
2206 v->SwathWidthSingleDPPY,
2207 v->SwathWidthSingleDPPC,
2208 v->SwathWidthY,
2209 v->SwathWidthC,
2210 v->dummyinteger3,
2211 v->dummyinteger4,
2212 v->swath_width_luma_ub,
2213 v->swath_width_chroma_ub);
2214
2215 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2216 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2217 * v->VRatio[k];
2218 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2219 * v->VRatioChroma[k];
2220 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2221 }
2222
2223 // DCFCLK Deep Sleep
2224 CalculateDCFCLKDeepSleep(
2225 mode_lib,
2226 v->NumberOfActivePlanes,
2227 v->BytePerPixelY,
2228 v->BytePerPixelC,
2229 v->VRatio,
2230 v->VRatioChroma,
2231 v->SwathWidthY,
2232 v->SwathWidthC,
2233 v->DPPPerPlane,
2234 v->HRatio,
2235 v->HRatioChroma,
2236 v->PixelClock,
2237 v->PSCL_THROUGHPUT_LUMA,
2238 v->PSCL_THROUGHPUT_CHROMA,
2239 v->DPPCLK,
2240 v->ReadBandwidthPlaneLuma,
2241 v->ReadBandwidthPlaneChroma,
2242 v->ReturnBusWidth,
2243 &v->DCFCLKDeepSleep);
2244
2245 // DSCCLK
2246 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2247 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2248 v->DSCCLK_calculated[k] = 0.0;
2249 } else {
2250 if (v->OutputFormat[k] == dm_420)
2251 v->DSCFormatFactor = 2;
2252 else if (v->OutputFormat[k] == dm_444)
2253 v->DSCFormatFactor = 1;
2254 else if (v->OutputFormat[k] == dm_n422)
2255 v->DSCFormatFactor = 2;
2256 else
2257 v->DSCFormatFactor = 1;
2258 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2259 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2260 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2261 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2262 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2263 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2264 else
2265 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2266 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2267 }
2268 }
2269
2270 // DSC Delay
2271 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2272 double BPP = v->OutputBpp[k];
2273
2274 if (v->DSCEnabled[k] && BPP != 0) {
2275 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2276 v->DSCDelay[k] = dscceComputeDelay(
2277 v->DSCInputBitPerComponent[k],
2278 BPP,
2279 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2280 v->NumberOfDSCSlices[k],
2281 v->OutputFormat[k],
2282 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2283 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2284 v->DSCDelay[k] = 2
2285 * (dscceComputeDelay(
2286 v->DSCInputBitPerComponent[k],
2287 BPP,
2288 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2289 v->NumberOfDSCSlices[k] / 2.0,
2290 v->OutputFormat[k],
2291 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2292 } else {
2293 v->DSCDelay[k] = 4
2294 * (dscceComputeDelay(
2295 v->DSCInputBitPerComponent[k],
2296 BPP,
2297 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2298 v->NumberOfDSCSlices[k] / 4.0,
2299 v->OutputFormat[k],
2300 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2301 }
2302 v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelay[k] / v->HActive[k], 1);
2303 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2304 } else {
2305 v->DSCDelay[k] = 0;
2306 }
2307 }
2308
2309 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2310 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2311 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2312 v->DSCDelay[k] = v->DSCDelay[j];
2313
2314 // Prefetch
2315 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2316 unsigned int PDEAndMetaPTEBytesFrameY;
2317 unsigned int PixelPTEBytesPerRowY;
2318 unsigned int MetaRowByteY;
2319 unsigned int MetaRowByteC;
2320 unsigned int PDEAndMetaPTEBytesFrameC;
2321 unsigned int PixelPTEBytesPerRowC;
2322 bool PTEBufferSizeNotExceededY;
2323 bool PTEBufferSizeNotExceededC;
2324
2325 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2326 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2327 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2328 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2329 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2330 } else {
2331 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2332 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2333 }
2334
2335 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2336 mode_lib,
2337 v->DCCEnable[k],
2338 v->BlockHeight256BytesC[k],
2339 v->BlockWidth256BytesC[k],
2340 v->SourcePixelFormat[k],
2341 v->SurfaceTiling[k],
2342 v->BytePerPixelC[k],
2343 v->SourceScan[k],
2344 v->SwathWidthC[k],
2345 v->ViewportHeightChroma[k],
2346 v->GPUVMEnable,
2347 v->HostVMEnable,
2348 v->HostVMMaxNonCachedPageTableLevels,
2349 v->GPUVMMinPageSize,
2350 v->HostVMMinPageSize,
2351 v->PTEBufferSizeInRequestsForChroma,
2352 v->PitchC[k],
2353 v->DCCMetaPitchC[k],
2354 &v->MacroTileWidthC[k],
2355 &MetaRowByteC,
2356 &PixelPTEBytesPerRowC,
2357 &PTEBufferSizeNotExceededC,
2358 &v->dpte_row_width_chroma_ub[k],
2359 &v->dpte_row_height_chroma[k],
2360 &v->meta_req_width_chroma[k],
2361 &v->meta_req_height_chroma[k],
2362 &v->meta_row_width_chroma[k],
2363 &v->meta_row_height_chroma[k],
2364 &v->dummyinteger1,
2365 &v->dummyinteger2,
2366 &v->PixelPTEReqWidthC[k],
2367 &v->PixelPTEReqHeightC[k],
2368 &v->PTERequestSizeC[k],
2369 &v->dpde0_bytes_per_frame_ub_c[k],
2370 &v->meta_pte_bytes_per_frame_ub_c[k]);
2371
2372 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2373 mode_lib,
2374 v->VRatioChroma[k],
2375 v->VTAPsChroma[k],
2376 v->Interlace[k],
2377 v->ProgressiveToInterlaceUnitInOPP,
2378 v->SwathHeightC[k],
2379 v->ViewportYStartC[k],
2380 &v->VInitPreFillC[k],
2381 &v->MaxNumSwathC[k]);
2382 } else {
2383 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2384 v->PTEBufferSizeInRequestsForChroma = 0;
2385 PixelPTEBytesPerRowC = 0;
2386 PDEAndMetaPTEBytesFrameC = 0;
2387 MetaRowByteC = 0;
2388 v->MaxNumSwathC[k] = 0;
2389 v->PrefetchSourceLinesC[k] = 0;
2390 }
2391
2392 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2393 mode_lib,
2394 v->DCCEnable[k],
2395 v->BlockHeight256BytesY[k],
2396 v->BlockWidth256BytesY[k],
2397 v->SourcePixelFormat[k],
2398 v->SurfaceTiling[k],
2399 v->BytePerPixelY[k],
2400 v->SourceScan[k],
2401 v->SwathWidthY[k],
2402 v->ViewportHeight[k],
2403 v->GPUVMEnable,
2404 v->HostVMEnable,
2405 v->HostVMMaxNonCachedPageTableLevels,
2406 v->GPUVMMinPageSize,
2407 v->HostVMMinPageSize,
2408 v->PTEBufferSizeInRequestsForLuma,
2409 v->PitchY[k],
2410 v->DCCMetaPitchY[k],
2411 &v->MacroTileWidthY[k],
2412 &MetaRowByteY,
2413 &PixelPTEBytesPerRowY,
2414 &PTEBufferSizeNotExceededY,
2415 &v->dpte_row_width_luma_ub[k],
2416 &v->dpte_row_height[k],
2417 &v->meta_req_width[k],
2418 &v->meta_req_height[k],
2419 &v->meta_row_width[k],
2420 &v->meta_row_height[k],
2421 &v->vm_group_bytes[k],
2422 &v->dpte_group_bytes[k],
2423 &v->PixelPTEReqWidthY[k],
2424 &v->PixelPTEReqHeightY[k],
2425 &v->PTERequestSizeY[k],
2426 &v->dpde0_bytes_per_frame_ub_l[k],
2427 &v->meta_pte_bytes_per_frame_ub_l[k]);
2428
2429 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2430 mode_lib,
2431 v->VRatio[k],
2432 v->vtaps[k],
2433 v->Interlace[k],
2434 v->ProgressiveToInterlaceUnitInOPP,
2435 v->SwathHeightY[k],
2436 v->ViewportYStartY[k],
2437 &v->VInitPreFillY[k],
2438 &v->MaxNumSwathY[k]);
2439 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2440 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2441 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2442
2443 CalculateRowBandwidth(
2444 v->GPUVMEnable,
2445 v->SourcePixelFormat[k],
2446 v->VRatio[k],
2447 v->VRatioChroma[k],
2448 v->DCCEnable[k],
2449 v->HTotal[k] / v->PixelClock[k],
2450 MetaRowByteY,
2451 MetaRowByteC,
2452 v->meta_row_height[k],
2453 v->meta_row_height_chroma[k],
2454 PixelPTEBytesPerRowY,
2455 PixelPTEBytesPerRowC,
2456 v->dpte_row_height[k],
2457 v->dpte_row_height_chroma[k],
2458 &v->meta_row_bw[k],
2459 &v->dpte_row_bw[k]);
2460 }
2461
2462 v->TotalDCCActiveDPP = 0;
2463 v->TotalActiveDPP = 0;
2464 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2465 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2466 if (v->DCCEnable[k])
2467 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2468 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2469 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2470 NoChromaPlanes = false;
2471 }
2472
2473 ReorderBytes = v->NumberOfChannels
2474 * dml_max3(
2475 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2476 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2477 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2478
2479 VMDataOnlyReturnBW = dml_min(
2480 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2481 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2482 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2483 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2484
2485 #ifdef __DML_VBA_DEBUG__
2486 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2487 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2488 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2489 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2490 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2491 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2492 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2493 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2494 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2495 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2496 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2497 #endif
2498
2499 if (v->GPUVMEnable && v->HostVMEnable)
2500 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2501
2502 v->UrgentExtraLatency = CalculateExtraLatency(
2503 v->RoundTripPingLatencyCycles,
2504 ReorderBytes,
2505 v->DCFCLK,
2506 v->TotalActiveDPP,
2507 v->PixelChunkSizeInKByte,
2508 v->TotalDCCActiveDPP,
2509 v->MetaChunkSize,
2510 v->ReturnBW,
2511 v->GPUVMEnable,
2512 v->HostVMEnable,
2513 v->NumberOfActivePlanes,
2514 v->DPPPerPlane,
2515 v->dpte_group_bytes,
2516 HostVMInefficiencyFactor,
2517 v->HostVMMinPageSize,
2518 v->HostVMMaxNonCachedPageTableLevels);
2519
2520 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2521
2522 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2523 if (v->BlendingAndTiming[k] == k) {
2524 if (v->WritebackEnable[k] == true) {
2525 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2526 + CalculateWriteBackDelay(
2527 v->WritebackPixelFormat[k],
2528 v->WritebackHRatio[k],
2529 v->WritebackVRatio[k],
2530 v->WritebackVTaps[k],
2531 v->WritebackDestinationWidth[k],
2532 v->WritebackDestinationHeight[k],
2533 v->WritebackSourceHeight[k],
2534 v->HTotal[k]) / v->DISPCLK;
2535 } else
2536 v->WritebackDelay[v->VoltageLevel][k] = 0;
2537 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2538 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2539 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2540 v->WritebackDelay[v->VoltageLevel][k],
2541 v->WritebackLatency
2542 + CalculateWriteBackDelay(
2543 v->WritebackPixelFormat[j],
2544 v->WritebackHRatio[j],
2545 v->WritebackVRatio[j],
2546 v->WritebackVTaps[j],
2547 v->WritebackDestinationWidth[j],
2548 v->WritebackDestinationHeight[j],
2549 v->WritebackSourceHeight[j],
2550 v->HTotal[k]) / v->DISPCLK);
2551 }
2552 }
2553 }
2554 }
2555
2556 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2557 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2558 if (v->BlendingAndTiming[k] == j)
2559 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2560
2561 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2562 v->MaxVStartupLines[k] =
2563 CalculateMaxVStartup(
2564 v->VTotal[k],
2565 v->VActive[k],
2566 v->VBlankNom[k],
2567 v->HTotal[k],
2568 v->PixelClock[k],
2569 v->ProgressiveToInterlaceUnitInOPP,
2570 v->Interlace[k],
2571 v->ip.VBlankNomDefaultUS,
2572 v->WritebackDelay[v->VoltageLevel][k]);
2573
2574 #ifdef __DML_VBA_DEBUG__
2575 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2576 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2577 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2578 #endif
2579 }
2580
2581 v->MaximumMaxVStartupLines = 0;
2582 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2583 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2584
2585 // VBA_DELTA
2586 // We don't really care to iterate between the various prefetch modes
2587 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2588
2589 v->UrgentLatency = CalculateUrgentLatency(
2590 v->UrgentLatencyPixelDataOnly,
2591 v->UrgentLatencyPixelMixedWithVMData,
2592 v->UrgentLatencyVMDataOnly,
2593 v->DoUrgentLatencyAdjustment,
2594 v->UrgentLatencyAdjustmentFabricClockComponent,
2595 v->UrgentLatencyAdjustmentFabricClockReference,
2596 v->FabricClock);
2597
2598 v->FractionOfUrgentBandwidth = 0.0;
2599 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2600
2601 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2602
2603 do {
2604 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2605 bool DestinationLineTimesForPrefetchLessThan2 = false;
2606 bool VRatioPrefetchMoreThan4 = false;
2607 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2608
2609 MaxTotalRDBandwidth = 0;
2610
2611 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2612
2613 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2614 Pipe myPipe;
2615
2616 myPipe.DPPCLK = v->DPPCLK[k];
2617 myPipe.DISPCLK = v->DISPCLK;
2618 myPipe.PixelClock = v->PixelClock[k];
2619 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2620 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2621 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2622 myPipe.VRatio = v->VRatio[k];
2623 myPipe.VRatioChroma = v->VRatioChroma[k];
2624 myPipe.SourceScan = v->SourceScan[k];
2625 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2626 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2627 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2628 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2629 myPipe.InterlaceEnable = v->Interlace[k];
2630 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2631 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2632 myPipe.HTotal = v->HTotal[k];
2633 myPipe.DCCEnable = v->DCCEnable[k];
2634 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2635 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2636 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2637 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2638 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2639 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2640 v->ErrorResult[k] = CalculatePrefetchSchedule(
2641 mode_lib,
2642 HostVMInefficiencyFactor,
2643 &myPipe,
2644 v->DSCDelay[k],
2645 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2646 v->DPPCLKDelaySCL,
2647 v->DPPCLKDelaySCLLBOnly,
2648 v->DPPCLKDelayCNVCCursor,
2649 v->DISPCLKDelaySubtotal,
2650 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2651 v->OutputFormat[k],
2652 v->MaxInterDCNTileRepeaters,
2653 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2654 v->MaxVStartupLines[k],
2655 v->GPUVMMaxPageTableLevels,
2656 v->GPUVMEnable,
2657 v->HostVMEnable,
2658 v->HostVMMaxNonCachedPageTableLevels,
2659 v->HostVMMinPageSize,
2660 v->DynamicMetadataEnable[k],
2661 v->DynamicMetadataVMEnabled,
2662 v->DynamicMetadataLinesBeforeActiveRequired[k],
2663 v->DynamicMetadataTransmittedBytes[k],
2664 v->UrgentLatency,
2665 v->UrgentExtraLatency,
2666 v->TCalc,
2667 v->PDEAndMetaPTEBytesFrame[k],
2668 v->MetaRowByte[k],
2669 v->PixelPTEBytesPerRow[k],
2670 v->PrefetchSourceLinesY[k],
2671 v->SwathWidthY[k],
2672 v->VInitPreFillY[k],
2673 v->MaxNumSwathY[k],
2674 v->PrefetchSourceLinesC[k],
2675 v->SwathWidthC[k],
2676 v->VInitPreFillC[k],
2677 v->MaxNumSwathC[k],
2678 v->swath_width_luma_ub[k],
2679 v->swath_width_chroma_ub[k],
2680 v->SwathHeightY[k],
2681 v->SwathHeightC[k],
2682 TWait,
2683 &v->DSTXAfterScaler[k],
2684 &v->DSTYAfterScaler[k],
2685 &v->DestinationLinesForPrefetch[k],
2686 &v->PrefetchBandwidth[k],
2687 &v->DestinationLinesToRequestVMInVBlank[k],
2688 &v->DestinationLinesToRequestRowInVBlank[k],
2689 &v->VRatioPrefetchY[k],
2690 &v->VRatioPrefetchC[k],
2691 &v->RequiredPrefetchPixDataBWLuma[k],
2692 &v->RequiredPrefetchPixDataBWChroma[k],
2693 &v->NotEnoughTimeForDynamicMetadata[k],
2694 &v->Tno_bw[k],
2695 &v->prefetch_vmrow_bw[k],
2696 &v->Tdmdl_vm[k],
2697 &v->Tdmdl[k],
2698 &v->TSetup[k],
2699 &v->VUpdateOffsetPix[k],
2700 &v->VUpdateWidthPix[k],
2701 &v->VReadyOffsetPix[k]);
2702
2703 #ifdef __DML_VBA_DEBUG__
2704 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2705 #endif
2706 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2707 }
2708
2709 v->NoEnoughUrgentLatencyHiding = false;
2710 v->NoEnoughUrgentLatencyHidingPre = false;
2711
2712 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2713 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2714 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2715 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2716 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2717
2718 CalculateUrgentBurstFactor(
2719 v->swath_width_luma_ub[k],
2720 v->swath_width_chroma_ub[k],
2721 v->SwathHeightY[k],
2722 v->SwathHeightC[k],
2723 v->HTotal[k] / v->PixelClock[k],
2724 v->UrgentLatency,
2725 v->CursorBufferSize,
2726 v->CursorWidth[k][0],
2727 v->CursorBPP[k][0],
2728 v->VRatio[k],
2729 v->VRatioChroma[k],
2730 v->BytePerPixelDETY[k],
2731 v->BytePerPixelDETC[k],
2732 v->DETBufferSizeY[k],
2733 v->DETBufferSizeC[k],
2734 &v->UrgBurstFactorCursor[k],
2735 &v->UrgBurstFactorLuma[k],
2736 &v->UrgBurstFactorChroma[k],
2737 &v->NoUrgentLatencyHiding[k]);
2738
2739 CalculateUrgentBurstFactor(
2740 v->swath_width_luma_ub[k],
2741 v->swath_width_chroma_ub[k],
2742 v->SwathHeightY[k],
2743 v->SwathHeightC[k],
2744 v->HTotal[k] / v->PixelClock[k],
2745 v->UrgentLatency,
2746 v->CursorBufferSize,
2747 v->CursorWidth[k][0],
2748 v->CursorBPP[k][0],
2749 v->VRatioPrefetchY[k],
2750 v->VRatioPrefetchC[k],
2751 v->BytePerPixelDETY[k],
2752 v->BytePerPixelDETC[k],
2753 v->DETBufferSizeY[k],
2754 v->DETBufferSizeC[k],
2755 &v->UrgBurstFactorCursorPre[k],
2756 &v->UrgBurstFactorLumaPre[k],
2757 &v->UrgBurstFactorChromaPre[k],
2758 &v->NoUrgentLatencyHidingPre[k]);
2759
2760 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2761 + dml_max3(
2762 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2763 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2764 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2765 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2766 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2767 v->DPPPerPlane[k]
2768 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2769 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2770 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2771
2772 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2773 + dml_max3(
2774 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2775 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2776 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2777 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2778 + v->cursor_bw_pre[k]);
2779
2780 #ifdef __DML_VBA_DEBUG__
2781 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2782 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2783 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2784 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2785 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2786
2787 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2788 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2789
2790 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2791 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2792 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2793 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2794 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2795 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2796 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2797 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2798 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2799 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2800 #endif
2801
2802 if (v->DestinationLinesForPrefetch[k] < 2)
2803 DestinationLineTimesForPrefetchLessThan2 = true;
2804
2805 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2806 VRatioPrefetchMoreThan4 = true;
2807
2808 if (v->NoUrgentLatencyHiding[k] == true)
2809 v->NoEnoughUrgentLatencyHiding = true;
2810
2811 if (v->NoUrgentLatencyHidingPre[k] == true)
2812 v->NoEnoughUrgentLatencyHidingPre = true;
2813 }
2814
2815 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2816
2817 #ifdef __DML_VBA_DEBUG__
2818 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2819 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2820 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2821 #endif
2822
2823 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2824 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2825 v->PrefetchModeSupported = true;
2826 else {
2827 v->PrefetchModeSupported = false;
2828 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2829 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2830 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2831 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2832 }
2833
2834 // PREVIOUS_ERROR
2835 // This error result check was done after the PrefetchModeSupported. So we will
2836 // still try to calculate flip schedule even prefetch mode not supported
2837 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2838 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2839 v->PrefetchModeSupported = false;
2840 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2841 }
2842 }
2843
2844 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2845 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2846 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2847 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2848 - dml_max(
2849 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2850 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2851 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2852 v->DPPPerPlane[k]
2853 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2854 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2855 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2856 }
2857
2858 v->TotImmediateFlipBytes = 0;
2859 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2860 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2861 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2862 }
2863 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2864 CalculateFlipSchedule(
2865 mode_lib,
2866 k,
2867 HostVMInefficiencyFactor,
2868 v->UrgentExtraLatency,
2869 v->UrgentLatency,
2870 v->PDEAndMetaPTEBytesFrame[k],
2871 v->MetaRowByte[k],
2872 v->PixelPTEBytesPerRow[k]);
2873 }
2874
2875 v->total_dcn_read_bw_with_flip = 0.0;
2876 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2877 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2878 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2879 + dml_max3(
2880 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2881 v->DPPPerPlane[k] * v->final_flip_bw[k]
2882 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2883 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2884 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2885 v->DPPPerPlane[k]
2886 * (v->final_flip_bw[k]
2887 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2888 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2889 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2890 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2891 + dml_max3(
2892 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2893 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2894 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2895 v->DPPPerPlane[k]
2896 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2897 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2898 }
2899 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2900
2901 v->ImmediateFlipSupported = true;
2902 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2903 #ifdef __DML_VBA_DEBUG__
2904 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2905 #endif
2906 v->ImmediateFlipSupported = false;
2907 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2908 }
2909 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2910 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2911 #ifdef __DML_VBA_DEBUG__
2912 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2913 #endif
2914 v->ImmediateFlipSupported = false;
2915 }
2916 }
2917 } else {
2918 v->ImmediateFlipSupported = false;
2919 }
2920
2921 v->PrefetchAndImmediateFlipSupported =
2922 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2923 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2924 v->ImmediateFlipSupported)) ? true : false;
2925 #ifdef __DML_VBA_DEBUG__
2926 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2927 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2928 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2929 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2930 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2931 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2932 #endif
2933 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2934
2935 v->VStartupLines = v->VStartupLines + 1;
2936 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2937 ASSERT(v->PrefetchAndImmediateFlipSupported);
2938
2939 // Unbounded Request Enabled
2940 CalculateUnboundedRequestAndCompressedBufferSize(
2941 v->DETBufferSizeInKByte[0],
2942 v->ConfigReturnBufferSizeInKByte,
2943 v->UseUnboundedRequesting,
2944 v->TotalActiveDPP,
2945 NoChromaPlanes,
2946 v->MaxNumDPP,
2947 v->CompressedBufferSegmentSizeInkByte,
2948 v->Output,
2949 &v->UnboundedRequestEnabled,
2950 &v->CompressedBufferSizeInkByte);
2951
2952 //Watermarks and NB P-State/DRAM Clock Change Support
2953 {
2954 enum clock_change_support DRAMClockChangeSupport; // dummy
2955
2956 CalculateWatermarksAndDRAMSpeedChangeSupport(
2957 mode_lib,
2958 PrefetchMode,
2959 v->DCFCLK,
2960 v->ReturnBW,
2961 v->UrgentLatency,
2962 v->UrgentExtraLatency,
2963 v->SOCCLK,
2964 v->DCFCLKDeepSleep,
2965 v->DETBufferSizeY,
2966 v->DETBufferSizeC,
2967 v->SwathHeightY,
2968 v->SwathHeightC,
2969 v->SwathWidthY,
2970 v->SwathWidthC,
2971 v->DPPPerPlane,
2972 v->BytePerPixelDETY,
2973 v->BytePerPixelDETC,
2974 v->UnboundedRequestEnabled,
2975 v->CompressedBufferSizeInkByte,
2976 &DRAMClockChangeSupport,
2977 &v->StutterExitWatermark,
2978 &v->StutterEnterPlusExitWatermark,
2979 &v->Z8StutterExitWatermark,
2980 &v->Z8StutterEnterPlusExitWatermark);
2981
2982 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2983 if (v->WritebackEnable[k] == true) {
2984 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2985 0,
2986 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2987 } else {
2988 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2989 }
2990 }
2991 }
2992
2993 //Display Pipeline Delivery Time in Prefetch, Groups
2994 CalculatePixelDeliveryTimes(
2995 v->NumberOfActivePlanes,
2996 v->VRatio,
2997 v->VRatioChroma,
2998 v->VRatioPrefetchY,
2999 v->VRatioPrefetchC,
3000 v->swath_width_luma_ub,
3001 v->swath_width_chroma_ub,
3002 v->DPPPerPlane,
3003 v->HRatio,
3004 v->HRatioChroma,
3005 v->PixelClock,
3006 v->PSCL_THROUGHPUT_LUMA,
3007 v->PSCL_THROUGHPUT_CHROMA,
3008 v->DPPCLK,
3009 v->BytePerPixelC,
3010 v->SourceScan,
3011 v->NumberOfCursors,
3012 v->CursorWidth,
3013 v->CursorBPP,
3014 v->BlockWidth256BytesY,
3015 v->BlockHeight256BytesY,
3016 v->BlockWidth256BytesC,
3017 v->BlockHeight256BytesC,
3018 v->DisplayPipeLineDeliveryTimeLuma,
3019 v->DisplayPipeLineDeliveryTimeChroma,
3020 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3021 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3022 v->DisplayPipeRequestDeliveryTimeLuma,
3023 v->DisplayPipeRequestDeliveryTimeChroma,
3024 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3025 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3026 v->CursorRequestDeliveryTime,
3027 v->CursorRequestDeliveryTimePrefetch);
3028
3029 CalculateMetaAndPTETimes(
3030 v->NumberOfActivePlanes,
3031 v->GPUVMEnable,
3032 v->MetaChunkSize,
3033 v->MinMetaChunkSizeBytes,
3034 v->HTotal,
3035 v->VRatio,
3036 v->VRatioChroma,
3037 v->DestinationLinesToRequestRowInVBlank,
3038 v->DestinationLinesToRequestRowInImmediateFlip,
3039 v->DCCEnable,
3040 v->PixelClock,
3041 v->BytePerPixelY,
3042 v->BytePerPixelC,
3043 v->SourceScan,
3044 v->dpte_row_height,
3045 v->dpte_row_height_chroma,
3046 v->meta_row_width,
3047 v->meta_row_width_chroma,
3048 v->meta_row_height,
3049 v->meta_row_height_chroma,
3050 v->meta_req_width,
3051 v->meta_req_width_chroma,
3052 v->meta_req_height,
3053 v->meta_req_height_chroma,
3054 v->dpte_group_bytes,
3055 v->PTERequestSizeY,
3056 v->PTERequestSizeC,
3057 v->PixelPTEReqWidthY,
3058 v->PixelPTEReqHeightY,
3059 v->PixelPTEReqWidthC,
3060 v->PixelPTEReqHeightC,
3061 v->dpte_row_width_luma_ub,
3062 v->dpte_row_width_chroma_ub,
3063 v->DST_Y_PER_PTE_ROW_NOM_L,
3064 v->DST_Y_PER_PTE_ROW_NOM_C,
3065 v->DST_Y_PER_META_ROW_NOM_L,
3066 v->DST_Y_PER_META_ROW_NOM_C,
3067 v->TimePerMetaChunkNominal,
3068 v->TimePerChromaMetaChunkNominal,
3069 v->TimePerMetaChunkVBlank,
3070 v->TimePerChromaMetaChunkVBlank,
3071 v->TimePerMetaChunkFlip,
3072 v->TimePerChromaMetaChunkFlip,
3073 v->time_per_pte_group_nom_luma,
3074 v->time_per_pte_group_vblank_luma,
3075 v->time_per_pte_group_flip_luma,
3076 v->time_per_pte_group_nom_chroma,
3077 v->time_per_pte_group_vblank_chroma,
3078 v->time_per_pte_group_flip_chroma);
3079
3080 CalculateVMGroupAndRequestTimes(
3081 v->NumberOfActivePlanes,
3082 v->GPUVMEnable,
3083 v->GPUVMMaxPageTableLevels,
3084 v->HTotal,
3085 v->BytePerPixelC,
3086 v->DestinationLinesToRequestVMInVBlank,
3087 v->DestinationLinesToRequestVMInImmediateFlip,
3088 v->DCCEnable,
3089 v->PixelClock,
3090 v->dpte_row_width_luma_ub,
3091 v->dpte_row_width_chroma_ub,
3092 v->vm_group_bytes,
3093 v->dpde0_bytes_per_frame_ub_l,
3094 v->dpde0_bytes_per_frame_ub_c,
3095 v->meta_pte_bytes_per_frame_ub_l,
3096 v->meta_pte_bytes_per_frame_ub_c,
3097 v->TimePerVMGroupVBlank,
3098 v->TimePerVMGroupFlip,
3099 v->TimePerVMRequestVBlank,
3100 v->TimePerVMRequestFlip);
3101
3102 // Min TTUVBlank
3103 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3104 if (PrefetchMode == 0) {
3105 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3106 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3107 v->MinTTUVBlank[k] = dml_max(
3108 v->DRAMClockChangeWatermark,
3109 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3110 } else if (PrefetchMode == 1) {
3111 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3112 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3113 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3114 } else {
3115 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3116 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3117 v->MinTTUVBlank[k] = v->UrgentWatermark;
3118 }
3119 if (!v->DynamicMetadataEnable[k])
3120 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3121 }
3122
3123 // DCC Configuration
3124 v->ActiveDPPs = 0;
3125 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3126 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3127 v->SourcePixelFormat[k],
3128 v->SurfaceWidthY[k],
3129 v->SurfaceWidthC[k],
3130 v->SurfaceHeightY[k],
3131 v->SurfaceHeightC[k],
3132 v->DETBufferSizeInKByte[0] * 1024,
3133 v->BlockHeight256BytesY[k],
3134 v->BlockHeight256BytesC[k],
3135 v->SurfaceTiling[k],
3136 v->BytePerPixelY[k],
3137 v->BytePerPixelC[k],
3138 v->BytePerPixelDETY[k],
3139 v->BytePerPixelDETC[k],
3140 v->SourceScan[k],
3141 &v->DCCYMaxUncompressedBlock[k],
3142 &v->DCCCMaxUncompressedBlock[k],
3143 &v->DCCYMaxCompressedBlock[k],
3144 &v->DCCCMaxCompressedBlock[k],
3145 &v->DCCYIndependentBlock[k],
3146 &v->DCCCIndependentBlock[k]);
3147 }
3148
3149 // VStartup Adjustment
3150 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3151 bool isInterlaceTiming;
3152 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3153 #ifdef __DML_VBA_DEBUG__
3154 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3155 #endif
3156
3157 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3158
3159 #ifdef __DML_VBA_DEBUG__
3160 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3161 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3162 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3163 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3164 #endif
3165
3166 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3167 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3168 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3169 }
3170
3171 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3172 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3173 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3174 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3175 } else {
3176 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3177 }
3178 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3179 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3180 <= (isInterlaceTiming ?
3181 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3182 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3183 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3184 } else {
3185 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3186 }
3187 #ifdef __DML_VBA_DEBUG__
3188 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3189 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3190 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3191 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3192 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3193 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3194 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3195 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3196 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3197 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3198 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3199 #endif
3200 }
3201
3202 {
3203 //Maximum Bandwidth Used
3204 double TotalWRBandwidth = 0;
3205 double MaxPerPlaneVActiveWRBandwidth = 0;
3206 double WRBandwidth = 0;
3207
3208 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3209 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3210 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3211 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3212 } else if (v->WritebackEnable[k] == true) {
3213 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3214 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3215 }
3216 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3217 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3218 }
3219
3220 v->TotalDataReadBandwidth = 0;
3221 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3222 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3223 }
3224 }
3225 // Stutter Efficiency
3226 CalculateStutterEfficiency(
3227 mode_lib,
3228 v->CompressedBufferSizeInkByte,
3229 v->UnboundedRequestEnabled,
3230 v->ConfigReturnBufferSizeInKByte,
3231 v->MetaFIFOSizeInKEntries,
3232 v->ZeroSizeBufferEntries,
3233 v->NumberOfActivePlanes,
3234 v->ROBBufferSizeInKByte,
3235 v->TotalDataReadBandwidth,
3236 v->DCFCLK,
3237 v->ReturnBW,
3238 v->COMPBUF_RESERVED_SPACE_64B,
3239 v->COMPBUF_RESERVED_SPACE_ZS,
3240 v->SRExitTime,
3241 v->SRExitZ8Time,
3242 v->SynchronizedVBlank,
3243 v->StutterEnterPlusExitWatermark,
3244 v->Z8StutterEnterPlusExitWatermark,
3245 v->ProgressiveToInterlaceUnitInOPP,
3246 v->Interlace,
3247 v->MinTTUVBlank,
3248 v->DPPPerPlane,
3249 v->DETBufferSizeY,
3250 v->BytePerPixelY,
3251 v->BytePerPixelDETY,
3252 v->SwathWidthY,
3253 v->SwathHeightY,
3254 v->SwathHeightC,
3255 v->DCCRateLuma,
3256 v->DCCRateChroma,
3257 v->DCCFractionOfZeroSizeRequestsLuma,
3258 v->DCCFractionOfZeroSizeRequestsChroma,
3259 v->HTotal,
3260 v->VTotal,
3261 v->PixelClock,
3262 v->VRatio,
3263 v->SourceScan,
3264 v->BlockHeight256BytesY,
3265 v->BlockWidth256BytesY,
3266 v->BlockHeight256BytesC,
3267 v->BlockWidth256BytesC,
3268 v->DCCYMaxUncompressedBlock,
3269 v->DCCCMaxUncompressedBlock,
3270 v->VActive,
3271 v->DCCEnable,
3272 v->WritebackEnable,
3273 v->ReadBandwidthPlaneLuma,
3274 v->ReadBandwidthPlaneChroma,
3275 v->meta_row_bw,
3276 v->dpte_row_bw,
3277 &v->StutterEfficiencyNotIncludingVBlank,
3278 &v->StutterEfficiency,
3279 &v->NumberOfStutterBurstsPerFrame,
3280 &v->Z8StutterEfficiencyNotIncludingVBlank,
3281 &v->Z8StutterEfficiency,
3282 &v->Z8NumberOfStutterBurstsPerFrame,
3283 &v->StutterPeriod);
3284 }
3285
3286 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3287 {
3288 struct vba_vars_st *v = &mode_lib->vba;
3289 // Display Pipe Configuration
3290 double BytePerPixDETY[DC__NUM_DPP__MAX];
3291 double BytePerPixDETC[DC__NUM_DPP__MAX];
3292 int BytePerPixY[DC__NUM_DPP__MAX];
3293 int BytePerPixC[DC__NUM_DPP__MAX];
3294 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3295 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3296 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3297 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3298 double dummy1[DC__NUM_DPP__MAX];
3299 double dummy2[DC__NUM_DPP__MAX];
3300 double dummy3[DC__NUM_DPP__MAX];
3301 double dummy4[DC__NUM_DPP__MAX];
3302 int dummy5[DC__NUM_DPP__MAX];
3303 int dummy6[DC__NUM_DPP__MAX];
3304 bool dummy7[DC__NUM_DPP__MAX];
3305 bool dummysinglestring;
3306
3307 unsigned int k;
3308
3309 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3310
3311 CalculateBytePerPixelAnd256BBlockSizes(
3312 v->SourcePixelFormat[k],
3313 v->SurfaceTiling[k],
3314 &BytePerPixY[k],
3315 &BytePerPixC[k],
3316 &BytePerPixDETY[k],
3317 &BytePerPixDETC[k],
3318 &Read256BytesBlockHeightY[k],
3319 &Read256BytesBlockHeightC[k],
3320 &Read256BytesBlockWidthY[k],
3321 &Read256BytesBlockWidthC[k]);
3322 }
3323
3324 CalculateSwathAndDETConfiguration(
3325 false,
3326 v->NumberOfActivePlanes,
3327 v->DETBufferSizeInKByte[0],
3328 dummy1,
3329 dummy2,
3330 v->SourceScan,
3331 v->SourcePixelFormat,
3332 v->SurfaceTiling,
3333 v->ViewportWidth,
3334 v->ViewportHeight,
3335 v->SurfaceWidthY,
3336 v->SurfaceWidthC,
3337 v->SurfaceHeightY,
3338 v->SurfaceHeightC,
3339 Read256BytesBlockHeightY,
3340 Read256BytesBlockHeightC,
3341 Read256BytesBlockWidthY,
3342 Read256BytesBlockWidthC,
3343 v->ODMCombineEnabled,
3344 v->BlendingAndTiming,
3345 BytePerPixY,
3346 BytePerPixC,
3347 BytePerPixDETY,
3348 BytePerPixDETC,
3349 v->HActive,
3350 v->HRatio,
3351 v->HRatioChroma,
3352 v->DPPPerPlane,
3353 dummy5,
3354 dummy6,
3355 dummy3,
3356 dummy4,
3357 v->SwathHeightY,
3358 v->SwathHeightC,
3359 v->DETBufferSizeY,
3360 v->DETBufferSizeC,
3361 dummy7,
3362 &dummysinglestring);
3363 }
3364
3365 static bool CalculateBytePerPixelAnd256BBlockSizes(
3366 enum source_format_class SourcePixelFormat,
3367 enum dm_swizzle_mode SurfaceTiling,
3368 unsigned int *BytePerPixelY,
3369 unsigned int *BytePerPixelC,
3370 double *BytePerPixelDETY,
3371 double *BytePerPixelDETC,
3372 unsigned int *BlockHeight256BytesY,
3373 unsigned int *BlockHeight256BytesC,
3374 unsigned int *BlockWidth256BytesY,
3375 unsigned int *BlockWidth256BytesC)
3376 {
3377 if (SourcePixelFormat == dm_444_64) {
3378 *BytePerPixelDETY = 8;
3379 *BytePerPixelDETC = 0;
3380 *BytePerPixelY = 8;
3381 *BytePerPixelC = 0;
3382 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3383 *BytePerPixelDETY = 4;
3384 *BytePerPixelDETC = 0;
3385 *BytePerPixelY = 4;
3386 *BytePerPixelC = 0;
3387 } else if (SourcePixelFormat == dm_444_16) {
3388 *BytePerPixelDETY = 2;
3389 *BytePerPixelDETC = 0;
3390 *BytePerPixelY = 2;
3391 *BytePerPixelC = 0;
3392 } else if (SourcePixelFormat == dm_444_8) {
3393 *BytePerPixelDETY = 1;
3394 *BytePerPixelDETC = 0;
3395 *BytePerPixelY = 1;
3396 *BytePerPixelC = 0;
3397 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3398 *BytePerPixelDETY = 4;
3399 *BytePerPixelDETC = 1;
3400 *BytePerPixelY = 4;
3401 *BytePerPixelC = 1;
3402 } else if (SourcePixelFormat == dm_420_8) {
3403 *BytePerPixelDETY = 1;
3404 *BytePerPixelDETC = 2;
3405 *BytePerPixelY = 1;
3406 *BytePerPixelC = 2;
3407 } else if (SourcePixelFormat == dm_420_12) {
3408 *BytePerPixelDETY = 2;
3409 *BytePerPixelDETC = 4;
3410 *BytePerPixelY = 2;
3411 *BytePerPixelC = 4;
3412 } else {
3413 *BytePerPixelDETY = 4.0 / 3;
3414 *BytePerPixelDETC = 8.0 / 3;
3415 *BytePerPixelY = 2;
3416 *BytePerPixelC = 4;
3417 }
3418
3419 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3420 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3421 if (SurfaceTiling == dm_sw_linear) {
3422 *BlockHeight256BytesY = 1;
3423 } else if (SourcePixelFormat == dm_444_64) {
3424 *BlockHeight256BytesY = 4;
3425 } else if (SourcePixelFormat == dm_444_8) {
3426 *BlockHeight256BytesY = 16;
3427 } else {
3428 *BlockHeight256BytesY = 8;
3429 }
3430 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3431 *BlockHeight256BytesC = 0;
3432 *BlockWidth256BytesC = 0;
3433 } else {
3434 if (SurfaceTiling == dm_sw_linear) {
3435 *BlockHeight256BytesY = 1;
3436 *BlockHeight256BytesC = 1;
3437 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3438 *BlockHeight256BytesY = 8;
3439 *BlockHeight256BytesC = 16;
3440 } else if (SourcePixelFormat == dm_420_8) {
3441 *BlockHeight256BytesY = 16;
3442 *BlockHeight256BytesC = 8;
3443 } else {
3444 *BlockHeight256BytesY = 8;
3445 *BlockHeight256BytesC = 8;
3446 }
3447 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3448 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3449 }
3450 return true;
3451 }
3452
/*
 * CalculateTWait - wait time selected by the prefetch mode.
 *
 * PrefetchMode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                     SREnterPlusExitTime, UrgentLatency)
 * PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency)
 * anything else:  UrgentLatency
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3463
3464 double dml314_CalculateWriteBackDISPCLK(
3465 enum source_format_class WritebackPixelFormat,
3466 double PixelClock,
3467 double WritebackHRatio,
3468 double WritebackVRatio,
3469 unsigned int WritebackHTaps,
3470 unsigned int WritebackVTaps,
3471 long WritebackSourceWidth,
3472 long WritebackDestinationWidth,
3473 unsigned int HTotal,
3474 unsigned int WritebackLineBufferSize)
3475 {
3476 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3477
3478 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3479 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3480 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3481 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3482 }
3483
3484 static double CalculateWriteBackDelay(
3485 enum source_format_class WritebackPixelFormat,
3486 double WritebackHRatio,
3487 double WritebackVRatio,
3488 unsigned int WritebackVTaps,
3489 int WritebackDestinationWidth,
3490 int WritebackDestinationHeight,
3491 int WritebackSourceHeight,
3492 unsigned int HTotal)
3493 {
3494 double CalculateWriteBackDelay;
3495 double Line_length;
3496 double Output_lines_last_notclamped;
3497 double WritebackVInit;
3498
3499 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3500 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3501 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3502 if (Output_lines_last_notclamped < 0) {
3503 CalculateWriteBackDelay = 0;
3504 } else {
3505 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3506 }
3507 return CalculateWriteBackDelay;
3508 }
3509
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE/VREADY pixel
 * offsets and the dynamic-metadata timing terms for one pipe.
 *
 * Outputs:
 * @VUpdateWidthPix:  ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay) * PixelClock)
 * @VReadyOffsetPix:  ceil(max(150/DPPCLK, repeater delay + 20/DCFClkDeepSleep
 *                    + 10/DPPCLK) * PixelClock)
 * @VUpdateOffsetPix: ceil(HTotal / 4)
 * @TSetup:           sum of the three values above divided by PixelClock
 * @Tdmbf:            DynamicMetadataTransmittedBytes / 4 / DISPCLK
 * @Tdmec:            HTotal / PixelClock
 * @Tdmsks:           half of the VBlank duration when
 *                    DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                    the duration of that many lines; halved once more when
 *                    InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *                    is false
 *
 * "repeater delay" is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix point at doubles, so they must be
	 * printed with %f; only VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3553
3554 static void CalculateRowBandwidth(
3555 bool GPUVMEnable,
3556 enum source_format_class SourcePixelFormat,
3557 double VRatio,
3558 double VRatioChroma,
3559 bool DCCEnable,
3560 double LineTime,
3561 unsigned int MetaRowByteLuma,
3562 unsigned int MetaRowByteChroma,
3563 unsigned int meta_row_height_luma,
3564 unsigned int meta_row_height_chroma,
3565 unsigned int PixelPTEBytesPerRowLuma,
3566 unsigned int PixelPTEBytesPerRowChroma,
3567 unsigned int dpte_row_height_luma,
3568 unsigned int dpte_row_height_chroma,
3569 double *meta_row_bw,
3570 double *dpte_row_bw)
3571 {
3572 if (DCCEnable != true) {
3573 *meta_row_bw = 0;
3574 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3575 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3576 } else {
3577 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3578 }
3579
3580 if (GPUVMEnable != true) {
3581 *dpte_row_bw = 0;
3582 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3583 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3584 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3585 } else {
3586 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3587 }
3588 }
3589
3590 static void CalculateFlipSchedule(
3591 struct display_mode_lib *mode_lib,
3592 unsigned int k,
3593 double HostVMInefficiencyFactor,
3594 double UrgentExtraLatency,
3595 double UrgentLatency,
3596 double PDEAndMetaPTEBytesPerFrame,
3597 double MetaRowBytes,
3598 double DPTEBytesPerRow)
3599 {
3600 struct vba_vars_st *v = &mode_lib->vba;
3601 double min_row_time = 0.0;
3602 unsigned int HostVMDynamicLevelsTrips;
3603 double TimeForFetchingMetaPTEImmediateFlip;
3604 double TimeForFetchingRowInVBlankImmediateFlip;
3605 double ImmediateFlipBW = 1.0;
3606 double LineTime = v->HTotal[k] / v->PixelClock[k];
3607
3608 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3609 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3610 } else {
3611 HostVMDynamicLevelsTrips = 0;
3612 }
3613
3614 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3615 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3616 }
3617
3618 if (v->GPUVMEnable == true) {
3619 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3620 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3621 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3622 LineTime / 4.0);
3623 } else {
3624 TimeForFetchingMetaPTEImmediateFlip = 0;
3625 }
3626
3627 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3628 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3629 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3630 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3631 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3632 LineTime / 4);
3633 } else {
3634 TimeForFetchingRowInVBlankImmediateFlip = 0;
3635 }
3636
3637 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3638
3639 if (v->GPUVMEnable == true) {
3640 v->final_flip_bw[k] = dml_max(
3641 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3642 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3643 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3644 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3645 } else {
3646 v->final_flip_bw[k] = 0;
3647 }
3648
3649 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3650 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3651 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3652 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3653 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3654 } else {
3655 min_row_time = dml_min4(
3656 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3657 v->meta_row_height[k] * LineTime / v->VRatio[k],
3658 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3659 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3660 }
3661 } else {
3662 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3663 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3664 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3665 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3666 } else {
3667 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3668 }
3669 }
3670
3671 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3672 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3673 v->ImmediateFlipSupportedForPipe[k] = false;
3674 } else {
3675 v->ImmediateFlipSupportedForPipe[k] = true;
3676 }
3677
3678 #ifdef __DML_VBA_DEBUG__
3679 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3680 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3681 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3682 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3683 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3684 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3685 #endif
3686
3687 }
3688
3689 static double TruncToValidBPP(
3690 double LinkBitRate,
3691 int Lanes,
3692 int HTotal,
3693 int HActive,
3694 double PixelClock,
3695 double DesiredBPP,
3696 bool DSCEnable,
3697 enum output_encoder_class Output,
3698 enum output_format_class Format,
3699 unsigned int DSCInputBitPerComponent,
3700 int DSCSlices,
3701 int AudioRate,
3702 int AudioLayout,
3703 enum odm_combine_mode ODMCombine)
3704 {
3705 double MaxLinkBPP;
3706 int MinDSCBPP;
3707 double MaxDSCBPP;
3708 int NonDSCBPP0;
3709 int NonDSCBPP1;
3710 int NonDSCBPP2;
3711
3712 if (Format == dm_420) {
3713 NonDSCBPP0 = 12;
3714 NonDSCBPP1 = 15;
3715 NonDSCBPP2 = 18;
3716 MinDSCBPP = 6;
3717 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3718 } else if (Format == dm_444) {
3719 NonDSCBPP0 = 24;
3720 NonDSCBPP1 = 30;
3721 NonDSCBPP2 = 36;
3722 MinDSCBPP = 8;
3723 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3724 } else {
3725
3726 NonDSCBPP0 = 16;
3727 NonDSCBPP1 = 20;
3728 NonDSCBPP2 = 24;
3729
3730 if (Format == dm_n422) {
3731 MinDSCBPP = 7;
3732 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3733 } else {
3734 MinDSCBPP = 8;
3735 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3736 }
3737 }
3738
3739 if (DSCEnable && Output == dm_dp) {
3740 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3741 } else {
3742 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3743 }
3744
3745 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3746 MaxLinkBPP = 16;
3747 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3748 MaxLinkBPP = 32;
3749 }
3750
3751 if (DesiredBPP == 0) {
3752 if (DSCEnable) {
3753 if (MaxLinkBPP < MinDSCBPP) {
3754 return BPP_INVALID;
3755 } else if (MaxLinkBPP >= MaxDSCBPP) {
3756 return MaxDSCBPP;
3757 } else {
3758 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3759 }
3760 } else {
3761 if (MaxLinkBPP >= NonDSCBPP2) {
3762 return NonDSCBPP2;
3763 } else if (MaxLinkBPP >= NonDSCBPP1) {
3764 return NonDSCBPP1;
3765 } else if (MaxLinkBPP >= NonDSCBPP0) {
3766 return 16.0;
3767 } else {
3768 return BPP_INVALID;
3769 }
3770 }
3771 } else {
3772 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3773 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3774 return BPP_INVALID;
3775 } else {
3776 return DesiredBPP;
3777 }
3778 }
3779 }
3780
3781 static noinline void CalculatePrefetchSchedulePerPlane(
3782 struct display_mode_lib *mode_lib,
3783 double HostVMInefficiencyFactor,
3784 int i,
3785 unsigned int j,
3786 unsigned int k)
3787 {
3788 struct vba_vars_st *v = &mode_lib->vba;
3789 Pipe myPipe;
3790
3791 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3792 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3793 myPipe.PixelClock = v->PixelClock[k];
3794 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3795 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3796 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3797 myPipe.VRatio = mode_lib->vba.VRatio[k];
3798 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3799
3800 myPipe.SourceScan = v->SourceScan[k];
3801 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3802 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3803 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3804 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3805 myPipe.InterlaceEnable = v->Interlace[k];
3806 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3807 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3808 myPipe.HTotal = v->HTotal[k];
3809 myPipe.DCCEnable = v->DCCEnable[k];
3810 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3811 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3812 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3813 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3814 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3815 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3816 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3817 mode_lib,
3818 HostVMInefficiencyFactor,
3819 &myPipe,
3820 v->DSCDelayPerState[i][k],
3821 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3822 v->DPPCLKDelaySCL,
3823 v->DPPCLKDelaySCLLBOnly,
3824 v->DPPCLKDelayCNVCCursor,
3825 v->DISPCLKDelaySubtotal,
3826 v->SwathWidthYThisState[k] / v->HRatio[k],
3827 v->OutputFormat[k],
3828 v->MaxInterDCNTileRepeaters,
3829 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3830 v->MaximumVStartup[i][j][k],
3831 v->GPUVMMaxPageTableLevels,
3832 v->GPUVMEnable,
3833 v->HostVMEnable,
3834 v->HostVMMaxNonCachedPageTableLevels,
3835 v->HostVMMinPageSize,
3836 v->DynamicMetadataEnable[k],
3837 v->DynamicMetadataVMEnabled,
3838 v->DynamicMetadataLinesBeforeActiveRequired[k],
3839 v->DynamicMetadataTransmittedBytes[k],
3840 v->UrgLatency[i],
3841 v->ExtraLatency,
3842 v->TimeCalc,
3843 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3844 v->MetaRowBytes[i][j][k],
3845 v->DPTEBytesPerRow[i][j][k],
3846 v->PrefetchLinesY[i][j][k],
3847 v->SwathWidthYThisState[k],
3848 v->PrefillY[k],
3849 v->MaxNumSwY[k],
3850 v->PrefetchLinesC[i][j][k],
3851 v->SwathWidthCThisState[k],
3852 v->PrefillC[k],
3853 v->MaxNumSwC[k],
3854 v->swath_width_luma_ub_this_state[k],
3855 v->swath_width_chroma_ub_this_state[k],
3856 v->SwathHeightYThisState[k],
3857 v->SwathHeightCThisState[k],
3858 v->TWait,
3859 &v->DSTXAfterScaler[k],
3860 &v->DSTYAfterScaler[k],
3861 &v->LineTimesForPrefetch[k],
3862 &v->PrefetchBW[k],
3863 &v->LinesForMetaPTE[k],
3864 &v->LinesForMetaAndDPTERow[k],
3865 &v->VRatioPreY[i][j][k],
3866 &v->VRatioPreC[i][j][k],
3867 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3868 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3869 &v->NoTimeForDynamicMetadata[i][j][k],
3870 &v->Tno_bw[k],
3871 &v->prefetch_vmrow_bw[k],
3872 &v->dummy7[k],
3873 &v->dummy8[k],
3874 &v->dummy13[k],
3875 &v->VUpdateOffsetPix[k],
3876 &v->VUpdateWidthPix[k],
3877 &v->VReadyOffsetPix[k]);
3878 }
3879
3880 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3881 {
3882 struct vba_vars_st *v = &mode_lib->vba;
3883
3884 int i, j;
3885 unsigned int k, m;
3886 int ReorderingBytes;
3887 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3888 bool NoChroma = true;
3889 bool EnoughWritebackUnits = true;
3890 bool P2IWith420 = false;
3891 bool DSCOnlyIfNecessaryWithBPP = false;
3892 bool DSC422NativeNotSupported = false;
3893 double MaxTotalVActiveRDBandwidth;
3894 bool ViewportExceedsSurface = false;
3895 bool FMTBufferExceeded = false;
3896
3897 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3898
3899 CalculateMinAndMaxPrefetchMode(
3900 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3901 &MinPrefetchMode, &MaxPrefetchMode);
3902
3903 /*Scale Ratio, taps Support Check*/
3904
3905 v->ScaleRatioAndTapsSupport = true;
3906 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3907 if (v->ScalerEnabled[k] == false
3908 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3909 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3910 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3911 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3912 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3913 v->ScaleRatioAndTapsSupport = false;
3914 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3915 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3916 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3917 || v->VRatio[k] > v->vtaps[k]
3918 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3919 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3920 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3921 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3922 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3923 || v->HRatioChroma[k] > v->MaxHSCLRatio
3924 || v->VRatioChroma[k] > v->MaxVSCLRatio
3925 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3926 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3927 v->ScaleRatioAndTapsSupport = false;
3928 }
3929 }
3930 /*Source Format, Pixel Format and Scan Support Check*/
3931
3932 v->SourceFormatPixelAndScanSupport = true;
3933 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3934 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3935 v->SourceFormatPixelAndScanSupport = false;
3936 }
3937 }
3938 /*Bandwidth Support Check*/
3939
3940 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3941 CalculateBytePerPixelAnd256BBlockSizes(
3942 v->SourcePixelFormat[k],
3943 v->SurfaceTiling[k],
3944 &v->BytePerPixelY[k],
3945 &v->BytePerPixelC[k],
3946 &v->BytePerPixelInDETY[k],
3947 &v->BytePerPixelInDETC[k],
3948 &v->Read256BlockHeightY[k],
3949 &v->Read256BlockHeightC[k],
3950 &v->Read256BlockWidthY[k],
3951 &v->Read256BlockWidthC[k]);
3952 }
3953 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3954 if (v->SourceScan[k] != dm_vert) {
3955 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3956 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3957 } else {
3958 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3959 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3960 }
3961 }
3962 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3963 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3964 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3965 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3966 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3967 }
3968 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3969 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3970 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3971 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3972 } else if (v->WritebackEnable[k] == true) {
3973 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3974 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3975 } else {
3976 v->WriteBandwidth[k] = 0.0;
3977 }
3978 }
3979
3980 /*Writeback Latency support check*/
3981
3982 v->WritebackLatencySupport = true;
3983 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3984 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3985 v->WritebackLatencySupport = false;
3986 }
3987 }
3988
3989 /*Writeback Mode Support Check*/
3990
3991 v->TotalNumberOfActiveWriteback = 0;
3992 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3993 if (v->WritebackEnable[k] == true) {
3994 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3995 }
3996 }
3997
3998 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3999 EnoughWritebackUnits = false;
4000 }
4001
4002 /*Writeback Scale Ratio and Taps Support Check*/
4003
4004 v->WritebackScaleRatioAndTapsSupport = true;
4005 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4006 if (v->WritebackEnable[k] == true) {
4007 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4008 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4009 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4010 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4011 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4012 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4013 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4014 v->WritebackScaleRatioAndTapsSupport = false;
4015 }
4016 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4017 v->WritebackScaleRatioAndTapsSupport = false;
4018 }
4019 }
4020 }
4021 /*Maximum DISPCLK/DPPCLK Support check*/
4022
4023 v->WritebackRequiredDISPCLK = 0.0;
4024 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4025 if (v->WritebackEnable[k] == true) {
4026 v->WritebackRequiredDISPCLK = dml_max(
4027 v->WritebackRequiredDISPCLK,
4028 dml314_CalculateWriteBackDISPCLK(
4029 v->WritebackPixelFormat[k],
4030 v->PixelClock[k],
4031 v->WritebackHRatio[k],
4032 v->WritebackVRatio[k],
4033 v->WritebackHTaps[k],
4034 v->WritebackVTaps[k],
4035 v->WritebackSourceWidth[k],
4036 v->WritebackDestinationWidth[k],
4037 v->HTotal[k],
4038 v->WritebackLineBufferSize));
4039 }
4040 }
4041 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4042 if (v->HRatio[k] > 1.0) {
4043 v->PSCL_FACTOR[k] = dml_min(
4044 v->MaxDCHUBToPSCLThroughput,
4045 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4046 } else {
4047 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4048 }
4049 if (v->BytePerPixelC[k] == 0.0) {
4050 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4051 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4052 * dml_max3(
4053 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4054 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4055 1.0);
4056 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4057 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4058 }
4059 } else {
4060 if (v->HRatioChroma[k] > 1.0) {
4061 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4062 v->MaxDCHUBToPSCLThroughput,
4063 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4064 } else {
4065 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4066 }
4067 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4068 * dml_max5(
4069 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4070 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4071 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4072 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4073 1.0);
4074 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4075 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4076 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4077 }
4078 }
4079 }
4080 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4081 int MaximumSwathWidthSupportLuma;
4082 int MaximumSwathWidthSupportChroma;
4083
4084 if (v->SurfaceTiling[k] == dm_sw_linear) {
4085 MaximumSwathWidthSupportLuma = 8192.0;
4086 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4087 MaximumSwathWidthSupportLuma = 2880.0;
4088 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4089 MaximumSwathWidthSupportLuma = 3840.0;
4090 } else {
4091 MaximumSwathWidthSupportLuma = 5760.0;
4092 }
4093
4094 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4095 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4096 } else {
4097 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4098 }
4099 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4100 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4101 if (v->BytePerPixelC[k] == 0.0) {
4102 v->MaximumSwathWidthInLineBufferChroma = 0;
4103 } else {
4104 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4105 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4106 }
4107 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4108 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4109 }
4110
4111 CalculateSwathAndDETConfiguration(
4112 true,
4113 v->NumberOfActivePlanes,
4114 v->DETBufferSizeInKByte[0],
4115 v->MaximumSwathWidthLuma,
4116 v->MaximumSwathWidthChroma,
4117 v->SourceScan,
4118 v->SourcePixelFormat,
4119 v->SurfaceTiling,
4120 v->ViewportWidth,
4121 v->ViewportHeight,
4122 v->SurfaceWidthY,
4123 v->SurfaceWidthC,
4124 v->SurfaceHeightY,
4125 v->SurfaceHeightC,
4126 v->Read256BlockHeightY,
4127 v->Read256BlockHeightC,
4128 v->Read256BlockWidthY,
4129 v->Read256BlockWidthC,
4130 v->odm_combine_dummy,
4131 v->BlendingAndTiming,
4132 v->BytePerPixelY,
4133 v->BytePerPixelC,
4134 v->BytePerPixelInDETY,
4135 v->BytePerPixelInDETC,
4136 v->HActive,
4137 v->HRatio,
4138 v->HRatioChroma,
4139 v->NoOfDPPThisState,
4140 v->swath_width_luma_ub_this_state,
4141 v->swath_width_chroma_ub_this_state,
4142 v->SwathWidthYThisState,
4143 v->SwathWidthCThisState,
4144 v->SwathHeightYThisState,
4145 v->SwathHeightCThisState,
4146 v->DETBufferSizeYThisState,
4147 v->DETBufferSizeCThisState,
4148 v->SingleDPPViewportSizeSupportPerPlane,
4149 &v->ViewportSizeSupport[0][0]);
4150
4151 for (i = 0; i < v->soc.num_states; i++) {
4152 for (j = 0; j < 2; j++) {
4153 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4154 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4155 v->RequiredDISPCLK[i][j] = 0.0;
4156 v->DISPCLK_DPPCLK_Support[i][j] = true;
4157 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4158 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4159 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4160 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4161 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4162 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4163 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4164 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4165 }
4166 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4167 * (1 + v->DISPCLKRampingMargin / 100.0);
4168 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4169 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4170 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4171 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4172 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4173 }
4174 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4175 * (1 + v->DISPCLKRampingMargin / 100.0);
4176 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4177 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4178 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4179 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4180 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4181 }
4182
4183 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4184 || !(v->Output[k] == dm_dp ||
4185 v->Output[k] == dm_dp2p0 ||
4186 v->Output[k] == dm_edp)) {
4187 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4188 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4189
4190 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4191 FMTBufferExceeded = true;
4192 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4193 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4194 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4195 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4196 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4197 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4198 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4199 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4200 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4201 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4202 } else {
4203 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4204 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4205 }
4206 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4207 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4208 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4211 } else {
4212 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4213 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4214 }
4215 }
4216 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4217 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4218 if (v->Output[k] == dm_hdmi) {
4219 FMTBufferExceeded = true;
4220 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4221 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4222 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4223
4224 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4225 FMTBufferExceeded = true;
4226 } else {
4227 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4228 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4229 }
4230 }
4231 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4232 v->MPCCombine[i][j][k] = false;
4233 v->NoOfDPP[i][j][k] = 4;
4234 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4235 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4236 v->MPCCombine[i][j][k] = false;
4237 v->NoOfDPP[i][j][k] = 2;
4238 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4239 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4240 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4241 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4242 v->MPCCombine[i][j][k] = false;
4243 v->NoOfDPP[i][j][k] = 1;
4244 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4245 } else {
4246 v->MPCCombine[i][j][k] = true;
4247 v->NoOfDPP[i][j][k] = 2;
4248 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4249 }
4250 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4251 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4252 > v->MaxDppclkRoundedDownToDFSGranularity)
4253 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4254 v->DISPCLK_DPPCLK_Support[i][j] = false;
4255 }
4256 }
4257 v->TotalNumberOfActiveDPP[i][j] = 0;
4258 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4259 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4260 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4261 if (v->NoOfDPP[i][j][k] == 1)
4262 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4263 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4264 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4265 NoChroma = false;
4266 }
4267
4268 // UPTO
4269 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4270 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4271 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4272 double BWOfNonSplitPlaneOfMaximumBandwidth;
4273 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4274
4275 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4276 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4277 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4278 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4279 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4280 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4281 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4282 }
4283 }
4284 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4285 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4286 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4287 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4288 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4289 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4290 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4291 }
4292 }
4293 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4294 v->RequiredDISPCLK[i][j] = 0.0;
4295 v->DISPCLK_DPPCLK_Support[i][j] = true;
4296 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4297 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4298 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4299 v->MPCCombine[i][j][k] = true;
4300 v->NoOfDPP[i][j][k] = 2;
4301 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4302 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4303 } else {
4304 v->MPCCombine[i][j][k] = false;
4305 v->NoOfDPP[i][j][k] = 1;
4306 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4307 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4308 }
4309 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4310 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4311 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4312 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4313 } else {
4314 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4315 }
4316 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4317 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4318 > v->MaxDppclkRoundedDownToDFSGranularity)
4319 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4320 v->DISPCLK_DPPCLK_Support[i][j] = false;
4321 }
4322 }
4323 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4324 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4325 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4326 }
4327 }
4328 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4329 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4330 v->DISPCLK_DPPCLK_Support[i][j] = false;
4331 }
4332 }
4333 }
4334
4335 /*Total Available Pipes Support Check*/
4336
4337 for (i = 0; i < v->soc.num_states; i++) {
4338 for (j = 0; j < 2; j++) {
4339 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4340 v->TotalAvailablePipesSupport[i][j] = true;
4341 } else {
4342 v->TotalAvailablePipesSupport[i][j] = false;
4343 }
4344 }
4345 }
4346 /*Display IO and DSC Support Check*/
4347
4348 v->NonsupportedDSCInputBPC = false;
4349 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4350 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4351 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4352 v->NonsupportedDSCInputBPC = true;
4353 }
4354 }
4355
4356 /*Number Of DSC Slices*/
4357 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4358 if (v->BlendingAndTiming[k] == k) {
4359 if (v->PixelClockBackEnd[k] > 3200) {
4360 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4361 } else if (v->PixelClockBackEnd[k] > 1360) {
4362 v->NumberOfDSCSlices[k] = 8;
4363 } else if (v->PixelClockBackEnd[k] > 680) {
4364 v->NumberOfDSCSlices[k] = 4;
4365 } else if (v->PixelClockBackEnd[k] > 340) {
4366 v->NumberOfDSCSlices[k] = 2;
4367 } else {
4368 v->NumberOfDSCSlices[k] = 1;
4369 }
4370 } else {
4371 v->NumberOfDSCSlices[k] = 0;
4372 }
4373 }
4374
4375 for (i = 0; i < v->soc.num_states; i++) {
4376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4377 v->RequiresDSC[i][k] = false;
4378 v->RequiresFEC[i][k] = false;
4379 if (v->BlendingAndTiming[k] == k) {
4380 if (v->Output[k] == dm_hdmi) {
4381 v->RequiresDSC[i][k] = false;
4382 v->RequiresFEC[i][k] = false;
4383 v->OutputBppPerState[i][k] = TruncToValidBPP(
4384 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4385 3,
4386 v->HTotal[k],
4387 v->HActive[k],
4388 v->PixelClockBackEnd[k],
4389 v->ForcedOutputLinkBPP[k],
4390 false,
4391 v->Output[k],
4392 v->OutputFormat[k],
4393 v->DSCInputBitPerComponent[k],
4394 v->NumberOfDSCSlices[k],
4395 v->AudioSampleRate[k],
4396 v->AudioSampleLayout[k],
4397 v->ODMCombineEnablePerState[i][k]);
4398 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4399 if (v->DSCEnable[k] == true) {
4400 v->RequiresDSC[i][k] = true;
4401 v->LinkDSCEnable = true;
4402 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4403 v->RequiresFEC[i][k] = true;
4404 } else {
4405 v->RequiresFEC[i][k] = false;
4406 }
4407 } else {
4408 v->RequiresDSC[i][k] = false;
4409 v->LinkDSCEnable = false;
4410 if (v->Output[k] == dm_dp2p0) {
4411 v->RequiresFEC[i][k] = true;
4412 } else {
4413 v->RequiresFEC[i][k] = false;
4414 }
4415 }
4416 if (v->Output[k] == dm_dp2p0) {
4417 v->Outbpp = BPP_INVALID;
4418 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4419 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4420 v->Outbpp = TruncToValidBPP(
4421 (1.0 - v->Downspreading / 100.0) * 10000,
4422 v->OutputLinkDPLanes[k],
4423 v->HTotal[k],
4424 v->HActive[k],
4425 v->PixelClockBackEnd[k],
4426 v->ForcedOutputLinkBPP[k],
4427 v->LinkDSCEnable,
4428 v->Output[k],
4429 v->OutputFormat[k],
4430 v->DSCInputBitPerComponent[k],
4431 v->NumberOfDSCSlices[k],
4432 v->AudioSampleRate[k],
4433 v->AudioSampleLayout[k],
4434 v->ODMCombineEnablePerState[i][k]);
4435 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4436 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4437 v->RequiresDSC[i][k] = true;
4438 v->LinkDSCEnable = true;
4439 v->Outbpp = TruncToValidBPP(
4440 (1.0 - v->Downspreading / 100.0) * 10000,
4441 v->OutputLinkDPLanes[k],
4442 v->HTotal[k],
4443 v->HActive[k],
4444 v->PixelClockBackEnd[k],
4445 v->ForcedOutputLinkBPP[k],
4446 v->LinkDSCEnable,
4447 v->Output[k],
4448 v->OutputFormat[k],
4449 v->DSCInputBitPerComponent[k],
4450 v->NumberOfDSCSlices[k],
4451 v->AudioSampleRate[k],
4452 v->AudioSampleLayout[k],
4453 v->ODMCombineEnablePerState[i][k]);
4454 }
4455 v->OutputBppPerState[i][k] = v->Outbpp;
4456 // TODO: Need some other way to handle this nonsense
4457 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4458 }
4459 if (v->Outbpp == BPP_INVALID &&
4460 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4461 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4462 v->Outbpp = TruncToValidBPP(
4463 (1.0 - v->Downspreading / 100.0) * 13500,
4464 v->OutputLinkDPLanes[k],
4465 v->HTotal[k],
4466 v->HActive[k],
4467 v->PixelClockBackEnd[k],
4468 v->ForcedOutputLinkBPP[k],
4469 v->LinkDSCEnable,
4470 v->Output[k],
4471 v->OutputFormat[k],
4472 v->DSCInputBitPerComponent[k],
4473 v->NumberOfDSCSlices[k],
4474 v->AudioSampleRate[k],
4475 v->AudioSampleLayout[k],
4476 v->ODMCombineEnablePerState[i][k]);
4477 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4478 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4479 v->RequiresDSC[i][k] = true;
4480 v->LinkDSCEnable = true;
4481 v->Outbpp = TruncToValidBPP(
4482 (1.0 - v->Downspreading / 100.0) * 13500,
4483 v->OutputLinkDPLanes[k],
4484 v->HTotal[k],
4485 v->HActive[k],
4486 v->PixelClockBackEnd[k],
4487 v->ForcedOutputLinkBPP[k],
4488 v->LinkDSCEnable,
4489 v->Output[k],
4490 v->OutputFormat[k],
4491 v->DSCInputBitPerComponent[k],
4492 v->NumberOfDSCSlices[k],
4493 v->AudioSampleRate[k],
4494 v->AudioSampleLayout[k],
4495 v->ODMCombineEnablePerState[i][k]);
4496 }
4497 v->OutputBppPerState[i][k] = v->Outbpp;
4498 // TODO: Need some other way to handle this nonsense
4499 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4500 }
4501 if (v->Outbpp == BPP_INVALID &&
4502 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4503 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4504 v->Outbpp = TruncToValidBPP(
4505 (1.0 - v->Downspreading / 100.0) * 20000,
4506 v->OutputLinkDPLanes[k],
4507 v->HTotal[k],
4508 v->HActive[k],
4509 v->PixelClockBackEnd[k],
4510 v->ForcedOutputLinkBPP[k],
4511 v->LinkDSCEnable,
4512 v->Output[k],
4513 v->OutputFormat[k],
4514 v->DSCInputBitPerComponent[k],
4515 v->NumberOfDSCSlices[k],
4516 v->AudioSampleRate[k],
4517 v->AudioSampleLayout[k],
4518 v->ODMCombineEnablePerState[i][k]);
4519 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4520 v->ForcedOutputLinkBPP[k] == 0) {
4521 v->RequiresDSC[i][k] = true;
4522 v->LinkDSCEnable = true;
4523 v->Outbpp = TruncToValidBPP(
4524 (1.0 - v->Downspreading / 100.0) * 20000,
4525 v->OutputLinkDPLanes[k],
4526 v->HTotal[k],
4527 v->HActive[k],
4528 v->PixelClockBackEnd[k],
4529 v->ForcedOutputLinkBPP[k],
4530 v->LinkDSCEnable,
4531 v->Output[k],
4532 v->OutputFormat[k],
4533 v->DSCInputBitPerComponent[k],
4534 v->NumberOfDSCSlices[k],
4535 v->AudioSampleRate[k],
4536 v->AudioSampleLayout[k],
4537 v->ODMCombineEnablePerState[i][k]);
4538 }
4539 v->OutputBppPerState[i][k] = v->Outbpp;
4540 // TODO: Need some other way to handle this nonsense
4541 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4542 }
4543 } else {
4544 v->Outbpp = BPP_INVALID;
4545 if (v->PHYCLKPerState[i] >= 270.0) {
4546 v->Outbpp = TruncToValidBPP(
4547 (1.0 - v->Downspreading / 100.0) * 2700,
4548 v->OutputLinkDPLanes[k],
4549 v->HTotal[k],
4550 v->HActive[k],
4551 v->PixelClockBackEnd[k],
4552 v->ForcedOutputLinkBPP[k],
4553 v->LinkDSCEnable,
4554 v->Output[k],
4555 v->OutputFormat[k],
4556 v->DSCInputBitPerComponent[k],
4557 v->NumberOfDSCSlices[k],
4558 v->AudioSampleRate[k],
4559 v->AudioSampleLayout[k],
4560 v->ODMCombineEnablePerState[i][k]);
4561 v->OutputBppPerState[i][k] = v->Outbpp;
4562 // TODO: Need some other way to handle this nonsense
4563 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4564 }
4565 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4566 v->Outbpp = TruncToValidBPP(
4567 (1.0 - v->Downspreading / 100.0) * 5400,
4568 v->OutputLinkDPLanes[k],
4569 v->HTotal[k],
4570 v->HActive[k],
4571 v->PixelClockBackEnd[k],
4572 v->ForcedOutputLinkBPP[k],
4573 v->LinkDSCEnable,
4574 v->Output[k],
4575 v->OutputFormat[k],
4576 v->DSCInputBitPerComponent[k],
4577 v->NumberOfDSCSlices[k],
4578 v->AudioSampleRate[k],
4579 v->AudioSampleLayout[k],
4580 v->ODMCombineEnablePerState[i][k]);
4581 v->OutputBppPerState[i][k] = v->Outbpp;
4582 // TODO: Need some other way to handle this nonsense
4583 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4584 }
4585 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4586 v->Outbpp = TruncToValidBPP(
4587 (1.0 - v->Downspreading / 100.0) * 8100,
4588 v->OutputLinkDPLanes[k],
4589 v->HTotal[k],
4590 v->HActive[k],
4591 v->PixelClockBackEnd[k],
4592 v->ForcedOutputLinkBPP[k],
4593 v->LinkDSCEnable,
4594 v->Output[k],
4595 v->OutputFormat[k],
4596 v->DSCInputBitPerComponent[k],
4597 v->NumberOfDSCSlices[k],
4598 v->AudioSampleRate[k],
4599 v->AudioSampleLayout[k],
4600 v->ODMCombineEnablePerState[i][k]);
4601 v->OutputBppPerState[i][k] = v->Outbpp;
4602 // TODO: Need some other way to handle this nonsense
4603 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4604 }
4605 }
4606 }
4607 } else {
4608 v->OutputBppPerState[i][k] = 0;
4609 }
4610 }
4611 }
4612
4613 for (i = 0; i < v->soc.num_states; i++) {
4614 v->LinkCapacitySupport[i] = true;
4615 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4616 if (v->BlendingAndTiming[k] == k
4617 && (v->Output[k] == dm_dp ||
4618 v->Output[k] == dm_edp ||
4619 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4620 v->LinkCapacitySupport[i] = false;
4621 }
4622 }
4623 }
4624
4625 // UPTO 2172
4626 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4627 if (v->BlendingAndTiming[k] == k
4628 && (v->Output[k] == dm_dp ||
4629 v->Output[k] == dm_edp ||
4630 v->Output[k] == dm_hdmi)) {
4631 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4632 P2IWith420 = true;
4633 }
4634 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4635 && !v->DSC422NativeSupport) {
4636 DSC422NativeNotSupported = true;
4637 }
4638 }
4639 }
4640
4641
4642 for (i = 0; i < v->soc.num_states; ++i) {
4643 v->ODMCombine4To1SupportCheckOK[i] = true;
4644 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4645 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4646 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4647 || v->Output[k] == dm_hdmi)) {
4648 v->ODMCombine4To1SupportCheckOK[i] = false;
4649 }
4650 }
4651 }
4652
4653 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4654
4655 for (i = 0; i < v->soc.num_states; i++) {
4656 v->NotEnoughDSCUnits[i] = false;
4657 v->TotalDSCUnitsRequired = 0.0;
4658 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4659 if (v->RequiresDSC[i][k] == true) {
4660 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4661 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4662 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4663 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4664 } else {
4665 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4666 }
4667 }
4668 }
4669 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4670 v->NotEnoughDSCUnits[i] = true;
4671 }
4672 }
4673 /*DSC Delay per state*/
4674
4675 for (i = 0; i < v->soc.num_states; i++) {
4676 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4677 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4678 v->BPP = 0.0;
4679 } else {
4680 v->BPP = v->OutputBppPerState[i][k];
4681 }
4682 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4683 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4684 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4685 v->DSCInputBitPerComponent[k],
4686 v->BPP,
4687 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4688 v->NumberOfDSCSlices[k],
4689 v->OutputFormat[k],
4690 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4691 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4692 v->DSCDelayPerState[i][k] = 2.0
4693 * (dscceComputeDelay(
4694 v->DSCInputBitPerComponent[k],
4695 v->BPP,
4696 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4697 v->NumberOfDSCSlices[k] / 2,
4698 v->OutputFormat[k],
4699 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4700 } else {
4701 v->DSCDelayPerState[i][k] = 4.0
4702 * (dscceComputeDelay(
4703 v->DSCInputBitPerComponent[k],
4704 v->BPP,
4705 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4706 v->NumberOfDSCSlices[k] / 4,
4707 v->OutputFormat[k],
4708 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4709 }
4710 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelayPerState[i][k] / v->HActive[k], 1.0);
4711 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4712 } else {
4713 v->DSCDelayPerState[i][k] = 0.0;
4714 }
4715 }
4716 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4717 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4718 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4719 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4720 }
4721 }
4722 }
4723 }
4724
4725 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4726 //
4727 for (i = 0; i < v->soc.num_states; ++i) {
4728 for (j = 0; j <= 1; ++j) {
4729 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4730 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4731 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4732 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4733 }
4734
4735 CalculateSwathAndDETConfiguration(
4736 false,
4737 v->NumberOfActivePlanes,
4738 v->DETBufferSizeInKByte[0],
4739 v->MaximumSwathWidthLuma,
4740 v->MaximumSwathWidthChroma,
4741 v->SourceScan,
4742 v->SourcePixelFormat,
4743 v->SurfaceTiling,
4744 v->ViewportWidth,
4745 v->ViewportHeight,
4746 v->SurfaceWidthY,
4747 v->SurfaceWidthC,
4748 v->SurfaceHeightY,
4749 v->SurfaceHeightC,
4750 v->Read256BlockHeightY,
4751 v->Read256BlockHeightC,
4752 v->Read256BlockWidthY,
4753 v->Read256BlockWidthC,
4754 v->ODMCombineEnableThisState,
4755 v->BlendingAndTiming,
4756 v->BytePerPixelY,
4757 v->BytePerPixelC,
4758 v->BytePerPixelInDETY,
4759 v->BytePerPixelInDETC,
4760 v->HActive,
4761 v->HRatio,
4762 v->HRatioChroma,
4763 v->NoOfDPPThisState,
4764 v->swath_width_luma_ub_this_state,
4765 v->swath_width_chroma_ub_this_state,
4766 v->SwathWidthYThisState,
4767 v->SwathWidthCThisState,
4768 v->SwathHeightYThisState,
4769 v->SwathHeightCThisState,
4770 v->DETBufferSizeYThisState,
4771 v->DETBufferSizeCThisState,
4772 v->dummystring,
4773 &v->ViewportSizeSupport[i][j]);
4774
4775 CalculateDCFCLKDeepSleep(
4776 mode_lib,
4777 v->NumberOfActivePlanes,
4778 v->BytePerPixelY,
4779 v->BytePerPixelC,
4780 v->VRatio,
4781 v->VRatioChroma,
4782 v->SwathWidthYThisState,
4783 v->SwathWidthCThisState,
4784 v->NoOfDPPThisState,
4785 v->HRatio,
4786 v->HRatioChroma,
4787 v->PixelClock,
4788 v->PSCL_FACTOR,
4789 v->PSCL_FACTOR_CHROMA,
4790 v->RequiredDPPCLKThisState,
4791 v->ReadBandwidthLuma,
4792 v->ReadBandwidthChroma,
4793 v->ReturnBusWidth,
4794 &v->ProjectedDCFCLKDeepSleep[i][j]);
4795
4796 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4797 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4798 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4799 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4800 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4801 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4802 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4803 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4804 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4805 }
4806 }
4807 }
4808
4809 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4810 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4811 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4812 }
4813
4814 for (i = 0; i < v->soc.num_states; i++) {
4815 for (j = 0; j < 2; j++) {
4816 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4817
4818 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4819 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4820 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4821 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4822 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4823 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4824 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4825 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4826 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4827 }
4828
4829 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4830 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4831 if (v->DCCEnable[k] == true) {
4832 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4833 }
4834 }
4835
4836 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4837 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4838 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4839
4840 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4841 && v->SourceScan[k] != dm_vert) {
4842 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4843 / 2;
4844 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4845 } else {
4846 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4847 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4848 }
4849
4850 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4851 mode_lib,
4852 v->DCCEnable[k],
4853 v->Read256BlockHeightC[k],
4854 v->Read256BlockWidthC[k],
4855 v->SourcePixelFormat[k],
4856 v->SurfaceTiling[k],
4857 v->BytePerPixelC[k],
4858 v->SourceScan[k],
4859 v->SwathWidthCThisState[k],
4860 v->ViewportHeightChroma[k],
4861 v->GPUVMEnable,
4862 v->HostVMEnable,
4863 v->HostVMMaxNonCachedPageTableLevels,
4864 v->GPUVMMinPageSize,
4865 v->HostVMMinPageSize,
4866 v->PTEBufferSizeInRequestsForChroma,
4867 v->PitchC[k],
4868 0.0,
4869 &v->MacroTileWidthC[k],
4870 &v->MetaRowBytesC,
4871 &v->DPTEBytesPerRowC,
4872 &v->PTEBufferSizeNotExceededC[i][j][k],
4873 &v->dummyinteger7,
4874 &v->dpte_row_height_chroma[k],
4875 &v->dummyinteger28,
4876 &v->dummyinteger26,
4877 &v->dummyinteger23,
4878 &v->meta_row_height_chroma[k],
4879 &v->dummyinteger8,
4880 &v->dummyinteger9,
4881 &v->dummyinteger19,
4882 &v->dummyinteger20,
4883 &v->dummyinteger17,
4884 &v->dummyinteger10,
4885 &v->dummyinteger11);
4886
4887 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4888 mode_lib,
4889 v->VRatioChroma[k],
4890 v->VTAPsChroma[k],
4891 v->Interlace[k],
4892 v->ProgressiveToInterlaceUnitInOPP,
4893 v->SwathHeightCThisState[k],
4894 v->ViewportYStartC[k],
4895 &v->PrefillC[k],
4896 &v->MaxNumSwC[k]);
4897 } else {
4898 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4899 v->PTEBufferSizeInRequestsForChroma = 0;
4900 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4901 v->MetaRowBytesC = 0.0;
4902 v->DPTEBytesPerRowC = 0.0;
4903 v->PrefetchLinesC[i][j][k] = 0.0;
4904 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4905 }
4906 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4907 mode_lib,
4908 v->DCCEnable[k],
4909 v->Read256BlockHeightY[k],
4910 v->Read256BlockWidthY[k],
4911 v->SourcePixelFormat[k],
4912 v->SurfaceTiling[k],
4913 v->BytePerPixelY[k],
4914 v->SourceScan[k],
4915 v->SwathWidthYThisState[k],
4916 v->ViewportHeight[k],
4917 v->GPUVMEnable,
4918 v->HostVMEnable,
4919 v->HostVMMaxNonCachedPageTableLevels,
4920 v->GPUVMMinPageSize,
4921 v->HostVMMinPageSize,
4922 v->PTEBufferSizeInRequestsForLuma,
4923 v->PitchY[k],
4924 v->DCCMetaPitchY[k],
4925 &v->MacroTileWidthY[k],
4926 &v->MetaRowBytesY,
4927 &v->DPTEBytesPerRowY,
4928 &v->PTEBufferSizeNotExceededY[i][j][k],
4929 &v->dummyinteger7,
4930 &v->dpte_row_height[k],
4931 &v->dummyinteger29,
4932 &v->dummyinteger27,
4933 &v->dummyinteger24,
4934 &v->meta_row_height[k],
4935 &v->dummyinteger25,
4936 &v->dpte_group_bytes[k],
4937 &v->dummyinteger21,
4938 &v->dummyinteger22,
4939 &v->dummyinteger18,
4940 &v->dummyinteger5,
4941 &v->dummyinteger6);
4942 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4943 mode_lib,
4944 v->VRatio[k],
4945 v->vtaps[k],
4946 v->Interlace[k],
4947 v->ProgressiveToInterlaceUnitInOPP,
4948 v->SwathHeightYThisState[k],
4949 v->ViewportYStartY[k],
4950 &v->PrefillY[k],
4951 &v->MaxNumSwY[k]);
4952 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4953 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4954 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4955
4956 CalculateRowBandwidth(
4957 v->GPUVMEnable,
4958 v->SourcePixelFormat[k],
4959 v->VRatio[k],
4960 v->VRatioChroma[k],
4961 v->DCCEnable[k],
4962 v->HTotal[k] / v->PixelClock[k],
4963 v->MetaRowBytesY,
4964 v->MetaRowBytesC,
4965 v->meta_row_height[k],
4966 v->meta_row_height_chroma[k],
4967 v->DPTEBytesPerRowY,
4968 v->DPTEBytesPerRowC,
4969 v->dpte_row_height[k],
4970 v->dpte_row_height_chroma[k],
4971 &v->meta_row_bandwidth[i][j][k],
4972 &v->dpte_row_bandwidth[i][j][k]);
4973 }
4974 /*
4975 * DCCMetaBufferSizeSupport(i, j) = True
4976 * For k = 0 To NumberOfActivePlanes - 1
4977 * If MetaRowBytes(i, j, k) > 24064 Then
4978 * DCCMetaBufferSizeSupport(i, j) = False
4979 * End If
4980 * Next k
4981 */
4982 v->DCCMetaBufferSizeSupport[i][j] = true;
4983 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4984 if (v->MetaRowBytes[i][j][k] > 24064)
4985 v->DCCMetaBufferSizeSupport[i][j] = false;
4986 }
4987 v->UrgLatency[i] = CalculateUrgentLatency(
4988 v->UrgentLatencyPixelDataOnly,
4989 v->UrgentLatencyPixelMixedWithVMData,
4990 v->UrgentLatencyVMDataOnly,
4991 v->DoUrgentLatencyAdjustment,
4992 v->UrgentLatencyAdjustmentFabricClockComponent,
4993 v->UrgentLatencyAdjustmentFabricClockReference,
4994 v->FabricClockPerState[i]);
4995
4996 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4997 CalculateUrgentBurstFactor(
4998 v->swath_width_luma_ub_this_state[k],
4999 v->swath_width_chroma_ub_this_state[k],
5000 v->SwathHeightYThisState[k],
5001 v->SwathHeightCThisState[k],
5002 v->HTotal[k] / v->PixelClock[k],
5003 v->UrgLatency[i],
5004 v->CursorBufferSize,
5005 v->CursorWidth[k][0],
5006 v->CursorBPP[k][0],
5007 v->VRatio[k],
5008 v->VRatioChroma[k],
5009 v->BytePerPixelInDETY[k],
5010 v->BytePerPixelInDETC[k],
5011 v->DETBufferSizeYThisState[k],
5012 v->DETBufferSizeCThisState[k],
5013 &v->UrgentBurstFactorCursor[k],
5014 &v->UrgentBurstFactorLuma[k],
5015 &v->UrgentBurstFactorChroma[k],
5016 &NotUrgentLatencyHiding[k]);
5017 }
5018
5019 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5020 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5021 if (NotUrgentLatencyHiding[k]) {
5022 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5023 }
5024 }
5025
5026 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5027 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5028 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5029 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5030 }
5031
5032 v->TotalVActivePixelBandwidth[i][j] = 0;
5033 v->TotalVActiveCursorBandwidth[i][j] = 0;
5034 v->TotalMetaRowBandwidth[i][j] = 0;
5035 v->TotalDPTERowBandwidth[i][j] = 0;
5036 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5037 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5038 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5039 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5040 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5041 }
5042 }
5043 }
5044
//Calculate Writeback Delay Time and Maximum VStartup (return BW per state is computed further below)
5046 for (i = 0; i < v->soc.num_states; ++i) {
5047 for (j = 0; j <= 1; ++j) {
5048 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5049 if (v->BlendingAndTiming[k] == k) {
5050 if (v->WritebackEnable[k] == true) {
5051 v->WritebackDelayTime[k] = v->WritebackLatency
5052 + CalculateWriteBackDelay(
5053 v->WritebackPixelFormat[k],
5054 v->WritebackHRatio[k],
5055 v->WritebackVRatio[k],
5056 v->WritebackVTaps[k],
5057 v->WritebackDestinationWidth[k],
5058 v->WritebackDestinationHeight[k],
5059 v->WritebackSourceHeight[k],
5060 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5061 } else {
5062 v->WritebackDelayTime[k] = 0.0;
5063 }
5064 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5065 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5066 v->WritebackDelayTime[k] = dml_max(
5067 v->WritebackDelayTime[k],
5068 v->WritebackLatency
5069 + CalculateWriteBackDelay(
5070 v->WritebackPixelFormat[m],
5071 v->WritebackHRatio[m],
5072 v->WritebackVRatio[m],
5073 v->WritebackVTaps[m],
5074 v->WritebackDestinationWidth[m],
5075 v->WritebackDestinationHeight[m],
5076 v->WritebackSourceHeight[m],
5077 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5078 }
5079 }
5080 }
5081 }
5082 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5083 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5084 if (v->BlendingAndTiming[k] == m) {
5085 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5086 }
5087 }
5088 }
5089 v->MaxMaxVStartup[i][j] = 0;
5090 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5091 v->MaximumVStartup[i][j][k] =
5092 CalculateMaxVStartup(
5093 v->VTotal[k],
5094 v->VActive[k],
5095 v->VBlankNom[k],
5096 v->HTotal[k],
5097 v->PixelClock[k],
5098 v->ProgressiveToInterlaceUnitInOPP,
5099 v->Interlace[k],
5100 v->ip.VBlankNomDefaultUS,
5101 v->WritebackDelayTime[k]);
5102 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5103 }
5104 }
5105 }
5106
5107 ReorderingBytes = v->NumberOfChannels
5108 * dml_max3(
5109 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5110 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5111 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5112
5113 for (i = 0; i < v->soc.num_states; ++i) {
5114 for (j = 0; j <= 1; ++j) {
5115 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5116 }
5117 }
5118
5119 if (v->UseMinimumRequiredDCFCLK == true)
5120 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5121
5122 for (i = 0; i < v->soc.num_states; ++i) {
5123 for (j = 0; j <= 1; ++j) {
5124 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5125 v->ReturnBusWidth * v->DCFCLKState[i][j],
5126 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5127 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5128 double PixelDataOnlyReturnBWPerState = dml_min(
5129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5131 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5132 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5133 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5134
5135 if (v->HostVMEnable != true) {
5136 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5137 } else {
5138 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5139 }
5140 }
5141 }
5142
5143 //Re-ordering Buffer Support Check
5144 for (i = 0; i < v->soc.num_states; ++i) {
5145 for (j = 0; j <= 1; ++j) {
5146 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5147 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5148 v->ROBSupport[i][j] = true;
5149 } else {
5150 v->ROBSupport[i][j] = false;
5151 }
5152 }
5153 }
5154
5155 //Vertical Active BW support check
5156
5157 MaxTotalVActiveRDBandwidth = 0;
5158 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5159 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5160 }
5161
5162 for (i = 0; i < v->soc.num_states; ++i) {
5163 for (j = 0; j <= 1; ++j) {
5164 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5165 dml_min(
5166 v->ReturnBusWidth * v->DCFCLKState[i][j],
5167 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5168 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5169 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5170 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5171
5172 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5173 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5174 } else {
5175 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5176 }
5177 }
5178 }
5179
5180 v->UrgentLatency = CalculateUrgentLatency(
5181 v->UrgentLatencyPixelDataOnly,
5182 v->UrgentLatencyPixelMixedWithVMData,
5183 v->UrgentLatencyVMDataOnly,
5184 v->DoUrgentLatencyAdjustment,
5185 v->UrgentLatencyAdjustmentFabricClockComponent,
5186 v->UrgentLatencyAdjustmentFabricClockReference,
5187 v->FabricClock);
5188 //Prefetch Check
5189 for (i = 0; i < v->soc.num_states; ++i) {
5190 for (j = 0; j <= 1; ++j) {
5191 double VMDataOnlyReturnBWPerState;
5192 double HostVMInefficiencyFactor = 1;
5193 int NextPrefetchModeState = MinPrefetchMode;
5194 bool UnboundedRequestEnabledThisState = false;
5195 int CompressedBufferSizeInkByteThisState = 0;
5196 double dummy;
5197
5198 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5199
5200 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5201 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5202 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5203 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5204 }
5205
5206 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5207 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5208 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5209 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5210 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5211 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5212 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5213 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5214 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5215 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5216 }
5217
5218 VMDataOnlyReturnBWPerState = dml_min(
5219 dml_min(
5220 v->ReturnBusWidth * v->DCFCLKState[i][j],
5221 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5222 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5223 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5224 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5225 if (v->GPUVMEnable && v->HostVMEnable)
5226 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5227
5228 v->ExtraLatency = CalculateExtraLatency(
5229 v->RoundTripPingLatencyCycles,
5230 ReorderingBytes,
5231 v->DCFCLKState[i][j],
5232 v->TotalNumberOfActiveDPP[i][j],
5233 v->PixelChunkSizeInKByte,
5234 v->TotalNumberOfDCCActiveDPP[i][j],
5235 v->MetaChunkSize,
5236 v->ReturnBWPerState[i][j],
5237 v->GPUVMEnable,
5238 v->HostVMEnable,
5239 v->NumberOfActivePlanes,
5240 v->NoOfDPPThisState,
5241 v->dpte_group_bytes,
5242 HostVMInefficiencyFactor,
5243 v->HostVMMinPageSize,
5244 v->HostVMMaxNonCachedPageTableLevels);
5245
5246 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5247 do {
5248 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5249 v->MaxVStartup = v->NextMaxVStartup;
5250
5251 v->TWait = CalculateTWait(
5252 v->PrefetchModePerState[i][j],
5253 v->DRAMClockChangeLatency,
5254 v->UrgLatency[i],
5255 v->SREnterPlusExitTime);
5256
5257 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5258 CalculatePrefetchSchedulePerPlane(mode_lib,
5259 HostVMInefficiencyFactor,
5260 i, j, k);
5261 }
5262
5263 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5264 CalculateUrgentBurstFactor(
5265 v->swath_width_luma_ub_this_state[k],
5266 v->swath_width_chroma_ub_this_state[k],
5267 v->SwathHeightYThisState[k],
5268 v->SwathHeightCThisState[k],
5269 v->HTotal[k] / v->PixelClock[k],
5270 v->UrgLatency[i],
5271 v->CursorBufferSize,
5272 v->CursorWidth[k][0],
5273 v->CursorBPP[k][0],
5274 v->VRatioPreY[i][j][k],
5275 v->VRatioPreC[i][j][k],
5276 v->BytePerPixelInDETY[k],
5277 v->BytePerPixelInDETC[k],
5278 v->DETBufferSizeYThisState[k],
5279 v->DETBufferSizeCThisState[k],
5280 &v->UrgentBurstFactorCursorPre[k],
5281 &v->UrgentBurstFactorLumaPre[k],
5282 &v->UrgentBurstFactorChromaPre[k],
5283 &v->NotUrgentLatencyHidingPre[k]);
5284 }
5285
5286 v->MaximumReadBandwidthWithPrefetch = 0.0;
5287 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5288 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5289 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5290
5291 v->MaximumReadBandwidthWithPrefetch =
5292 v->MaximumReadBandwidthWithPrefetch
5293 + dml_max3(
5294 v->VActivePixelBandwidth[i][j][k]
5295 + v->VActiveCursorBandwidth[i][j][k]
5296 + v->NoOfDPP[i][j][k]
5297 * (v->meta_row_bandwidth[i][j][k]
5298 + v->dpte_row_bandwidth[i][j][k]),
5299 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5300 v->NoOfDPP[i][j][k]
5301 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5302 * v->UrgentBurstFactorLumaPre[k]
5303 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5304 * v->UrgentBurstFactorChromaPre[k])
5305 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5306 }
5307
5308 v->NotEnoughUrgentLatencyHidingPre = false;
5309 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5310 if (v->NotUrgentLatencyHidingPre[k] == true) {
5311 v->NotEnoughUrgentLatencyHidingPre = true;
5312 }
5313 }
5314
5315 v->PrefetchSupported[i][j] = true;
5316 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5317 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5318 v->PrefetchSupported[i][j] = false;
5319 }
5320 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5321 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5322 || v->NoTimeForPrefetch[i][j][k] == true) {
5323 v->PrefetchSupported[i][j] = false;
5324 }
5325 }
5326
5327 v->DynamicMetadataSupported[i][j] = true;
5328 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5329 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5330 v->DynamicMetadataSupported[i][j] = false;
5331 }
5332 }
5333
5334 v->VRatioInPrefetchSupported[i][j] = true;
5335 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5336 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5337 v->VRatioInPrefetchSupported[i][j] = false;
5338 }
5339 }
5340 v->AnyLinesForVMOrRowTooLarge = false;
5341 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5342 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5343 v->AnyLinesForVMOrRowTooLarge = true;
5344 }
5345 }
5346
5347 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5348
5349 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5350 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5351 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5352 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5353 - dml_max(
5354 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5355 v->NoOfDPP[i][j][k]
5356 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5357 * v->UrgentBurstFactorLumaPre[k]
5358 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5359 * v->UrgentBurstFactorChromaPre[k])
5360 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5361 }
5362 v->TotImmediateFlipBytes = 0.0;
5363 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5364 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5365 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5366 + v->DPTEBytesPerRow[i][j][k]);
5367 }
5368
5369 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5370 CalculateFlipSchedule(
5371 mode_lib,
5372 k,
5373 HostVMInefficiencyFactor,
5374 v->ExtraLatency,
5375 v->UrgLatency[i],
5376 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5377 v->MetaRowBytes[i][j][k],
5378 v->DPTEBytesPerRow[i][j][k]);
5379 }
5380 v->total_dcn_read_bw_with_flip = 0.0;
5381 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5382 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5383 + dml_max3(
5384 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5385 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5386 + v->VActiveCursorBandwidth[i][j][k],
5387 v->NoOfDPP[i][j][k]
5388 * (v->final_flip_bw[k]
5389 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5390 * v->UrgentBurstFactorLumaPre[k]
5391 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5392 * v->UrgentBurstFactorChromaPre[k])
5393 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5394 }
5395 v->ImmediateFlipSupportedForState[i][j] = true;
5396 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5397 v->ImmediateFlipSupportedForState[i][j] = false;
5398 }
5399 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5400 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5401 v->ImmediateFlipSupportedForState[i][j] = false;
5402 }
5403 }
5404 } else {
5405 v->ImmediateFlipSupportedForState[i][j] = false;
5406 }
5407
5408 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5409 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5410 NextPrefetchModeState = NextPrefetchModeState + 1;
5411 } else {
5412 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5413 }
5414 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5415 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5416 && ((v->HostVMEnable == false &&
5417 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5418 || v->ImmediateFlipSupportedForState[i][j] == true))
5419 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5420
5421 CalculateUnboundedRequestAndCompressedBufferSize(
5422 v->DETBufferSizeInKByte[0],
5423 v->ConfigReturnBufferSizeInKByte,
5424 v->UseUnboundedRequesting,
5425 v->TotalNumberOfActiveDPP[i][j],
5426 NoChroma,
5427 v->MaxNumDPP,
5428 v->CompressedBufferSegmentSizeInkByte,
5429 v->Output,
5430 &UnboundedRequestEnabledThisState,
5431 &CompressedBufferSizeInkByteThisState);
5432
5433 CalculateWatermarksAndDRAMSpeedChangeSupport(
5434 mode_lib,
5435 v->PrefetchModePerState[i][j],
5436 v->DCFCLKState[i][j],
5437 v->ReturnBWPerState[i][j],
5438 v->UrgLatency[i],
5439 v->ExtraLatency,
5440 v->SOCCLKPerState[i],
5441 v->ProjectedDCFCLKDeepSleep[i][j],
5442 v->DETBufferSizeYThisState,
5443 v->DETBufferSizeCThisState,
5444 v->SwathHeightYThisState,
5445 v->SwathHeightCThisState,
5446 v->SwathWidthYThisState,
5447 v->SwathWidthCThisState,
5448 v->NoOfDPPThisState,
5449 v->BytePerPixelInDETY,
5450 v->BytePerPixelInDETC,
5451 UnboundedRequestEnabledThisState,
5452 CompressedBufferSizeInkByteThisState,
5453 &v->DRAMClockChangeSupport[i][j],
5454 &dummy,
5455 &dummy,
5456 &dummy,
5457 &dummy);
5458 }
5459 }
5460
5461 /*PTE Buffer Size Check*/
5462 for (i = 0; i < v->soc.num_states; i++) {
5463 for (j = 0; j < 2; j++) {
5464 v->PTEBufferSizeNotExceeded[i][j] = true;
5465 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5466 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5467 v->PTEBufferSizeNotExceeded[i][j] = false;
5468 }
5469 }
5470 }
5471 }
5472
5473 /*Cursor Support Check*/
5474 v->CursorSupport = true;
5475 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5476 if (v->CursorWidth[k][0] > 0.0) {
5477 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5478 v->CursorSupport = false;
5479 }
5480 }
5481 }
5482
5483 /*Valid Pitch Check*/
5484 v->PitchSupport = true;
5485 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5486 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5487 if (v->DCCEnable[k] == true) {
5488 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5489 } else {
5490 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5491 }
5492 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5493 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5494 && v->SourcePixelFormat[k] != dm_mono_8) {
5495 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5496 if (v->DCCEnable[k] == true) {
5497 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5498 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5499 64.0 * v->Read256BlockWidthC[k]);
5500 } else {
5501 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5502 }
5503 } else {
5504 v->AlignedCPitch[k] = v->PitchC[k];
5505 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5506 }
5507 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5508 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5509 v->PitchSupport = false;
5510 }
5511 }
5512
5513 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5514 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5515 ViewportExceedsSurface = true;
5516 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5517 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5518 && v->SourcePixelFormat[k] != dm_rgbe) {
5519 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5520 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5521 ViewportExceedsSurface = true;
5522 }
5523 }
5524 }
5525 }
5526
5527 /*Mode Support, Voltage State and SOC Configuration*/
5528 for (i = v->soc.num_states - 1; i >= 0; i--) {
5529 for (j = 0; j < 2; j++) {
5530 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5531 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5532 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5533 && v->DTBCLKRequiredMoreThanSupported[i] == false
5534 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5535 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5536 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5537 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5538 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5539 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5540 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5541 && ((v->HostVMEnable == false
5542 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5543 || v->ImmediateFlipSupportedForState[i][j] == true)
5544 && FMTBufferExceeded == false) {
5545 v->ModeSupport[i][j] = true;
5546 } else {
5547 v->ModeSupport[i][j] = false;
5548 }
5549 }
5550 }
5551 for (i = v->soc.num_states; i >= 0; i--) {
5552 for (j = 0; j < 2; j++) {
5553 enum dm_validation_status status = DML_VALIDATION_OK;
5554
5555 if (!v->ScaleRatioAndTapsSupport) {
5556 status = DML_FAIL_SCALE_RATIO_TAP;
5557 } else if (!v->SourceFormatPixelAndScanSupport) {
5558 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5559 } else if (!v->ViewportSizeSupport[i][j]) {
5560 status = DML_FAIL_VIEWPORT_SIZE;
5561 } else if (P2IWith420) {
5562 status = DML_FAIL_P2I_WITH_420;
5563 } else if (DSCOnlyIfNecessaryWithBPP) {
5564 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5565 } else if (DSC422NativeNotSupported) {
5566 status = DML_FAIL_NOT_DSC422_NATIVE;
5567 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5568 status = DML_FAIL_ODM_COMBINE4TO1;
5569 } else if (v->NotEnoughDSCUnits[i]) {
5570 status = DML_FAIL_NOT_ENOUGH_DSC;
5571 } else if (!v->ROBSupport[i][j]) {
5572 status = DML_FAIL_REORDERING_BUFFER;
5573 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5574 status = DML_FAIL_DISPCLK_DPPCLK;
5575 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5576 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5577 } else if (!EnoughWritebackUnits) {
5578 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5579 } else if (!v->WritebackLatencySupport) {
5580 status = DML_FAIL_WRITEBACK_LATENCY;
5581 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5582 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5583 } else if (!v->CursorSupport) {
5584 status = DML_FAIL_CURSOR_SUPPORT;
5585 } else if (!v->PitchSupport) {
5586 status = DML_FAIL_PITCH_SUPPORT;
5587 } else if (ViewportExceedsSurface) {
5588 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5589 } else if (!v->PrefetchSupported[i][j]) {
5590 status = DML_FAIL_PREFETCH_SUPPORT;
5591 } else if (!v->DynamicMetadataSupported[i][j]) {
5592 status = DML_FAIL_DYNAMIC_METADATA;
5593 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5594 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5595 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5596 status = DML_FAIL_V_RATIO_PREFETCH;
5597 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5598 status = DML_FAIL_PTE_BUFFER_SIZE;
5599 } else if (v->NonsupportedDSCInputBPC) {
5600 status = DML_FAIL_DSC_INPUT_BPC;
5601 } else if ((v->HostVMEnable
5602 && !v->ImmediateFlipSupportedForState[i][j])) {
5603 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5604 } else if (FMTBufferExceeded) {
5605 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5606 }
5607 mode_lib->vba.ValidationStatus[i] = status;
5608 }
5609 }
5610
5611 {
5612 unsigned int MaximumMPCCombine = 0;
5613
5614 for (i = v->soc.num_states; i >= 0; i--) {
5615 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5616 v->VoltageLevel = i;
5617 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5618 if (v->ModeSupport[i][0] == true) {
5619 MaximumMPCCombine = 0;
5620 } else {
5621 MaximumMPCCombine = 1;
5622 }
5623 }
5624 }
5625 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5626 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5627 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5628 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5629 }
5630 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5631 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5632 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5633 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5634 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5635 v->maxMpcComb = MaximumMPCCombine;
5636 }
5637 }
5638
5639 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5640 struct display_mode_lib *mode_lib,
5641 unsigned int PrefetchMode,
5642 double DCFCLK,
5643 double ReturnBW,
5644 double UrgentLatency,
5645 double ExtraLatency,
5646 double SOCCLK,
5647 double DCFCLKDeepSleep,
5648 unsigned int DETBufferSizeY[],
5649 unsigned int DETBufferSizeC[],
5650 unsigned int SwathHeightY[],
5651 unsigned int SwathHeightC[],
5652 double SwathWidthY[],
5653 double SwathWidthC[],
5654 unsigned int DPPPerPlane[],
5655 double BytePerPixelDETY[],
5656 double BytePerPixelDETC[],
5657 bool UnboundedRequestEnabled,
5658 unsigned int CompressedBufferSizeInkByte,
5659 enum clock_change_support *DRAMClockChangeSupport,
5660 double *StutterExitWatermark,
5661 double *StutterEnterPlusExitWatermark,
5662 double *Z8StutterExitWatermark,
5663 double *Z8StutterEnterPlusExitWatermark)
5664 {
5665 struct vba_vars_st *v = &mode_lib->vba;
5666 double EffectiveLBLatencyHidingY;
5667 double EffectiveLBLatencyHidingC;
5668 double LinesInDETY[DC__NUM_DPP__MAX];
5669 double LinesInDETC;
5670 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5671 unsigned int LinesInDETCRoundedDownToSwath;
5672 double FullDETBufferingTimeY;
5673 double FullDETBufferingTimeC;
5674 double ActiveDRAMClockChangeLatencyMarginY;
5675 double ActiveDRAMClockChangeLatencyMarginC;
5676 double WritebackDRAMClockChangeLatencyMargin;
5677 double PlaneWithMinActiveDRAMClockChangeMargin;
5678 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5679 double WritebackDRAMClockChangeLatencyHiding;
5680 double TotalPixelBW = 0.0;
5681 int k, j;
5682
5683 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5684
5685 #ifdef __DML_VBA_DEBUG__
5686 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5687 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5688 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5689 #endif
5690
5691 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5692
5693 #ifdef __DML_VBA_DEBUG__
5694 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5695 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5696 #endif
5697
5698 v->TotalActiveWriteback = 0;
5699 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5700 if (v->WritebackEnable[k] == true) {
5701 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5702 }
5703 }
5704
5705 if (v->TotalActiveWriteback <= 1) {
5706 v->WritebackUrgentWatermark = v->WritebackLatency;
5707 } else {
5708 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5709 }
5710
5711 if (v->TotalActiveWriteback <= 1) {
5712 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5713 } else {
5714 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5715 }
5716
5717 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5718 TotalPixelBW = TotalPixelBW
5719 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5720 / (v->HTotal[k] / v->PixelClock[k]);
5721 }
5722
5723 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5724 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5725
5726 v->LBLatencyHidingSourceLinesY = dml_min(
5727 (double) v->MaxLineBufferLines,
5728 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5729
5730 v->LBLatencyHidingSourceLinesC = dml_min(
5731 (double) v->MaxLineBufferLines,
5732 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5733
5734 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5735
5736 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5737
5738 if (UnboundedRequestEnabled) {
5739 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5740 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5741 }
5742
5743 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5744 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5745 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5746 if (BytePerPixelDETC[k] > 0) {
5747 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5748 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5749 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5750 } else {
5751 LinesInDETC = 0;
5752 FullDETBufferingTimeC = 999999;
5753 }
5754
5755 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5756 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5757
5758 if (v->NumberOfActivePlanes > 1) {
5759 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5760 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5761 }
5762
5763 if (BytePerPixelDETC[k] > 0) {
5764 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5765 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5766
5767 if (v->NumberOfActivePlanes > 1) {
5768 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5769 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5770 }
5771 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5772 } else {
5773 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5774 }
5775
5776 if (v->WritebackEnable[k] == true) {
5777 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5778 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5779 if (v->WritebackPixelFormat[k] == dm_444_64) {
5780 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5781 }
5782 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5783 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5784 }
5785 }
5786
5787 v->MinActiveDRAMClockChangeMargin = 999999;
5788 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5789 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5790 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5791 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5792 if (v->BlendingAndTiming[k] == k) {
5793 PlaneWithMinActiveDRAMClockChangeMargin = k;
5794 } else {
5795 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5796 if (v->BlendingAndTiming[k] == j) {
5797 PlaneWithMinActiveDRAMClockChangeMargin = j;
5798 }
5799 }
5800 }
5801 }
5802 }
5803
5804 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5805
5806 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5807 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5808 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5809 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5810 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5811 }
5812 }
5813
5814 v->TotalNumberOfActiveOTG = 0;
5815
5816 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5817 if (v->BlendingAndTiming[k] == k) {
5818 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5819 }
5820 }
5821
5822 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5823 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5824 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5825 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5826 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5827 } else {
5828 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5829 }
5830
5831 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5832 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5833 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5834 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5835
5836 #ifdef __DML_VBA_DEBUG__
5837 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5838 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5839 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5840 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5841 #endif
5842 }
5843
5844 static void CalculateDCFCLKDeepSleep(
5845 struct display_mode_lib *mode_lib,
5846 unsigned int NumberOfActivePlanes,
5847 int BytePerPixelY[],
5848 int BytePerPixelC[],
5849 double VRatio[],
5850 double VRatioChroma[],
5851 double SwathWidthY[],
5852 double SwathWidthC[],
5853 unsigned int DPPPerPlane[],
5854 double HRatio[],
5855 double HRatioChroma[],
5856 double PixelClock[],
5857 double PSCL_THROUGHPUT[],
5858 double PSCL_THROUGHPUT_CHROMA[],
5859 double DPPCLK[],
5860 double ReadBandwidthLuma[],
5861 double ReadBandwidthChroma[],
5862 int ReturnBusWidth,
5863 double *DCFCLKDeepSleep)
5864 {
5865 struct vba_vars_st *v = &mode_lib->vba;
5866 double DisplayPipeLineDeliveryTimeLuma;
5867 double DisplayPipeLineDeliveryTimeChroma;
5868 double ReadBandwidth = 0.0;
5869 int k;
5870
5871 for (k = 0; k < NumberOfActivePlanes; ++k) {
5872
5873 if (VRatio[k] <= 1) {
5874 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5875 } else {
5876 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5877 }
5878 if (BytePerPixelC[k] == 0) {
5879 DisplayPipeLineDeliveryTimeChroma = 0;
5880 } else {
5881 if (VRatioChroma[k] <= 1) {
5882 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5883 } else {
5884 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5885 }
5886 }
5887
5888 if (BytePerPixelC[k] > 0) {
5889 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5890 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5891 } else {
5892 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5893 }
5894 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5895
5896 }
5897
5898 for (k = 0; k < NumberOfActivePlanes; ++k) {
5899 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5900 }
5901
5902 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5903
5904 for (k = 0; k < NumberOfActivePlanes; ++k) {
5905 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5906 }
5907 }
5908
/*
 * CalculateUrgentBurstFactor - urgent burst factors for cursor, luma and chroma.
 *
 * For each of the three streams a "buffer time" is computed as
 *     buffered_lines * LineTime / vertical_ratio
 * and the factor is
 *     buffer_time / (buffer_time - UrgentLatency).
 * When buffer_time does not exceed UrgentLatency the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to true. When the vertical ratio is
 * not greater than 0 the factor is set to 1.
 *
 * Buffered lines:
 *   - cursor: 1 << floor(log2(CursorBufferSize * 1024 /
 *             (CursorWidth * CursorBPP / 8.0)))
 *   - luma:   DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub,
 *             rounded down to a multiple of SwathHeightY
 *   - chroma: DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub,
 *             rounded down to a multiple of SwathHeightC
 *
 * Cursor and luma use VRatio; chroma uses VRatioC.
 *
 * Outputs left untouched by this function:
 *   - *UrgentBurstFactorCursor when CursorWidth == 0
 *   - *UrgentBurstFactorChroma when BytePerPixelInDETC is not > 0
 *
 * *NotEnoughUrgentLatencyHiding is always written (false unless one of the
 * streams cannot hide UrgentLatency).
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/* Power-of-two line count derived from the cursor buffer size. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * The chroma buffer time is scaled by the chroma vertical ratio.
		 * The previous code tested and divided by the luma VRatio here and
		 * never used VRatioC, which gives a different result whenever the
		 * two ratios differ.
		 * NOTE(review): this alters HW-provided formula output for planes
		 * with VRatioC != VRatio; confirm with the HW team.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5982
5983 static void CalculatePixelDeliveryTimes(
5984 unsigned int NumberOfActivePlanes,
5985 double VRatio[],
5986 double VRatioChroma[],
5987 double VRatioPrefetchY[],
5988 double VRatioPrefetchC[],
5989 unsigned int swath_width_luma_ub[],
5990 unsigned int swath_width_chroma_ub[],
5991 unsigned int DPPPerPlane[],
5992 double HRatio[],
5993 double HRatioChroma[],
5994 double PixelClock[],
5995 double PSCL_THROUGHPUT[],
5996 double PSCL_THROUGHPUT_CHROMA[],
5997 double DPPCLK[],
5998 int BytePerPixelC[],
5999 enum scan_direction_class SourceScan[],
6000 unsigned int NumberOfCursors[],
6001 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6002 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6003 unsigned int BlockWidth256BytesY[],
6004 unsigned int BlockHeight256BytesY[],
6005 unsigned int BlockWidth256BytesC[],
6006 unsigned int BlockHeight256BytesC[],
6007 double DisplayPipeLineDeliveryTimeLuma[],
6008 double DisplayPipeLineDeliveryTimeChroma[],
6009 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6010 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6011 double DisplayPipeRequestDeliveryTimeLuma[],
6012 double DisplayPipeRequestDeliveryTimeChroma[],
6013 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6014 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6015 double CursorRequestDeliveryTime[],
6016 double CursorRequestDeliveryTimePrefetch[])
6017 {
6018 double req_per_swath_ub;
6019 int k;
6020
6021 for (k = 0; k < NumberOfActivePlanes; ++k) {
6022 if (VRatio[k] <= 1) {
6023 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6024 } else {
6025 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6026 }
6027
6028 if (BytePerPixelC[k] == 0) {
6029 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6030 } else {
6031 if (VRatioChroma[k] <= 1) {
6032 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6033 } else {
6034 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6035 }
6036 }
6037
6038 if (VRatioPrefetchY[k] <= 1) {
6039 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6040 } else {
6041 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6042 }
6043
6044 if (BytePerPixelC[k] == 0) {
6045 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6046 } else {
6047 if (VRatioPrefetchC[k] <= 1) {
6048 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6049 } else {
6050 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6051 }
6052 }
6053 }
6054
6055 for (k = 0; k < NumberOfActivePlanes; ++k) {
6056 if (SourceScan[k] != dm_vert) {
6057 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6058 } else {
6059 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6060 }
6061 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6062 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6063 if (BytePerPixelC[k] == 0) {
6064 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6065 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6066 } else {
6067 if (SourceScan[k] != dm_vert) {
6068 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6069 } else {
6070 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6071 }
6072 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6073 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6074 }
6075 #ifdef __DML_VBA_DEBUG__
6076 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6077 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6078 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6079 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6080 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6081 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6082 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6083 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6084 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6085 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6086 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6087 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6088 #endif
6089 }
6090
6091 for (k = 0; k < NumberOfActivePlanes; ++k) {
6092 int cursor_req_per_width;
6093
6094 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6095 if (NumberOfCursors[k] > 0) {
6096 if (VRatio[k] <= 1) {
6097 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6098 } else {
6099 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6100 }
6101 if (VRatioPrefetchY[k] <= 1) {
6102 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6103 } else {
6104 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6105 }
6106 } else {
6107 CursorRequestDeliveryTime[k] = 0;
6108 CursorRequestDeliveryTimePrefetch[k] = 0;
6109 }
6110 #ifdef __DML_VBA_DEBUG__
6111 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6112 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6113 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6114 #endif
6115 }
6116 }
6117
6118 static void CalculateMetaAndPTETimes(
6119 int NumberOfActivePlanes,
6120 bool GPUVMEnable,
6121 int MetaChunkSize,
6122 int MinMetaChunkSizeBytes,
6123 int HTotal[],
6124 double VRatio[],
6125 double VRatioChroma[],
6126 double DestinationLinesToRequestRowInVBlank[],
6127 double DestinationLinesToRequestRowInImmediateFlip[],
6128 bool DCCEnable[],
6129 double PixelClock[],
6130 int BytePerPixelY[],
6131 int BytePerPixelC[],
6132 enum scan_direction_class SourceScan[],
6133 int dpte_row_height[],
6134 int dpte_row_height_chroma[],
6135 int meta_row_width[],
6136 int meta_row_width_chroma[],
6137 int meta_row_height[],
6138 int meta_row_height_chroma[],
6139 int meta_req_width[],
6140 int meta_req_width_chroma[],
6141 int meta_req_height[],
6142 int meta_req_height_chroma[],
6143 int dpte_group_bytes[],
6144 int PTERequestSizeY[],
6145 int PTERequestSizeC[],
6146 int PixelPTEReqWidthY[],
6147 int PixelPTEReqHeightY[],
6148 int PixelPTEReqWidthC[],
6149 int PixelPTEReqHeightC[],
6150 int dpte_row_width_luma_ub[],
6151 int dpte_row_width_chroma_ub[],
6152 double DST_Y_PER_PTE_ROW_NOM_L[],
6153 double DST_Y_PER_PTE_ROW_NOM_C[],
6154 double DST_Y_PER_META_ROW_NOM_L[],
6155 double DST_Y_PER_META_ROW_NOM_C[],
6156 double TimePerMetaChunkNominal[],
6157 double TimePerChromaMetaChunkNominal[],
6158 double TimePerMetaChunkVBlank[],
6159 double TimePerChromaMetaChunkVBlank[],
6160 double TimePerMetaChunkFlip[],
6161 double TimePerChromaMetaChunkFlip[],
6162 double time_per_pte_group_nom_luma[],
6163 double time_per_pte_group_vblank_luma[],
6164 double time_per_pte_group_flip_luma[],
6165 double time_per_pte_group_nom_chroma[],
6166 double time_per_pte_group_vblank_chroma[],
6167 double time_per_pte_group_flip_chroma[])
6168 {
6169 unsigned int meta_chunk_width;
6170 unsigned int min_meta_chunk_width;
6171 unsigned int meta_chunk_per_row_int;
6172 unsigned int meta_row_remainder;
6173 unsigned int meta_chunk_threshold;
6174 unsigned int meta_chunks_per_row_ub;
6175 unsigned int meta_chunk_width_chroma;
6176 unsigned int min_meta_chunk_width_chroma;
6177 unsigned int meta_chunk_per_row_int_chroma;
6178 unsigned int meta_row_remainder_chroma;
6179 unsigned int meta_chunk_threshold_chroma;
6180 unsigned int meta_chunks_per_row_ub_chroma;
6181 unsigned int dpte_group_width_luma;
6182 unsigned int dpte_groups_per_row_luma_ub;
6183 unsigned int dpte_group_width_chroma;
6184 unsigned int dpte_groups_per_row_chroma_ub;
6185 int k;
6186
6187 for (k = 0; k < NumberOfActivePlanes; ++k) {
6188 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6189 if (BytePerPixelC[k] == 0) {
6190 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6191 } else {
6192 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6193 }
6194 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6195 if (BytePerPixelC[k] == 0) {
6196 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6197 } else {
6198 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6199 }
6200 }
6201
6202 for (k = 0; k < NumberOfActivePlanes; ++k) {
6203 if (DCCEnable[k] == true) {
6204 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6205 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6206 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6207 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6208 if (SourceScan[k] != dm_vert) {
6209 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6210 } else {
6211 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6212 }
6213 if (meta_row_remainder <= meta_chunk_threshold) {
6214 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6215 } else {
6216 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6217 }
6218 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6219 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6220 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6221 if (BytePerPixelC[k] == 0) {
6222 TimePerChromaMetaChunkNominal[k] = 0;
6223 TimePerChromaMetaChunkVBlank[k] = 0;
6224 TimePerChromaMetaChunkFlip[k] = 0;
6225 } else {
6226 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6227 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6228 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6229 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6230 if (SourceScan[k] != dm_vert) {
6231 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6232 } else {
6233 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6234 }
6235 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6236 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6237 } else {
6238 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6239 }
6240 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6241 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6242 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6243 }
6244 } else {
6245 TimePerMetaChunkNominal[k] = 0;
6246 TimePerMetaChunkVBlank[k] = 0;
6247 TimePerMetaChunkFlip[k] = 0;
6248 TimePerChromaMetaChunkNominal[k] = 0;
6249 TimePerChromaMetaChunkVBlank[k] = 0;
6250 TimePerChromaMetaChunkFlip[k] = 0;
6251 }
6252 }
6253
6254 for (k = 0; k < NumberOfActivePlanes; ++k) {
6255 if (GPUVMEnable == true) {
6256 if (SourceScan[k] != dm_vert) {
6257 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6258 } else {
6259 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6260 }
6261 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6262 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6263 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6264 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6265 if (BytePerPixelC[k] == 0) {
6266 time_per_pte_group_nom_chroma[k] = 0;
6267 time_per_pte_group_vblank_chroma[k] = 0;
6268 time_per_pte_group_flip_chroma[k] = 0;
6269 } else {
6270 if (SourceScan[k] != dm_vert) {
6271 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6272 } else {
6273 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6274 }
6275 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6276 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6277 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6278 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6279 }
6280 } else {
6281 time_per_pte_group_nom_luma[k] = 0;
6282 time_per_pte_group_vblank_luma[k] = 0;
6283 time_per_pte_group_flip_luma[k] = 0;
6284 time_per_pte_group_nom_chroma[k] = 0;
6285 time_per_pte_group_vblank_chroma[k] = 0;
6286 time_per_pte_group_flip_chroma[k] = 0;
6287 }
6288 }
6289 }
6290
/*
 * CalculateVMGroupAndRequestTimes - per-plane VM group and VM request times.
 *
 * A plane gets non-zero times only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that plane are 0.
 *
 * Two counts are built from the per-frame byte totals:
 *   - groups:   sum of ceil(bytes / vm_group_bytes[k]) over the contributing
 *               totals (plus 1, or 2 with chroma, when both dpde0 and
 *               meta PTE totals contribute);
 *   - requests: sum of bytes / 64 (integer division) over the same totals.
 * Contributing totals:
 *   - DCC off:                        dpde0 only
 *   - DCC on, one page table level:   meta PTE only
 *   - DCC on, more than one level:    dpde0 and meta PTE
 * Chroma totals are included only when BytePerPixelC[k] > 0.
 *
 * Each output is DestinationLinesToRequestVM* * HTotal / PixelClock divided
 * by the matching count, and halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool has_chroma = (BytePerPixelC[k] > 0);
		int groups;
		int requests;

		if (!(GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			if (has_chroma) {
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (has_chroma) {
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Page directory entries and meta PTEs together. */
			if (has_chroma) {
				groups = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				requests = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / groups;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / requests;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / requests;

		/* More than two page table levels: every time is halved. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
			TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
			TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
			TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
		}
	}
}
6390
6391 static void CalculateStutterEfficiency(
6392 struct display_mode_lib *mode_lib,
6393 int CompressedBufferSizeInkByte,
6394 bool UnboundedRequestEnabled,
6395 int ConfigReturnBufferSizeInKByte,
6396 int MetaFIFOSizeInKEntries,
6397 int ZeroSizeBufferEntries,
6398 int NumberOfActivePlanes,
6399 int ROBBufferSizeInKByte,
6400 double TotalDataReadBandwidth,
6401 double DCFCLK,
6402 double ReturnBW,
6403 double COMPBUF_RESERVED_SPACE_64B,
6404 double COMPBUF_RESERVED_SPACE_ZS,
6405 double SRExitTime,
6406 double SRExitZ8Time,
6407 bool SynchronizedVBlank,
6408 double Z8StutterEnterPlusExitWatermark,
6409 double StutterEnterPlusExitWatermark,
6410 bool ProgressiveToInterlaceUnitInOPP,
6411 bool Interlace[],
6412 double MinTTUVBlank[],
6413 int DPPPerPlane[],
6414 unsigned int DETBufferSizeY[],
6415 int BytePerPixelY[],
6416 double BytePerPixelDETY[],
6417 double SwathWidthY[],
6418 int SwathHeightY[],
6419 int SwathHeightC[],
6420 double NetDCCRateLuma[],
6421 double NetDCCRateChroma[],
6422 double DCCFractionOfZeroSizeRequestsLuma[],
6423 double DCCFractionOfZeroSizeRequestsChroma[],
6424 int HTotal[],
6425 int VTotal[],
6426 double PixelClock[],
6427 double VRatio[],
6428 enum scan_direction_class SourceScan[],
6429 int BlockHeight256BytesY[],
6430 int BlockWidth256BytesY[],
6431 int BlockHeight256BytesC[],
6432 int BlockWidth256BytesC[],
6433 int DCCYMaxUncompressedBlock[],
6434 int DCCCMaxUncompressedBlock[],
6435 int VActive[],
6436 bool DCCEnable[],
6437 bool WritebackEnable[],
6438 double ReadBandwidthPlaneLuma[],
6439 double ReadBandwidthPlaneChroma[],
6440 double meta_row_bw[],
6441 double dpte_row_bw[],
6442 double *StutterEfficiencyNotIncludingVBlank,
6443 double *StutterEfficiency,
6444 int *NumberOfStutterBurstsPerFrame,
6445 double *Z8StutterEfficiencyNotIncludingVBlank,
6446 double *Z8StutterEfficiency,
6447 int *Z8NumberOfStutterBurstsPerFrame,
6448 double *StutterPeriod)
6449 {
6450 struct vba_vars_st *v = &mode_lib->vba;
6451
6452 double DETBufferingTimeY;
6453 double SwathWidthYCriticalPlane = 0;
6454 double VActiveTimeCriticalPlane = 0;
6455 double FrameTimeCriticalPlane = 0;
6456 int BytePerPixelYCriticalPlane = 0;
6457 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6458 double MinTTUVBlankCriticalPlane = 0;
6459 double TotalCompressedReadBandwidth;
6460 double TotalRowReadBandwidth;
6461 double AverageDCCCompressionRate;
6462 double EffectiveCompressedBufferSize;
6463 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6464 double StutterBurstTime;
6465 int TotalActiveWriteback;
6466 double LinesInDETY;
6467 double LinesInDETYRoundedDownToSwath;
6468 double MaximumEffectiveCompressionLuma;
6469 double MaximumEffectiveCompressionChroma;
6470 double TotalZeroSizeRequestReadBandwidth;
6471 double TotalZeroSizeCompressedReadBandwidth;
6472 double AverageDCCZeroSizeFraction;
6473 double AverageZeroSizeCompressionRate;
6474 int TotalNumberOfActiveOTG = 0;
6475 double LastStutterPeriod = 0.0;
6476 double LastZ8StutterPeriod = 0.0;
6477 int k;
6478
6479 TotalZeroSizeRequestReadBandwidth = 0;
6480 TotalZeroSizeCompressedReadBandwidth = 0;
6481 TotalRowReadBandwidth = 0;
6482 TotalCompressedReadBandwidth = 0;
6483
6484 for (k = 0; k < NumberOfActivePlanes; ++k) {
6485 if (DCCEnable[k] == true) {
6486 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6487 || DCCYMaxUncompressedBlock[k] < 256) {
6488 MaximumEffectiveCompressionLuma = 2;
6489 } else {
6490 MaximumEffectiveCompressionLuma = 4;
6491 }
6492 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6493 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6494 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6495 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6496 if (ReadBandwidthPlaneChroma[k] > 0) {
6497 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6498 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6499 MaximumEffectiveCompressionChroma = 2;
6500 } else {
6501 MaximumEffectiveCompressionChroma = 4;
6502 }
6503 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6504 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6505 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6506 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6507 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6508 }
6509 } else {
6510 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6511 }
6512 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6513 }
6514
6515 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6516 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6517
6518 #ifdef __DML_VBA_DEBUG__
6519 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6520 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6521 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6522 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6523 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6524 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6525 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6526 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6527 #endif
6528
6529 if (AverageDCCZeroSizeFraction == 1) {
6530 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6531 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6532 } else if (AverageDCCZeroSizeFraction > 0) {
6533 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6534 EffectiveCompressedBufferSize = dml_min(
6535 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6536 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6537 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6538 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6539 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6540 dml_print(
6541 "DML::%s: min 2 = %f\n",
6542 __func__,
6543 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6544 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6545 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6546 } else {
6547 EffectiveCompressedBufferSize = dml_min(
6548 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6549 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6550 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6551 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6552 }
6553
6554 #ifdef __DML_VBA_DEBUG__
6555 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6556 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6557 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6558 #endif
6559
6560 *StutterPeriod = 0;
6561 for (k = 0; k < NumberOfActivePlanes; ++k) {
6562 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6563 / BytePerPixelDETY[k] / SwathWidthY[k];
6564 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6565 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6566 #ifdef __DML_VBA_DEBUG__
6567 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6568 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6569 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6570 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6571 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6572 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6573 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6574 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6575 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6576 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6577 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6578 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6579 #endif
6580
6581 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6582 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6583
6584 *StutterPeriod = DETBufferingTimeY;
6585 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6586 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6587 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6588 SwathWidthYCriticalPlane = SwathWidthY[k];
6589 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6590 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6591
6592 #ifdef __DML_VBA_DEBUG__
6593 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6594 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6595 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6596 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6597 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6598 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6599 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6600 #endif
6601 }
6602 }
6603
6604 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6605 #ifdef __DML_VBA_DEBUG__
6606 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6607 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6608 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6609 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6610 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6611 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6612 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6613 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6614 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6615 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6616 #endif
6617
6618 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6619 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6620 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6621 #ifdef __DML_VBA_DEBUG__
6622 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6623 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6624 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6625 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6626 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6627 #endif
6628 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6629
6630 dml_print(
6631 "DML::%s: Time to finish residue swath=%f\n",
6632 __func__,
6633 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6634
6635 TotalActiveWriteback = 0;
6636 for (k = 0; k < NumberOfActivePlanes; ++k) {
6637 if (WritebackEnable[k]) {
6638 TotalActiveWriteback = TotalActiveWriteback + 1;
6639 }
6640 }
6641
6642 if (TotalActiveWriteback == 0) {
6643 #ifdef __DML_VBA_DEBUG__
6644 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6645 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6646 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6647 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6648 #endif
6649 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6650 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6651 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6652 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6653 } else {
6654 *StutterEfficiencyNotIncludingVBlank = 0.;
6655 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6656 *NumberOfStutterBurstsPerFrame = 0;
6657 *Z8NumberOfStutterBurstsPerFrame = 0;
6658 }
6659 #ifdef __DML_VBA_DEBUG__
6660 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6661 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6662 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6663 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6664 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6665 #endif
6666
6667 for (k = 0; k < NumberOfActivePlanes; ++k) {
6668 if (v->BlendingAndTiming[k] == k) {
6669 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6670 }
6671 }
6672
6673 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6674 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6675
6676 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6677 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6678 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6679 } else {
6680 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6681 }
6682 } else {
6683 *StutterEfficiency = 0;
6684 }
6685
6686 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6687 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6688 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6689 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6690 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6691 } else {
6692 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6693 }
6694 } else {
6695 *Z8StutterEfficiency = 0.;
6696 }
6697
6698 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6699 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6700 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6701 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6702 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6703 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6704 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6705 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6706 }
6707
6708 static void CalculateSwathAndDETConfiguration(
6709 bool ForceSingleDPP,
6710 int NumberOfActivePlanes,
6711 unsigned int DETBufferSizeInKByte,
6712 double MaximumSwathWidthLuma[],
6713 double MaximumSwathWidthChroma[],
6714 enum scan_direction_class SourceScan[],
6715 enum source_format_class SourcePixelFormat[],
6716 enum dm_swizzle_mode SurfaceTiling[],
6717 int ViewportWidth[],
6718 int ViewportHeight[],
6719 int SurfaceWidthY[],
6720 int SurfaceWidthC[],
6721 int SurfaceHeightY[],
6722 int SurfaceHeightC[],
6723 int Read256BytesBlockHeightY[],
6724 int Read256BytesBlockHeightC[],
6725 int Read256BytesBlockWidthY[],
6726 int Read256BytesBlockWidthC[],
6727 enum odm_combine_mode ODMCombineEnabled[],
6728 int BlendingAndTiming[],
6729 int BytePerPixY[],
6730 int BytePerPixC[],
6731 double BytePerPixDETY[],
6732 double BytePerPixDETC[],
6733 int HActive[],
6734 double HRatio[],
6735 double HRatioChroma[],
6736 int DPPPerPlane[],
6737 int swath_width_luma_ub[],
6738 int swath_width_chroma_ub[],
6739 double SwathWidth[],
6740 double SwathWidthChroma[],
6741 int SwathHeightY[],
6742 int SwathHeightC[],
6743 unsigned int DETBufferSizeY[],
6744 unsigned int DETBufferSizeC[],
6745 bool ViewportSizeSupportPerPlane[],
6746 bool *ViewportSizeSupport)
6747 {
6748 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6749 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6750 int MinimumSwathHeightY;
6751 int MinimumSwathHeightC;
6752 int RoundedUpMaxSwathSizeBytesY;
6753 int RoundedUpMaxSwathSizeBytesC;
6754 int RoundedUpMinSwathSizeBytesY;
6755 int RoundedUpMinSwathSizeBytesC;
6756 int RoundedUpSwathSizeBytesY;
6757 int RoundedUpSwathSizeBytesC;
6758 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6759 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6760 int k;
6761
6762 CalculateSwathWidth(
6763 ForceSingleDPP,
6764 NumberOfActivePlanes,
6765 SourcePixelFormat,
6766 SourceScan,
6767 ViewportWidth,
6768 ViewportHeight,
6769 SurfaceWidthY,
6770 SurfaceWidthC,
6771 SurfaceHeightY,
6772 SurfaceHeightC,
6773 ODMCombineEnabled,
6774 BytePerPixY,
6775 BytePerPixC,
6776 Read256BytesBlockHeightY,
6777 Read256BytesBlockHeightC,
6778 Read256BytesBlockWidthY,
6779 Read256BytesBlockWidthC,
6780 BlendingAndTiming,
6781 HActive,
6782 HRatio,
6783 DPPPerPlane,
6784 SwathWidthSingleDPP,
6785 SwathWidthSingleDPPChroma,
6786 SwathWidth,
6787 SwathWidthChroma,
6788 MaximumSwathHeightY,
6789 MaximumSwathHeightC,
6790 swath_width_luma_ub,
6791 swath_width_chroma_ub);
6792
6793 *ViewportSizeSupport = true;
6794 for (k = 0; k < NumberOfActivePlanes; ++k) {
6795 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6796 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6797 if (SurfaceTiling[k] == dm_sw_linear
6798 || (SourcePixelFormat[k] == dm_444_64
6799 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6800 && SourceScan[k] != dm_vert)) {
6801 MinimumSwathHeightY = MaximumSwathHeightY[k];
6802 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6803 MinimumSwathHeightY = MaximumSwathHeightY[k];
6804 } else {
6805 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6806 }
6807 MinimumSwathHeightC = MaximumSwathHeightC[k];
6808 } else {
6809 if (SurfaceTiling[k] == dm_sw_linear) {
6810 MinimumSwathHeightY = MaximumSwathHeightY[k];
6811 MinimumSwathHeightC = MaximumSwathHeightC[k];
6812 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6813 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6814 MinimumSwathHeightC = MaximumSwathHeightC[k];
6815 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6816 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6817 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6818 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6819 MinimumSwathHeightY = MaximumSwathHeightY[k];
6820 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6821 } else {
6822 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6823 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6824 }
6825 }
6826
6827 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6828 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6829 if (SourcePixelFormat[k] == dm_420_10) {
6830 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6831 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6832 }
6833 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6834 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6835 if (SourcePixelFormat[k] == dm_420_10) {
6836 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6837 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6838 }
6839
6840 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6841 SwathHeightY[k] = MaximumSwathHeightY[k];
6842 SwathHeightC[k] = MaximumSwathHeightC[k];
6843 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6844 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6845 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6846 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6847 SwathHeightY[k] = MinimumSwathHeightY;
6848 SwathHeightC[k] = MaximumSwathHeightC[k];
6849 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6850 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6851 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6852 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6853 SwathHeightY[k] = MaximumSwathHeightY[k];
6854 SwathHeightC[k] = MinimumSwathHeightC;
6855 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6856 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6857 } else {
6858 SwathHeightY[k] = MinimumSwathHeightY;
6859 SwathHeightC[k] = MinimumSwathHeightC;
6860 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6861 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6862 }
6863 {
6864 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6865
6866 if (SwathHeightC[k] == 0) {
6867 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6868 DETBufferSizeC[k] = 0;
6869 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6870 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6871 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6872 } else {
6873 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6874 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6875 }
6876
6877 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6878 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6879 *ViewportSizeSupport = false;
6880 ViewportSizeSupportPerPlane[k] = false;
6881 } else {
6882 ViewportSizeSupportPerPlane[k] = true;
6883 }
6884 }
6885 }
6886 }
6887
6888 static void CalculateSwathWidth(
6889 bool ForceSingleDPP,
6890 int NumberOfActivePlanes,
6891 enum source_format_class SourcePixelFormat[],
6892 enum scan_direction_class SourceScan[],
6893 int ViewportWidth[],
6894 int ViewportHeight[],
6895 int SurfaceWidthY[],
6896 int SurfaceWidthC[],
6897 int SurfaceHeightY[],
6898 int SurfaceHeightC[],
6899 enum odm_combine_mode ODMCombineEnabled[],
6900 int BytePerPixY[],
6901 int BytePerPixC[],
6902 int Read256BytesBlockHeightY[],
6903 int Read256BytesBlockHeightC[],
6904 int Read256BytesBlockWidthY[],
6905 int Read256BytesBlockWidthC[],
6906 int BlendingAndTiming[],
6907 int HActive[],
6908 double HRatio[],
6909 int DPPPerPlane[],
6910 double SwathWidthSingleDPPY[],
6911 double SwathWidthSingleDPPC[],
6912 double SwathWidthY[],
6913 double SwathWidthC[],
6914 int MaximumSwathHeightY[],
6915 int MaximumSwathHeightC[],
6916 int swath_width_luma_ub[],
6917 int swath_width_chroma_ub[])
6918 {
6919 enum odm_combine_mode MainPlaneODMCombine;
6920 int j, k;
6921
6922 #ifdef __DML_VBA_DEBUG__
6923 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6924 #endif
6925
6926 for (k = 0; k < NumberOfActivePlanes; ++k) {
6927 if (SourceScan[k] != dm_vert) {
6928 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6929 } else {
6930 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6931 }
6932
6933 #ifdef __DML_VBA_DEBUG__
6934 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6935 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6936 #endif
6937
6938 MainPlaneODMCombine = ODMCombineEnabled[k];
6939 for (j = 0; j < NumberOfActivePlanes; ++j) {
6940 if (BlendingAndTiming[k] == j) {
6941 MainPlaneODMCombine = ODMCombineEnabled[j];
6942 }
6943 }
6944
6945 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6946 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6947 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6948 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6949 else if (DPPPerPlane[k] == 2)
6950 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6951 else
6952 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6953
6954 #ifdef __DML_VBA_DEBUG__
6955 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6956 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6957 #endif
6958
6959 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6960 SwathWidthC[k] = SwathWidthY[k] / 2;
6961 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6962 } else {
6963 SwathWidthC[k] = SwathWidthY[k];
6964 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6965 }
6966
6967 if (ForceSingleDPP == true) {
6968 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6969 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6970 }
6971 {
6972 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6973 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6974
6975 #ifdef __DML_VBA_DEBUG__
6976 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6977 #endif
6978
6979 if (SourceScan[k] != dm_vert) {
6980 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6981 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6982 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6983 if (BytePerPixC[k] > 0) {
6984 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6985
6986 swath_width_chroma_ub[k] = dml_min(
6987 surface_width_ub_c,
6988 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6989 } else {
6990 swath_width_chroma_ub[k] = 0;
6991 }
6992 } else {
6993 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6994 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6995 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6996 if (BytePerPixC[k] > 0) {
6997 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6998
6999 swath_width_chroma_ub[k] = dml_min(
7000 surface_height_ub_c,
7001 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7002 } else {
7003 swath_width_chroma_ub[k] = 0;
7004 }
7005 }
7006 }
7007 }
7008 }
7009
7010 static double CalculateExtraLatency(
7011 int RoundTripPingLatencyCycles,
7012 int ReorderingBytes,
7013 double DCFCLK,
7014 int TotalNumberOfActiveDPP,
7015 int PixelChunkSizeInKByte,
7016 int TotalNumberOfDCCActiveDPP,
7017 int MetaChunkSize,
7018 double ReturnBW,
7019 bool GPUVMEnable,
7020 bool HostVMEnable,
7021 int NumberOfActivePlanes,
7022 int NumberOfDPP[],
7023 int dpte_group_bytes[],
7024 double HostVMInefficiencyFactor,
7025 double HostVMMinPageSize,
7026 int HostVMMaxNonCachedPageTableLevels)
7027 {
7028 double ExtraLatencyBytes;
7029 double ExtraLatency;
7030
7031 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7032 ReorderingBytes,
7033 TotalNumberOfActiveDPP,
7034 PixelChunkSizeInKByte,
7035 TotalNumberOfDCCActiveDPP,
7036 MetaChunkSize,
7037 GPUVMEnable,
7038 HostVMEnable,
7039 NumberOfActivePlanes,
7040 NumberOfDPP,
7041 dpte_group_bytes,
7042 HostVMInefficiencyFactor,
7043 HostVMMinPageSize,
7044 HostVMMaxNonCachedPageTableLevels);
7045
7046 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7047
7048 #ifdef __DML_VBA_DEBUG__
7049 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7050 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7051 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7052 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7053 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7054 #endif
7055
7056 return ExtraLatency;
7057 }
7058
/*
 * Returns the byte count used by CalculateExtraLatency():
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable is set, for each active plane:
 *     NumberOfDPP * dpte_group_bytes * (1 + 8 * levels) * HostVMInefficiencyFactor
 *
 * "levels" is 0 unless both GPUVM and HostVM are enabled; otherwise it is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not below zero)
 * depending on which HostVMMinPageSize threshold (2048, 1048576) is reached.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int DynamicLevels = 0;
	int PerGroupFactor;
	int plane;
	double TotalBytes;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* The per-plane dpte group term is only added when GPUVM is enabled. */
	if (!GPUVMEnable)
		return TotalBytes;

	PerGroupFactor = 1 + 8 * DynamicLevels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		TotalBytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * PerGroupFactor * HostVMInefficiencyFactor;

	return TotalBytes;
}
7096
/*
 * Returns the largest of the three urgent-latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCaseLatency = dml_max3(
			UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCaseLatency;

	/* The added term is zero when FabricClock equals the reference clock. */
	return WorstCaseLatency
			+ UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7113
7114 static noinline_for_stack void UseMinimumDCFCLK(
7115 struct display_mode_lib *mode_lib,
7116 int MaxPrefetchMode,
7117 int ReorderingBytes)
7118 {
7119 struct vba_vars_st *v = &mode_lib->vba;
7120 int dummy1, i, j, k;
7121 double NormalEfficiency, dummy2, dummy3;
7122 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7123
7124 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7125 for (i = 0; i < v->soc.num_states; ++i) {
7126 for (j = 0; j <= 1; ++j) {
7127 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7128 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7129 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7130 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7131 double MinimumTWait;
7132 double NonDPTEBandwidth;
7133 double DPTEBandwidth;
7134 double DCFCLKRequiredForAverageBandwidth;
7135 double ExtraLatencyBytes;
7136 double ExtraLatencyCycles;
7137 double DCFCLKRequiredForPeakBandwidth;
7138 int NoOfDPPState[DC__NUM_DPP__MAX];
7139 double MinimumTvmPlus2Tr0;
7140
7141 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7142 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7143 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7144 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7145 }
7146
7147 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7148 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7149
7150 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7151 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7152 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7153 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7154 DCFCLKRequiredForAverageBandwidth = dml_max3(
7155 v->ProjectedDCFCLKDeepSleep[i][j],
7156 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7157 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7158 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7159
7160 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7161 ReorderingBytes,
7162 v->TotalNumberOfActiveDPP[i][j],
7163 v->PixelChunkSizeInKByte,
7164 v->TotalNumberOfDCCActiveDPP[i][j],
7165 v->MetaChunkSize,
7166 v->GPUVMEnable,
7167 v->HostVMEnable,
7168 v->NumberOfActivePlanes,
7169 NoOfDPPState,
7170 v->dpte_group_bytes,
7171 1,
7172 v->HostVMMinPageSize,
7173 v->HostVMMaxNonCachedPageTableLevels);
7174 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7175 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7176 double DCFCLKCyclesRequiredInPrefetch;
7177 double ExpectedPrefetchBWAcceleration;
7178 double PrefetchTime;
7179
7180 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7181 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7182 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7183 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7184 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7185 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7186 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7187 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7188 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7189 DynamicMetadataVMExtraLatency[k] =
7190 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7191 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7192 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7193 - v->UrgLatency[i]
7194 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7195 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7196 - DynamicMetadataVMExtraLatency[k];
7197
7198 if (PrefetchTime > 0) {
7199 double ExpectedVRatioPrefetch;
7200
7201 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7202 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7203 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7204 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7205 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7206 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7207 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7208 }
7209 } else {
7210 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7211 }
7212 if (v->DynamicMetadataEnable[k] == true) {
7213 double TSetupPipe;
7214 double TdmbfPipe;
7215 double TdmsksPipe;
7216 double TdmecPipe;
7217 double AllowedTimeForUrgentExtraLatency;
7218
7219 CalculateVupdateAndDynamicMetadataParameters(
7220 v->MaxInterDCNTileRepeaters,
7221 v->RequiredDPPCLK[i][j][k],
7222 v->RequiredDISPCLK[i][j],
7223 v->ProjectedDCFCLKDeepSleep[i][j],
7224 v->PixelClock[k],
7225 v->HTotal[k],
7226 v->VTotal[k] - v->VActive[k],
7227 v->DynamicMetadataTransmittedBytes[k],
7228 v->DynamicMetadataLinesBeforeActiveRequired[k],
7229 v->Interlace[k],
7230 v->ProgressiveToInterlaceUnitInOPP,
7231 &TSetupPipe,
7232 &TdmbfPipe,
7233 &TdmecPipe,
7234 &TdmsksPipe,
7235 &dummy1,
7236 &dummy2,
7237 &dummy3);
7238 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7239 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7240 if (AllowedTimeForUrgentExtraLatency > 0) {
7241 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7242 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7243 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7244 } else {
7245 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7246 }
7247 }
7248 }
7249 DCFCLKRequiredForPeakBandwidth = 0;
7250 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7251 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7252
7253 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7254 * (v->GPUVMEnable == true ?
7255 (v->HostVMEnable == true ?
7256 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7257 0);
7258 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7259 double MaximumTvmPlus2Tr0PlusTsw;
7260
7261 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7262 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7263 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7264 } else {
7265 DCFCLKRequiredForPeakBandwidth = dml_max3(
7266 DCFCLKRequiredForPeakBandwidth,
7267 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7268 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7269 }
7270 }
7271 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7272 }
7273 }
7274 }
7275
7276 static void CalculateUnboundedRequestAndCompressedBufferSize(
7277 unsigned int DETBufferSizeInKByte,
7278 int ConfigReturnBufferSizeInKByte,
7279 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7280 int TotalActiveDPP,
7281 bool NoChromaPlanes,
7282 int MaxNumDPP,
7283 int CompressedBufferSegmentSizeInkByteFinal,
7284 enum output_encoder_class *Output,
7285 bool *UnboundedRequestEnabled,
7286 int *CompressedBufferSizeInkByte)
7287 {
7288 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7289
7290 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7291 *CompressedBufferSizeInkByte = (
7292 *UnboundedRequestEnabled == true ?
7293 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7294 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7295 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7296
7297 #ifdef __DML_VBA_DEBUG__
7298 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7299 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7300 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7301 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7302 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7303 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7304 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7305 #endif
7306 }
7307
7308 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7309 {
7310 bool ret_val = false;
7311
7312 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7313 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7314 ret_val = false;
7315 return ret_val;
7316 }
7317
/*
 * CalculateMaxVStartup - upper bound for VStartup, in lines.
 *
 * The usable blanking size is min(VTotal - VActive, vblank_avail), where
 * vblank_avail is VBlankNom when that is non-zero and otherwise
 * VBlankNomDefaultUS converted to whole lines using the line time
 * HTotal / PixelClock (the names suggest microseconds - confirm with the
 * callers).
 *
 *   - Interlaced timing without progressive-to-interlace conversion in OPP:
 *     half of the blanking size, via dml_floor.
 *   - Otherwise: the blanking size minus the writeback delay expressed in
 *     lines (via dml_ceil, at least 1).
 *
 * The result never exceeds 1023.
 *
 * If the writeback delay needs at least as many lines as the blanking size
 * provides, 0 is returned.  The original code converted the negative
 * difference straight to unsigned int, which is undefined behaviour in C and
 * on targets where the conversion wraps ended up being reported as 1023.
 *
 * PixelClock is used as a divisor and is not checked against zero here.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	unsigned int MaxVStartup = 0;
	unsigned int vblank_size = 0;
	double line_time_us = HTotal / PixelClock;
	unsigned int vblank_actual = VTotal - VActive;
	unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
	/* A VBlankNom of 0 selects the default nominal blanking. */
	unsigned int vblank_avail = VBlankNom == 0 ? vblank_nom_default_in_line : VBlankNom;

	vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);
	if (Interlace && !ProgressiveTointerlaceUnitinOPP) {
		MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
	} else {
		double writeback_delay_lines = dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));

		/* Only subtract when the result is representable as unsigned. */
		if (writeback_delay_lines < vblank_size)
			MaxVStartup = vblank_size - writeback_delay_lines;
		else
			MaxVStartup = 0;
	}
	if (MaxVStartup > 1023)
		MaxVStartup = 1023;
	return MaxVStartup;
}
7346