1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #define UNIT_TEST 0
28 #if !UNIT_TEST
29 #include "dc.h"
30 #endif
31 #include "../display_mode_lib.h"
32 #include "display_mode_vba_314.h"
33 #include "../dml_inline_defs.h"
34
35 /*
36 * NOTE:
37 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 *
39 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
40 * ways. Unless there is something clearly wrong with it the code should
41 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 */
43
44 #define BPP_INVALID 0
45 #define BPP_BLENDED_PIPE 0xffffffff
46 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184
47 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
48
// For DML-C changes that haven't been propagated to VBA yet
50 //#define __DML_VBA_ALLOW_DELTA__
51
52 // Move these to ip parameters/constant
53
54 // At which vstartup the DML start to try if the mode can be supported
55 #define __DML_VBA_MIN_VSTARTUP__ 9
56
57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
59
// fudge factor for min dcfclk calculation
61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
62
/*
 * Per-pipe parameter bundle. CalculatePrefetchSchedule() receives a pointer
 * to one of these as its "myPipe" argument instead of taking each value as a
 * separate parameter.
 *
 * NOTE(review): the notes on individual fields are derived from the field
 * names and from the uses visible in CalculatePrefetchSchedule(); units are
 * not stated here - confirm against the code that fills the structure.
 */
typedef struct {
	/*
	 * Clock values. All four are forwarded to
	 * CalculateVupdateAndDynamicMetadataParameters().
	 */
	double DPPCLK;
	double DISPCLK;
	double PixelClock;	/* divisor in LineTime = HTotal / PixelClock */
	double DCFCLKDeepSleep;
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	/* Vertical scaling ratios; presumably luma and chroma - TODO confirm */
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	/* 256-byte block dimensions; Y/C suffix presumably luma/chroma - TODO confirm */
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* Forwarded to CalculateVupdateAndDynamicMetadataParameters() */
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	/* Forwarded to CalculateVupdateAndDynamicMetadataParameters() */
	unsigned int VBlank;
	/* Numerator in LineTime = HTotal / PixelClock */
	unsigned int HTotal;
	unsigned int DCCEnable;
	bool ODMCombineIsEnabled;
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	/* Forwarded to CalculateVupdateAndDynamicMetadataParameters() */
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
88
89 #define BPP_INVALID 0
90 #define BPP_BLENDED_PIPE 0xffffffff
91
92 static bool CalculateBytePerPixelAnd256BBlockSizes(
93 enum source_format_class SourcePixelFormat,
94 enum dm_swizzle_mode SurfaceTiling,
95 unsigned int *BytePerPixelY,
96 unsigned int *BytePerPixelC,
97 double *BytePerPixelDETY,
98 double *BytePerPixelDETC,
99 unsigned int *BlockHeight256BytesY,
100 unsigned int *BlockHeight256BytesC,
101 unsigned int *BlockWidth256BytesY,
102 unsigned int *BlockWidth256BytesC);
103 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
104 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
105 static unsigned int dscceComputeDelay(
106 unsigned int bpc,
107 double BPP,
108 unsigned int sliceWidth,
109 unsigned int numSlices,
110 enum output_format_class pixelFormat,
111 enum output_encoder_class Output);
112 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
113 static bool CalculatePrefetchSchedule(
114 struct display_mode_lib *mode_lib,
115 double HostVMInefficiencyFactor,
116 Pipe *myPipe,
117 unsigned int DSCDelay,
118 double DPPCLKDelaySubtotalPlusCNVCFormater,
119 double DPPCLKDelaySCL,
120 double DPPCLKDelaySCLLBOnly,
121 double DPPCLKDelayCNVCCursor,
122 double DISPCLKDelaySubtotal,
123 unsigned int DPP_RECOUT_WIDTH,
124 enum output_format_class OutputFormat,
125 unsigned int MaxInterDCNTileRepeaters,
126 unsigned int VStartup,
127 unsigned int MaxVStartup,
128 unsigned int GPUVMPageTableLevels,
129 bool GPUVMEnable,
130 bool HostVMEnable,
131 unsigned int HostVMMaxNonCachedPageTableLevels,
132 double HostVMMinPageSize,
133 bool DynamicMetadataEnable,
134 bool DynamicMetadataVMEnabled,
135 int DynamicMetadataLinesBeforeActiveRequired,
136 unsigned int DynamicMetadataTransmittedBytes,
137 double UrgentLatency,
138 double UrgentExtraLatency,
139 double TCalc,
140 unsigned int PDEAndMetaPTEBytesFrame,
141 unsigned int MetaRowByte,
142 unsigned int PixelPTEBytesPerRow,
143 double PrefetchSourceLinesY,
144 unsigned int SwathWidthY,
145 double VInitPreFillY,
146 unsigned int MaxNumSwathY,
147 double PrefetchSourceLinesC,
148 unsigned int SwathWidthC,
149 double VInitPreFillC,
150 unsigned int MaxNumSwathC,
151 int swath_width_luma_ub,
152 int swath_width_chroma_ub,
153 unsigned int SwathHeightY,
154 unsigned int SwathHeightC,
155 double TWait,
156 double *DSTXAfterScaler,
157 double *DSTYAfterScaler,
158 double *DestinationLinesForPrefetch,
159 double *PrefetchBandwidth,
160 double *DestinationLinesToRequestVMInVBlank,
161 double *DestinationLinesToRequestRowInVBlank,
162 double *VRatioPrefetchY,
163 double *VRatioPrefetchC,
164 double *RequiredPrefetchPixDataBWLuma,
165 double *RequiredPrefetchPixDataBWChroma,
166 bool *NotEnoughTimeForDynamicMetadata,
167 double *Tno_bw,
168 double *prefetch_vmrow_bw,
169 double *Tdmdl_vm,
170 double *Tdmdl,
171 double *TSetup,
172 int *VUpdateOffsetPix,
173 double *VUpdateWidthPix,
174 double *VReadyOffsetPix);
175 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
176 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
177 static void CalculateDCCConfiguration(
178 bool DCCEnabled,
179 bool DCCProgrammingAssumesScanDirectionUnknown,
180 enum source_format_class SourcePixelFormat,
181 unsigned int SurfaceWidthLuma,
182 unsigned int SurfaceWidthChroma,
183 unsigned int SurfaceHeightLuma,
184 unsigned int SurfaceHeightChroma,
185 double DETBufferSize,
186 unsigned int RequestHeight256ByteLuma,
187 unsigned int RequestHeight256ByteChroma,
188 enum dm_swizzle_mode TilingFormat,
189 unsigned int BytePerPixelY,
190 unsigned int BytePerPixelC,
191 double BytePerPixelDETY,
192 double BytePerPixelDETC,
193 enum scan_direction_class ScanOrientation,
194 unsigned int *MaxUncompressedBlockLuma,
195 unsigned int *MaxUncompressedBlockChroma,
196 unsigned int *MaxCompressedBlockLuma,
197 unsigned int *MaxCompressedBlockChroma,
198 unsigned int *IndependentBlockLuma,
199 unsigned int *IndependentBlockChroma);
200 static double CalculatePrefetchSourceLines(
201 struct display_mode_lib *mode_lib,
202 double VRatio,
203 double vtaps,
204 bool Interlace,
205 bool ProgressiveToInterlaceUnitInOPP,
206 unsigned int SwathHeight,
207 unsigned int ViewportYStart,
208 double *VInitPreFill,
209 unsigned int *MaxNumSwath);
210 static unsigned int CalculateVMAndRowBytes(
211 struct display_mode_lib *mode_lib,
212 bool DCCEnable,
213 unsigned int BlockHeight256Bytes,
214 unsigned int BlockWidth256Bytes,
215 enum source_format_class SourcePixelFormat,
216 unsigned int SurfaceTiling,
217 unsigned int BytePerPixel,
218 enum scan_direction_class ScanDirection,
219 unsigned int SwathWidth,
220 unsigned int ViewportHeight,
221 bool GPUVMEnable,
222 bool HostVMEnable,
223 unsigned int HostVMMaxNonCachedPageTableLevels,
224 unsigned int GPUVMMinPageSize,
225 unsigned int HostVMMinPageSize,
226 unsigned int PTEBufferSizeInRequests,
227 unsigned int Pitch,
228 unsigned int DCCMetaPitch,
229 unsigned int *MacroTileWidth,
230 unsigned int *MetaRowByte,
231 unsigned int *PixelPTEBytesPerRow,
232 bool *PTEBufferSizeNotExceeded,
233 int *dpte_row_width_ub,
234 unsigned int *dpte_row_height,
235 unsigned int *MetaRequestWidth,
236 unsigned int *MetaRequestHeight,
237 unsigned int *meta_row_width,
238 unsigned int *meta_row_height,
239 int *vm_group_bytes,
240 unsigned int *dpte_group_bytes,
241 unsigned int *PixelPTEReqWidth,
242 unsigned int *PixelPTEReqHeight,
243 unsigned int *PTERequestSize,
244 int *DPDE0BytesFrame,
245 int *MetaPTEBytesFrame);
246 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
247 static void CalculateRowBandwidth(
248 bool GPUVMEnable,
249 enum source_format_class SourcePixelFormat,
250 double VRatio,
251 double VRatioChroma,
252 bool DCCEnable,
253 double LineTime,
254 unsigned int MetaRowByteLuma,
255 unsigned int MetaRowByteChroma,
256 unsigned int meta_row_height_luma,
257 unsigned int meta_row_height_chroma,
258 unsigned int PixelPTEBytesPerRowLuma,
259 unsigned int PixelPTEBytesPerRowChroma,
260 unsigned int dpte_row_height_luma,
261 unsigned int dpte_row_height_chroma,
262 double *meta_row_bw,
263 double *dpte_row_bw);
264
265 static void CalculateFlipSchedule(
266 struct display_mode_lib *mode_lib,
267 unsigned int k,
268 double HostVMInefficiencyFactor,
269 double UrgentExtraLatency,
270 double UrgentLatency,
271 double PDEAndMetaPTEBytesPerFrame,
272 double MetaRowBytes,
273 double DPTEBytesPerRow);
274 static double CalculateWriteBackDelay(
275 enum source_format_class WritebackPixelFormat,
276 double WritebackHRatio,
277 double WritebackVRatio,
278 unsigned int WritebackVTaps,
279 int WritebackDestinationWidth,
280 int WritebackDestinationHeight,
281 int WritebackSourceHeight,
282 unsigned int HTotal);
283
284 static void CalculateVupdateAndDynamicMetadataParameters(
285 int MaxInterDCNTileRepeaters,
286 double DPPCLK,
287 double DISPCLK,
288 double DCFClkDeepSleep,
289 double PixelClock,
290 int HTotal,
291 int VBlank,
292 int DynamicMetadataTransmittedBytes,
293 int DynamicMetadataLinesBeforeActiveRequired,
294 int InterlaceEnable,
295 bool ProgressiveToInterlaceUnitInOPP,
296 double *TSetup,
297 double *Tdmbf,
298 double *Tdmec,
299 double *Tdmsks,
300 int *VUpdateOffsetPix,
301 double *VUpdateWidthPix,
302 double *VReadyOffsetPix);
303
304 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
305 struct display_mode_lib *mode_lib,
306 unsigned int PrefetchMode,
307 double DCFCLK,
308 double ReturnBW,
309 double UrgentLatency,
310 double ExtraLatency,
311 double SOCCLK,
312 double DCFCLKDeepSleep,
313 unsigned int DETBufferSizeY[],
314 unsigned int DETBufferSizeC[],
315 unsigned int SwathHeightY[],
316 unsigned int SwathHeightC[],
317 double SwathWidthY[],
318 double SwathWidthC[],
319 unsigned int DPPPerPlane[],
320 double BytePerPixelDETY[],
321 double BytePerPixelDETC[],
322 bool UnboundedRequestEnabled,
323 unsigned int CompressedBufferSizeInkByte,
324 enum clock_change_support *DRAMClockChangeSupport,
325 double *StutterExitWatermark,
326 double *StutterEnterPlusExitWatermark,
327 double *Z8StutterExitWatermark,
328 double *Z8StutterEnterPlusExitWatermark);
329
330 static void CalculateDCFCLKDeepSleep(
331 struct display_mode_lib *mode_lib,
332 unsigned int NumberOfActivePlanes,
333 int BytePerPixelY[],
334 int BytePerPixelC[],
335 double VRatio[],
336 double VRatioChroma[],
337 double SwathWidthY[],
338 double SwathWidthC[],
339 unsigned int DPPPerPlane[],
340 double HRatio[],
341 double HRatioChroma[],
342 double PixelClock[],
343 double PSCL_THROUGHPUT[],
344 double PSCL_THROUGHPUT_CHROMA[],
345 double DPPCLK[],
346 double ReadBandwidthLuma[],
347 double ReadBandwidthChroma[],
348 int ReturnBusWidth,
349 double *DCFCLKDeepSleep);
350
351 static void CalculateUrgentBurstFactor(
352 int swath_width_luma_ub,
353 int swath_width_chroma_ub,
354 unsigned int SwathHeightY,
355 unsigned int SwathHeightC,
356 double LineTime,
357 double UrgentLatency,
358 double CursorBufferSize,
359 unsigned int CursorWidth,
360 unsigned int CursorBPP,
361 double VRatio,
362 double VRatioC,
363 double BytePerPixelInDETY,
364 double BytePerPixelInDETC,
365 double DETBufferSizeY,
366 double DETBufferSizeC,
367 double *UrgentBurstFactorCursor,
368 double *UrgentBurstFactorLuma,
369 double *UrgentBurstFactorChroma,
370 bool *NotEnoughUrgentLatencyHiding);
371
372 static void UseMinimumDCFCLK(
373 struct display_mode_lib *mode_lib,
374 int MaxPrefetchMode,
375 int ReorderingBytes);
376
377 static void CalculatePixelDeliveryTimes(
378 unsigned int NumberOfActivePlanes,
379 double VRatio[],
380 double VRatioChroma[],
381 double VRatioPrefetchY[],
382 double VRatioPrefetchC[],
383 unsigned int swath_width_luma_ub[],
384 unsigned int swath_width_chroma_ub[],
385 unsigned int DPPPerPlane[],
386 double HRatio[],
387 double HRatioChroma[],
388 double PixelClock[],
389 double PSCL_THROUGHPUT[],
390 double PSCL_THROUGHPUT_CHROMA[],
391 double DPPCLK[],
392 int BytePerPixelC[],
393 enum scan_direction_class SourceScan[],
394 unsigned int NumberOfCursors[],
395 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
396 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
397 unsigned int BlockWidth256BytesY[],
398 unsigned int BlockHeight256BytesY[],
399 unsigned int BlockWidth256BytesC[],
400 unsigned int BlockHeight256BytesC[],
401 double DisplayPipeLineDeliveryTimeLuma[],
402 double DisplayPipeLineDeliveryTimeChroma[],
403 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
404 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeLuma[],
406 double DisplayPipeRequestDeliveryTimeChroma[],
407 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
408 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
409 double CursorRequestDeliveryTime[],
410 double CursorRequestDeliveryTimePrefetch[]);
411
412 static void CalculateMetaAndPTETimes(
413 int NumberOfActivePlanes,
414 bool GPUVMEnable,
415 int MetaChunkSize,
416 int MinMetaChunkSizeBytes,
417 int HTotal[],
418 double VRatio[],
419 double VRatioChroma[],
420 double DestinationLinesToRequestRowInVBlank[],
421 double DestinationLinesToRequestRowInImmediateFlip[],
422 bool DCCEnable[],
423 double PixelClock[],
424 int BytePerPixelY[],
425 int BytePerPixelC[],
426 enum scan_direction_class SourceScan[],
427 int dpte_row_height[],
428 int dpte_row_height_chroma[],
429 int meta_row_width[],
430 int meta_row_width_chroma[],
431 int meta_row_height[],
432 int meta_row_height_chroma[],
433 int meta_req_width[],
434 int meta_req_width_chroma[],
435 int meta_req_height[],
436 int meta_req_height_chroma[],
437 int dpte_group_bytes[],
438 int PTERequestSizeY[],
439 int PTERequestSizeC[],
440 int PixelPTEReqWidthY[],
441 int PixelPTEReqHeightY[],
442 int PixelPTEReqWidthC[],
443 int PixelPTEReqHeightC[],
444 int dpte_row_width_luma_ub[],
445 int dpte_row_width_chroma_ub[],
446 double DST_Y_PER_PTE_ROW_NOM_L[],
447 double DST_Y_PER_PTE_ROW_NOM_C[],
448 double DST_Y_PER_META_ROW_NOM_L[],
449 double DST_Y_PER_META_ROW_NOM_C[],
450 double TimePerMetaChunkNominal[],
451 double TimePerChromaMetaChunkNominal[],
452 double TimePerMetaChunkVBlank[],
453 double TimePerChromaMetaChunkVBlank[],
454 double TimePerMetaChunkFlip[],
455 double TimePerChromaMetaChunkFlip[],
456 double time_per_pte_group_nom_luma[],
457 double time_per_pte_group_vblank_luma[],
458 double time_per_pte_group_flip_luma[],
459 double time_per_pte_group_nom_chroma[],
460 double time_per_pte_group_vblank_chroma[],
461 double time_per_pte_group_flip_chroma[]);
462
463 static void CalculateVMGroupAndRequestTimes(
464 unsigned int NumberOfActivePlanes,
465 bool GPUVMEnable,
466 unsigned int GPUVMMaxPageTableLevels,
467 unsigned int HTotal[],
468 int BytePerPixelC[],
469 double DestinationLinesToRequestVMInVBlank[],
470 double DestinationLinesToRequestVMInImmediateFlip[],
471 bool DCCEnable[],
472 double PixelClock[],
473 int dpte_row_width_luma_ub[],
474 int dpte_row_width_chroma_ub[],
475 int vm_group_bytes[],
476 unsigned int dpde0_bytes_per_frame_ub_l[],
477 unsigned int dpde0_bytes_per_frame_ub_c[],
478 int meta_pte_bytes_per_frame_ub_l[],
479 int meta_pte_bytes_per_frame_ub_c[],
480 double TimePerVMGroupVBlank[],
481 double TimePerVMGroupFlip[],
482 double TimePerVMRequestVBlank[],
483 double TimePerVMRequestFlip[]);
484
485 static void CalculateStutterEfficiency(
486 struct display_mode_lib *mode_lib,
487 int CompressedBufferSizeInkByte,
488 bool UnboundedRequestEnabled,
489 int ConfigReturnBufferSizeInKByte,
490 int MetaFIFOSizeInKEntries,
491 int ZeroSizeBufferEntries,
492 int NumberOfActivePlanes,
493 int ROBBufferSizeInKByte,
494 double TotalDataReadBandwidth,
495 double DCFCLK,
496 double ReturnBW,
497 double COMPBUF_RESERVED_SPACE_64B,
498 double COMPBUF_RESERVED_SPACE_ZS,
499 double SRExitTime,
500 double SRExitZ8Time,
501 bool SynchronizedVBlank,
502 double Z8StutterEnterPlusExitWatermark,
503 double StutterEnterPlusExitWatermark,
504 bool ProgressiveToInterlaceUnitInOPP,
505 bool Interlace[],
506 double MinTTUVBlank[],
507 int DPPPerPlane[],
508 unsigned int DETBufferSizeY[],
509 int BytePerPixelY[],
510 double BytePerPixelDETY[],
511 double SwathWidthY[],
512 int SwathHeightY[],
513 int SwathHeightC[],
514 double NetDCCRateLuma[],
515 double NetDCCRateChroma[],
516 double DCCFractionOfZeroSizeRequestsLuma[],
517 double DCCFractionOfZeroSizeRequestsChroma[],
518 int HTotal[],
519 int VTotal[],
520 double PixelClock[],
521 double VRatio[],
522 enum scan_direction_class SourceScan[],
523 int BlockHeight256BytesY[],
524 int BlockWidth256BytesY[],
525 int BlockHeight256BytesC[],
526 int BlockWidth256BytesC[],
527 int DCCYMaxUncompressedBlock[],
528 int DCCCMaxUncompressedBlock[],
529 int VActive[],
530 bool DCCEnable[],
531 bool WritebackEnable[],
532 double ReadBandwidthPlaneLuma[],
533 double ReadBandwidthPlaneChroma[],
534 double meta_row_bw[],
535 double dpte_row_bw[],
536 double *StutterEfficiencyNotIncludingVBlank,
537 double *StutterEfficiency,
538 int *NumberOfStutterBurstsPerFrame,
539 double *Z8StutterEfficiencyNotIncludingVBlank,
540 double *Z8StutterEfficiency,
541 int *Z8NumberOfStutterBurstsPerFrame,
542 double *StutterPeriod);
543
544 static void CalculateSwathAndDETConfiguration(
545 bool ForceSingleDPP,
546 int NumberOfActivePlanes,
547 unsigned int DETBufferSizeInKByte,
548 double MaximumSwathWidthLuma[],
549 double MaximumSwathWidthChroma[],
550 enum scan_direction_class SourceScan[],
551 enum source_format_class SourcePixelFormat[],
552 enum dm_swizzle_mode SurfaceTiling[],
553 int ViewportWidth[],
554 int ViewportHeight[],
555 int SurfaceWidthY[],
556 int SurfaceWidthC[],
557 int SurfaceHeightY[],
558 int SurfaceHeightC[],
559 int Read256BytesBlockHeightY[],
560 int Read256BytesBlockHeightC[],
561 int Read256BytesBlockWidthY[],
562 int Read256BytesBlockWidthC[],
563 enum odm_combine_mode ODMCombineEnabled[],
564 int BlendingAndTiming[],
565 int BytePerPixY[],
566 int BytePerPixC[],
567 double BytePerPixDETY[],
568 double BytePerPixDETC[],
569 int HActive[],
570 double HRatio[],
571 double HRatioChroma[],
572 int DPPPerPlane[],
573 int swath_width_luma_ub[],
574 int swath_width_chroma_ub[],
575 double SwathWidth[],
576 double SwathWidthChroma[],
577 int SwathHeightY[],
578 int SwathHeightC[],
579 unsigned int DETBufferSizeY[],
580 unsigned int DETBufferSizeC[],
581 bool ViewportSizeSupportPerPlane[],
582 bool *ViewportSizeSupport);
583 static void CalculateSwathWidth(
584 bool ForceSingleDPP,
585 int NumberOfActivePlanes,
586 enum source_format_class SourcePixelFormat[],
587 enum scan_direction_class SourceScan[],
588 int ViewportWidth[],
589 int ViewportHeight[],
590 int SurfaceWidthY[],
591 int SurfaceWidthC[],
592 int SurfaceHeightY[],
593 int SurfaceHeightC[],
594 enum odm_combine_mode ODMCombineEnabled[],
595 int BytePerPixY[],
596 int BytePerPixC[],
597 int Read256BytesBlockHeightY[],
598 int Read256BytesBlockHeightC[],
599 int Read256BytesBlockWidthY[],
600 int Read256BytesBlockWidthC[],
601 int BlendingAndTiming[],
602 int HActive[],
603 double HRatio[],
604 int DPPPerPlane[],
605 double SwathWidthSingleDPPY[],
606 double SwathWidthSingleDPPC[],
607 double SwathWidthY[],
608 double SwathWidthC[],
609 int MaximumSwathHeightY[],
610 int MaximumSwathHeightC[],
611 int swath_width_luma_ub[],
612 int swath_width_chroma_ub[]);
613
614 static double CalculateExtraLatency(
615 int RoundTripPingLatencyCycles,
616 int ReorderingBytes,
617 double DCFCLK,
618 int TotalNumberOfActiveDPP,
619 int PixelChunkSizeInKByte,
620 int TotalNumberOfDCCActiveDPP,
621 int MetaChunkSize,
622 double ReturnBW,
623 bool GPUVMEnable,
624 bool HostVMEnable,
625 int NumberOfActivePlanes,
626 int NumberOfDPP[],
627 int dpte_group_bytes[],
628 double HostVMInefficiencyFactor,
629 double HostVMMinPageSize,
630 int HostVMMaxNonCachedPageTableLevels);
631
632 static double CalculateExtraLatencyBytes(
633 int ReorderingBytes,
634 int TotalNumberOfActiveDPP,
635 int PixelChunkSizeInKByte,
636 int TotalNumberOfDCCActiveDPP,
637 int MetaChunkSize,
638 bool GPUVMEnable,
639 bool HostVMEnable,
640 int NumberOfActivePlanes,
641 int NumberOfDPP[],
642 int dpte_group_bytes[],
643 double HostVMInefficiencyFactor,
644 double HostVMMinPageSize,
645 int HostVMMaxNonCachedPageTableLevels);
646
647 static double CalculateUrgentLatency(
648 double UrgentLatencyPixelDataOnly,
649 double UrgentLatencyPixelMixedWithVMData,
650 double UrgentLatencyVMDataOnly,
651 bool DoUrgentLatencyAdjustment,
652 double UrgentLatencyAdjustmentFabricClockComponent,
653 double UrgentLatencyAdjustmentFabricClockReference,
654 double FabricClockSingle);
655
656 static void CalculateUnboundedRequestAndCompressedBufferSize(
657 unsigned int DETBufferSizeInKByte,
658 int ConfigReturnBufferSizeInKByte,
659 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
660 int TotalActiveDPP,
661 bool NoChromaPlanes,
662 int MaxNumDPP,
663 int CompressedBufferSegmentSizeInkByteFinal,
664 enum output_encoder_class *Output,
665 bool *UnboundedRequestEnabled,
666 int *CompressedBufferSizeInkByte);
667
668 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
669 static unsigned int CalculateMaxVStartup(
670 unsigned int VTotal,
671 unsigned int VActive,
672 unsigned int VBlankNom,
673 unsigned int HTotal,
674 double PixelClock,
675 bool ProgressiveTointerlaceUnitinOPP,
676 bool Interlace,
677 unsigned int VBlankNomDefaultUS,
678 double WritebackDelayTime);
679
/*
 * dml314_recalculate() - run the complete dml314 recalculation sequence.
 * @mode_lib: mode library instance handed unchanged to every stage.
 *
 * The stages run in a fixed order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * A trace line is printed ahead of the last stage when __DML_VBA_DEBUG__ is
 * defined.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	/* Stages 1-3. */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	/* Stage 4. */
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
690
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)691 static unsigned int dscceComputeDelay(
692 unsigned int bpc,
693 double BPP,
694 unsigned int sliceWidth,
695 unsigned int numSlices,
696 enum output_format_class pixelFormat,
697 enum output_encoder_class Output)
698 {
699 // valid bpc = source bits per component in the set of {8, 10, 12}
700 // valid bpp = increments of 1/16 of a bit
701 // min = 6/7/8 in N420/N422/444, respectively
702 // max = such that compression is 1:1
703 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
704 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
705 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
706
707 // fixed value
708 unsigned int rcModelSize = 8192;
709
710 // N422/N420 operate at 2 pixels per clock
711 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
712
713 if (pixelFormat == dm_420)
714 pixelsPerClock = 2;
715 else if (pixelFormat == dm_444)
716 pixelsPerClock = 1;
717 else if (pixelFormat == dm_n422)
718 pixelsPerClock = 2;
719 // #all other modes operate at 1 pixel per clock
720 else
721 pixelsPerClock = 1;
722
723 //initial transmit delay as per PPS
724 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
725
726 //compute ssm delay
727 if (bpc == 8)
728 D = 81;
729 else if (bpc == 10)
730 D = 89;
731 else
732 D = 113;
733
734 //divide by pixel per cycle to compute slice width as seen by DSC
735 w = sliceWidth / pixelsPerClock;
736
737 //422 mode has an additional cycle of delay
738 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
739 s = 0;
740 else
741 s = 1;
742
743 //main calculation for the dscce
744 ix = initalXmitDelay + 45;
745 wx = (w + 2) / 3;
746 P = 3 * wx - w;
747 l0 = ix / w;
748 a = ix + P * l0;
749 ax = (a + 2) / 3 + D + 6 + 1;
750 L = (ax + wx - 1) / wx;
751 if ((ix % w) == 0 && P != 0)
752 lstall = 1;
753 else
754 lstall = 0;
755 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
756
757 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
758 pixels = Delay * 3 * pixelsPerClock;
759 return pixels;
760 }
761
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)762 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
763 {
764 unsigned int Delay = 0;
765
766 if (pixelFormat == dm_420) {
767 // sfr
768 Delay = Delay + 2;
769 // dsccif
770 Delay = Delay + 0;
771 // dscc - input deserializer
772 Delay = Delay + 3;
773 // dscc gets pixels every other cycle
774 Delay = Delay + 2;
775 // dscc - input cdc fifo
776 Delay = Delay + 12;
777 // dscc gets pixels every other cycle
778 Delay = Delay + 13;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 7;
783 // dscc gets pixels every other cycle
784 Delay = Delay + 3;
785 // dscc - cdc uncertainty
786 Delay = Delay + 2;
787 // dscc - output serializer
788 Delay = Delay + 1;
789 // sft
790 Delay = Delay + 1;
791 } else if (pixelFormat == dm_n422) {
792 // sfr
793 Delay = Delay + 2;
794 // dsccif
795 Delay = Delay + 1;
796 // dscc - input deserializer
797 Delay = Delay + 5;
798 // dscc - input cdc fifo
799 Delay = Delay + 25;
800 // dscc - cdc uncertainty
801 Delay = Delay + 2;
802 // dscc - output cdc fifo
803 Delay = Delay + 10;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // dscc - output serializer
807 Delay = Delay + 1;
808 // sft
809 Delay = Delay + 1;
810 } else {
811 // sfr
812 Delay = Delay + 2;
813 // dsccif
814 Delay = Delay + 0;
815 // dscc - input deserializer
816 Delay = Delay + 3;
817 // dscc - input cdc fifo
818 Delay = Delay + 12;
819 // dscc - cdc uncertainty
820 Delay = Delay + 2;
821 // dscc - output cdc fifo
822 Delay = Delay + 7;
823 // dscc - output serializer
824 Delay = Delay + 1;
825 // dscc - cdc uncertainty
826 Delay = Delay + 2;
827 // sft
828 Delay = Delay + 1;
829 }
830
831 return Delay;
832 }
833
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)834 static bool CalculatePrefetchSchedule(
835 struct display_mode_lib *mode_lib,
836 double HostVMInefficiencyFactor,
837 Pipe *myPipe,
838 unsigned int DSCDelay,
839 double DPPCLKDelaySubtotalPlusCNVCFormater,
840 double DPPCLKDelaySCL,
841 double DPPCLKDelaySCLLBOnly,
842 double DPPCLKDelayCNVCCursor,
843 double DISPCLKDelaySubtotal,
844 unsigned int DPP_RECOUT_WIDTH,
845 enum output_format_class OutputFormat,
846 unsigned int MaxInterDCNTileRepeaters,
847 unsigned int VStartup,
848 unsigned int MaxVStartup,
849 unsigned int GPUVMPageTableLevels,
850 bool GPUVMEnable,
851 bool HostVMEnable,
852 unsigned int HostVMMaxNonCachedPageTableLevels,
853 double HostVMMinPageSize,
854 bool DynamicMetadataEnable,
855 bool DynamicMetadataVMEnabled,
856 int DynamicMetadataLinesBeforeActiveRequired,
857 unsigned int DynamicMetadataTransmittedBytes,
858 double UrgentLatency,
859 double UrgentExtraLatency,
860 double TCalc,
861 unsigned int PDEAndMetaPTEBytesFrame,
862 unsigned int MetaRowByte,
863 unsigned int PixelPTEBytesPerRow,
864 double PrefetchSourceLinesY,
865 unsigned int SwathWidthY,
866 double VInitPreFillY,
867 unsigned int MaxNumSwathY,
868 double PrefetchSourceLinesC,
869 unsigned int SwathWidthC,
870 double VInitPreFillC,
871 unsigned int MaxNumSwathC,
872 int swath_width_luma_ub,
873 int swath_width_chroma_ub,
874 unsigned int SwathHeightY,
875 unsigned int SwathHeightC,
876 double TWait,
877 double *DSTXAfterScaler,
878 double *DSTYAfterScaler,
879 double *DestinationLinesForPrefetch,
880 double *PrefetchBandwidth,
881 double *DestinationLinesToRequestVMInVBlank,
882 double *DestinationLinesToRequestRowInVBlank,
883 double *VRatioPrefetchY,
884 double *VRatioPrefetchC,
885 double *RequiredPrefetchPixDataBWLuma,
886 double *RequiredPrefetchPixDataBWChroma,
887 bool *NotEnoughTimeForDynamicMetadata,
888 double *Tno_bw,
889 double *prefetch_vmrow_bw,
890 double *Tdmdl_vm,
891 double *Tdmdl,
892 double *TSetup,
893 int *VUpdateOffsetPix,
894 double *VUpdateWidthPix,
895 double *VReadyOffsetPix)
896 {
897 bool MyError = false;
898 unsigned int DPPCycles, DISPCLKCycles;
899 double DSTTotalPixelsAfterScaler;
900 double LineTime;
901 double dst_y_prefetch_equ;
902 #ifdef __DML_VBA_DEBUG__
903 double Tsw_oto;
904 #endif
905 double prefetch_bw_oto;
906 double prefetch_bw_pr;
907 double Tvm_oto;
908 double Tr0_oto;
909 double Tvm_oto_lines;
910 double Tr0_oto_lines;
911 double dst_y_prefetch_oto;
912 double TimeForFetchingMetaPTE = 0;
913 double TimeForFetchingRowInVBlank = 0;
914 double LinesToRequestPrefetchPixelData = 0;
915 unsigned int HostVMDynamicLevelsTrips;
916 double trip_to_mem;
917 double Tvm_trips;
918 double Tr0_trips;
919 double Tvm_trips_rounded;
920 double Tr0_trips_rounded;
921 double Lsw_oto;
922 double Tpre_rounded;
923 double prefetch_bw_equ;
924 double Tvm_equ;
925 double Tr0_equ;
926 double Tdmbf;
927 double Tdmec;
928 double Tdmsks;
929 double prefetch_sw_bytes;
930 double bytes_pp;
931 double dep_bytes;
932 int max_vratio_pre = 4;
933 double min_Lsw;
934 double Tsw_est1 = 0;
935 double Tsw_est3 = 0;
936 double max_Tsw = 0;
937
938 if (GPUVMEnable == true && HostVMEnable == true) {
939 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
940 } else {
941 HostVMDynamicLevelsTrips = 0;
942 }
943 #ifdef __DML_VBA_DEBUG__
944 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
945 #endif
946 CalculateVupdateAndDynamicMetadataParameters(
947 MaxInterDCNTileRepeaters,
948 myPipe->DPPCLK,
949 myPipe->DISPCLK,
950 myPipe->DCFCLKDeepSleep,
951 myPipe->PixelClock,
952 myPipe->HTotal,
953 myPipe->VBlank,
954 DynamicMetadataTransmittedBytes,
955 DynamicMetadataLinesBeforeActiveRequired,
956 myPipe->InterlaceEnable,
957 myPipe->ProgressiveToInterlaceUnitInOPP,
958 TSetup,
959 &Tdmbf,
960 &Tdmec,
961 &Tdmsks,
962 VUpdateOffsetPix,
963 VUpdateWidthPix,
964 VReadyOffsetPix);
965
966 LineTime = myPipe->HTotal / myPipe->PixelClock;
967 trip_to_mem = UrgentLatency;
968 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
969
970 #ifdef __DML_VBA_ALLOW_DELTA__
971 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
972 #else
973 if (DynamicMetadataVMEnabled == true) {
974 #endif
975 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
976 } else {
977 *Tdmdl = TWait + UrgentExtraLatency;
978 }
979
980 #ifdef __DML_VBA_ALLOW_DELTA__
981 if (DynamicMetadataEnable == false) {
982 *Tdmdl = 0.0;
983 }
984 #endif
985
986 if (DynamicMetadataEnable == true) {
987 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
988 *NotEnoughTimeForDynamicMetadata = true;
989 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
990 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
991 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
992 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
993 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
994 } else {
995 *NotEnoughTimeForDynamicMetadata = false;
996 }
997 } else {
998 *NotEnoughTimeForDynamicMetadata = false;
999 }
1000
1001 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1002
1003 if (myPipe->ScalerEnabled)
1004 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1005 else
1006 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1007
1008 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1009
1010 DISPCLKCycles = DISPCLKDelaySubtotal;
1011
1012 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1013 return true;
1014
1015 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1016
1017 #ifdef __DML_VBA_DEBUG__
1018 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1019 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1020 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1021 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1022 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1023 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1024 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1025 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1026 #endif
1027
1028 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1029
1030 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1031 *DSTYAfterScaler = 1;
1032 else
1033 *DSTYAfterScaler = 0;
1034
1035 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1036 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1037 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1038
1039 #ifdef __DML_VBA_DEBUG__
1040 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1041 #endif
1042
1043 MyError = false;
1044
1045 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1046 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1047 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1048
1049 #ifdef __DML_VBA_ALLOW_DELTA__
1050 if (!myPipe->DCCEnable) {
1051 Tr0_trips = 0.0;
1052 Tr0_trips_rounded = 0.0;
1053 }
1054 #endif
1055
1056 if (!GPUVMEnable) {
1057 Tvm_trips = 0.0;
1058 Tvm_trips_rounded = 0.0;
1059 }
1060
1061 if (GPUVMEnable) {
1062 if (GPUVMPageTableLevels >= 3) {
1063 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1064 } else {
1065 *Tno_bw = 0;
1066 }
1067 } else if (!myPipe->DCCEnable) {
1068 *Tno_bw = LineTime;
1069 } else {
1070 *Tno_bw = LineTime / 4;
1071 }
1072
1073 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1074 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1075 else
1076 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1077 /*rev 99*/
1078 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1079 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1080 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1081 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1082 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1083
1084 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1085 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1086 #ifdef __DML_VBA_DEBUG__
1087 Tsw_oto = Lsw_oto * LineTime;
1088 #endif
1089
1090
1091 #ifdef __DML_VBA_DEBUG__
1092 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1093 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1094 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1095 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1096 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1097 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1098 #endif
1099
1100 if (GPUVMEnable == true)
1101 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1102 else
1103 Tvm_oto = LineTime / 4.0;
1104
1105 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1106 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1107 LineTime - Tvm_oto,
1108 LineTime / 4);
1109 } else {
1110 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1111 }
1112
1113 #ifdef __DML_VBA_DEBUG__
1114 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1115 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1116 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1117 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1118 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1119 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1120 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1121 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1122 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1123 #endif
1124
1125 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1126 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1127 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1128 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1129 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1130 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1131
1132 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1133
1134 if (prefetch_sw_bytes < dep_bytes)
1135 prefetch_sw_bytes = 2 * dep_bytes;
1136
1137 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1138 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1139 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1140 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1141 dml_print("DML: LineTime: %f\n", LineTime);
1142 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1143
1144 dml_print("DML: LineTime: %f\n", LineTime);
1145 dml_print("DML: VStartup: %d\n", VStartup);
1146 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1147 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1148 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1149 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1150 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1151 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1152 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1153 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1154 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1155 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1156 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1157
1158 *PrefetchBandwidth = 0;
1159 *DestinationLinesToRequestVMInVBlank = 0;
1160 *DestinationLinesToRequestRowInVBlank = 0;
1161 *VRatioPrefetchY = 0;
1162 *VRatioPrefetchC = 0;
1163 *RequiredPrefetchPixDataBWLuma = 0;
1164 if (dst_y_prefetch_equ > 1) {
1165 double PrefetchBandwidth1;
1166 double PrefetchBandwidth2;
1167 double PrefetchBandwidth3;
1168 double PrefetchBandwidth4;
1169
1170 if (Tpre_rounded - *Tno_bw > 0) {
1171 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1172 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1173 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1174 } else {
1175 PrefetchBandwidth1 = 0;
1176 }
1177
1178 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1179 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1180 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1181 }
1182
1183 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1184 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1185 else
1186 PrefetchBandwidth2 = 0;
1187
1188 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1189 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1190 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1191 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1192 } else {
1193 PrefetchBandwidth3 = 0;
1194 }
1195
1196 #ifdef __DML_VBA_DEBUG__
1197 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1198 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1199 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1200 #endif
1201 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1202 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1203 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1204 }
1205
1206 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1207 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1208 else
1209 PrefetchBandwidth4 = 0;
1210
1211 {
1212 bool Case1OK;
1213 bool Case2OK;
1214 bool Case3OK;
1215
1216 if (PrefetchBandwidth1 > 0) {
1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1219 Case1OK = true;
1220 } else {
1221 Case1OK = false;
1222 }
1223 } else {
1224 Case1OK = false;
1225 }
1226
1227 if (PrefetchBandwidth2 > 0) {
1228 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1229 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1230 Case2OK = true;
1231 } else {
1232 Case2OK = false;
1233 }
1234 } else {
1235 Case2OK = false;
1236 }
1237
1238 if (PrefetchBandwidth3 > 0) {
1239 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1240 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1241 Case3OK = true;
1242 } else {
1243 Case3OK = false;
1244 }
1245 } else {
1246 Case3OK = false;
1247 }
1248
1249 if (Case1OK) {
1250 prefetch_bw_equ = PrefetchBandwidth1;
1251 } else if (Case2OK) {
1252 prefetch_bw_equ = PrefetchBandwidth2;
1253 } else if (Case3OK) {
1254 prefetch_bw_equ = PrefetchBandwidth3;
1255 } else {
1256 prefetch_bw_equ = PrefetchBandwidth4;
1257 }
1258
1259 #ifdef __DML_VBA_DEBUG__
1260 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1261 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1262 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1263 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1264 #endif
1265
1266 if (prefetch_bw_equ > 0) {
1267 if (GPUVMEnable == true) {
1268 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1269 } else {
1270 Tvm_equ = LineTime / 4;
1271 }
1272
1273 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1274 Tr0_equ = dml_max4(
1275 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1276 Tr0_trips,
1277 (LineTime - Tvm_equ) / 2,
1278 LineTime / 4);
1279 } else {
1280 Tr0_equ = (LineTime - Tvm_equ) / 2;
1281 }
1282 } else {
1283 Tvm_equ = 0;
1284 Tr0_equ = 0;
1285 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1286 }
1287 }
1288
1289 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1290 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1291 TimeForFetchingMetaPTE = Tvm_oto;
1292 TimeForFetchingRowInVBlank = Tr0_oto;
1293 *PrefetchBandwidth = prefetch_bw_oto;
1294 } else {
1295 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1296 TimeForFetchingMetaPTE = Tvm_equ;
1297 TimeForFetchingRowInVBlank = Tr0_equ;
1298 *PrefetchBandwidth = prefetch_bw_equ;
1299 }
1300
1301 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1302
1303 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1304
1305 #ifdef __DML_VBA_ALLOW_DELTA__
1306 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1307 // See note above dated 5/30/2018
1308 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1309 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1310 #else
1311 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1312 #endif
1313
1314 #ifdef __DML_VBA_DEBUG__
1315 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1316 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1317 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1318 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1319 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1320 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1321 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1322 #endif
1323
1324 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1325
1326 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1327 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1328 #ifdef __DML_VBA_DEBUG__
1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1330 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1331 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1332 #endif
1333 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1334 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1335 *VRatioPrefetchY = dml_max(
1336 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1337 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1338 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1339 } else {
1340 MyError = true;
1341 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1342 *VRatioPrefetchY = 0;
1343 }
1344 #ifdef __DML_VBA_DEBUG__
1345 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1346 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1347 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1348 #endif
1349 }
1350
1351 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1352 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1353
1354 #ifdef __DML_VBA_DEBUG__
1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1356 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1357 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1358 #endif
1359 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1360 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1361 *VRatioPrefetchC = dml_max(
1362 *VRatioPrefetchC,
1363 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1364 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1365 } else {
1366 MyError = true;
1367 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1368 *VRatioPrefetchC = 0;
1369 }
1370 #ifdef __DML_VBA_DEBUG__
1371 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1372 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1373 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1374 #endif
1375 }
1376
1377 #ifdef __DML_VBA_DEBUG__
1378 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1379 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1380 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1381 #endif
1382
1383 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1384
1385 #ifdef __DML_VBA_DEBUG__
1386 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1387 #endif
1388
1389 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1390 / LineTime;
1391 } else {
1392 MyError = true;
1393 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1394 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1395 *VRatioPrefetchY = 0;
1396 *VRatioPrefetchC = 0;
1397 *RequiredPrefetchPixDataBWLuma = 0;
1398 *RequiredPrefetchPixDataBWChroma = 0;
1399 }
1400
1401 dml_print(
1402 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1403 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1404 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1405 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1406 dml_print(
1407 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1408 (double) LinesToRequestPrefetchPixelData * LineTime);
1409 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1410 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1411 dml_print(
1412 "DML: Tslack(pre): %fus - time left over in schedule\n",
1413 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1414 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1415 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1416
1417 } else {
1418 MyError = true;
1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1420 }
1421
1422 {
1423 double prefetch_vm_bw;
1424 double prefetch_row_bw;
1425
1426 if (PDEAndMetaPTEBytesFrame == 0) {
1427 prefetch_vm_bw = 0;
1428 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1429 #ifdef __DML_VBA_DEBUG__
1430 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1431 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1432 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1433 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1434 #endif
1435 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1436 #ifdef __DML_VBA_DEBUG__
1437 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1438 #endif
1439 } else {
1440 prefetch_vm_bw = 0;
1441 MyError = true;
1442 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1443 }
1444
1445 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1446 prefetch_row_bw = 0;
1447 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1448 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1449
1450 #ifdef __DML_VBA_DEBUG__
1451 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1452 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1453 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1454 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1455 #endif
1456 } else {
1457 prefetch_row_bw = 0;
1458 MyError = true;
1459 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1460 }
1461
1462 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1463 }
1464
1465 if (MyError) {
1466 *PrefetchBandwidth = 0;
1467 TimeForFetchingMetaPTE = 0;
1468 TimeForFetchingRowInVBlank = 0;
1469 *DestinationLinesToRequestVMInVBlank = 0;
1470 *DestinationLinesToRequestRowInVBlank = 0;
1471 *DestinationLinesForPrefetch = 0;
1472 LinesToRequestPrefetchPixelData = 0;
1473 *VRatioPrefetchY = 0;
1474 *VRatioPrefetchC = 0;
1475 *RequiredPrefetchPixDataBWLuma = 0;
1476 *RequiredPrefetchPixDataBWChroma = 0;
1477 }
1478
1479 return MyError;
1480 }
1481
/*
 * RoundToDFSGranularityUp - snap a clock to a (4 * VCOSpeed) / N step.
 * @Clock:    requested clock value
 * @VCOSpeed: VCO speed, expressed in the same unit as @Clock
 *
 * Divides 4 * VCOSpeed by @Clock, passes that ratio through
 * dml_floor(ratio, 1) and returns 4 * VCOSpeed divided by the result.
 * Assuming dml_floor(x, 1) rounds down to a whole number, the returned
 * value is the closest such step that is not below @Clock.
 *
 * NOTE(review): there is no guard for a zero divider (e.g. @Clock larger
 * than 4 * VCOSpeed, or @Clock == 0) - presumably callers never pass such
 * values; confirm against the call sites.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double ScaledVCO = VCOSpeed * 4;
	double Divider = dml_floor(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1486
/*
 * RoundToDFSGranularityDown - snap a clock to a (4 * VCOSpeed) / N step.
 * @Clock:    requested clock value
 * @VCOSpeed: VCO speed, expressed in the same unit as @Clock
 *
 * Divides 4 * VCOSpeed by @Clock, passes that ratio through
 * dml_ceil(ratio, 1) and returns 4 * VCOSpeed divided by the result.
 * Assuming dml_ceil(x, 1) rounds up to a whole number, the returned
 * value is the closest such step that is not above @Clock.
 *
 * NOTE(review): @Clock == 0 is not guarded against - presumably callers
 * always pass a positive clock; confirm against the call sites.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double Divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	double ScaledVCO = VCOSpeed * 4;

	return ScaledVCO / Divider;
}
1491
1492 static void CalculateDCCConfiguration(
1493 bool DCCEnabled,
1494 bool DCCProgrammingAssumesScanDirectionUnknown,
1495 enum source_format_class SourcePixelFormat,
1496 unsigned int SurfaceWidthLuma,
1497 unsigned int SurfaceWidthChroma,
1498 unsigned int SurfaceHeightLuma,
1499 unsigned int SurfaceHeightChroma,
1500 double DETBufferSize,
1501 unsigned int RequestHeight256ByteLuma,
1502 unsigned int RequestHeight256ByteChroma,
1503 enum dm_swizzle_mode TilingFormat,
1504 unsigned int BytePerPixelY,
1505 unsigned int BytePerPixelC,
1506 double BytePerPixelDETY,
1507 double BytePerPixelDETC,
1508 enum scan_direction_class ScanOrientation,
1509 unsigned int *MaxUncompressedBlockLuma,
1510 unsigned int *MaxUncompressedBlockChroma,
1511 unsigned int *MaxCompressedBlockLuma,
1512 unsigned int *MaxCompressedBlockChroma,
1513 unsigned int *IndependentBlockLuma,
1514 unsigned int *IndependentBlockChroma)
1515 {
1516 int yuv420;
1517 int horz_div_l;
1518 int horz_div_c;
1519 int vert_div_l;
1520 int vert_div_c;
1521
1522 int swath_buf_size;
1523 double detile_buf_vp_horz_limit;
1524 double detile_buf_vp_vert_limit;
1525
1526 int MAS_vp_horz_limit;
1527 int MAS_vp_vert_limit;
1528 int max_vp_horz_width;
1529 int max_vp_vert_height;
1530 int eff_surf_width_l;
1531 int eff_surf_width_c;
1532 int eff_surf_height_l;
1533 int eff_surf_height_c;
1534
1535 int full_swath_bytes_horz_wc_l;
1536 int full_swath_bytes_horz_wc_c;
1537 int full_swath_bytes_vert_wc_l;
1538 int full_swath_bytes_vert_wc_c;
1539 int req128_horz_wc_l;
1540 int req128_horz_wc_c;
1541 int req128_vert_wc_l;
1542 int req128_vert_wc_c;
1543 int segment_order_horz_contiguous_luma;
1544 int segment_order_horz_contiguous_chroma;
1545 int segment_order_vert_contiguous_luma;
1546 int segment_order_vert_contiguous_chroma;
1547
1548 typedef enum {
1549 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1550 } RequestType;
1551 RequestType RequestLuma;
1552 RequestType RequestChroma;
1553
1554 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1555 horz_div_l = 1;
1556 horz_div_c = 1;
1557 vert_div_l = 1;
1558 vert_div_c = 1;
1559
1560 if (BytePerPixelY == 1)
1561 vert_div_l = 0;
1562 if (BytePerPixelC == 1)
1563 vert_div_c = 0;
1564 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1565 horz_div_l = 0;
1566 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1567 horz_div_c = 0;
1568
1569 if (BytePerPixelC == 0) {
1570 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1571 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1572 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1573 } else {
1574 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1575 detile_buf_vp_horz_limit = (double) swath_buf_size
1576 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1577 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1578 detile_buf_vp_vert_limit = (double) swath_buf_size
1579 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1580 }
1581
1582 if (SourcePixelFormat == dm_420_10) {
1583 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1584 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1585 }
1586
1587 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1588 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1589
1590 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1591 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1592 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1593 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1594 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1595 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1596 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1597 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1598
1599 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1600 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1601 if (BytePerPixelC > 0) {
1602 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1603 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1604 } else {
1605 full_swath_bytes_horz_wc_c = 0;
1606 full_swath_bytes_vert_wc_c = 0;
1607 }
1608
1609 if (SourcePixelFormat == dm_420_10) {
1610 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1611 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1612 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1613 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1614 }
1615
1616 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1617 req128_horz_wc_l = 0;
1618 req128_horz_wc_c = 0;
1619 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1620 req128_horz_wc_l = 0;
1621 req128_horz_wc_c = 1;
1622 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1623 req128_horz_wc_l = 1;
1624 req128_horz_wc_c = 0;
1625 } else {
1626 req128_horz_wc_l = 1;
1627 req128_horz_wc_c = 1;
1628 }
1629
1630 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1631 req128_vert_wc_l = 0;
1632 req128_vert_wc_c = 0;
1633 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1634 req128_vert_wc_l = 0;
1635 req128_vert_wc_c = 1;
1636 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1637 req128_vert_wc_l = 1;
1638 req128_vert_wc_c = 0;
1639 } else {
1640 req128_vert_wc_l = 1;
1641 req128_vert_wc_c = 1;
1642 }
1643
1644 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1645 segment_order_horz_contiguous_luma = 0;
1646 } else {
1647 segment_order_horz_contiguous_luma = 1;
1648 }
1649 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1650 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1651 segment_order_vert_contiguous_luma = 0;
1652 } else {
1653 segment_order_vert_contiguous_luma = 1;
1654 }
1655 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1656 segment_order_horz_contiguous_chroma = 0;
1657 } else {
1658 segment_order_horz_contiguous_chroma = 1;
1659 }
1660 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1661 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1662 segment_order_vert_contiguous_chroma = 0;
1663 } else {
1664 segment_order_vert_contiguous_chroma = 1;
1665 }
1666
1667 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1668 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1669 RequestLuma = REQ_256Bytes;
1670 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1671 RequestLuma = REQ_128BytesNonContiguous;
1672 } else {
1673 RequestLuma = REQ_128BytesContiguous;
1674 }
1675 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1676 RequestChroma = REQ_256Bytes;
1677 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1678 RequestChroma = REQ_128BytesNonContiguous;
1679 } else {
1680 RequestChroma = REQ_128BytesContiguous;
1681 }
1682 } else if (ScanOrientation != dm_vert) {
1683 if (req128_horz_wc_l == 0) {
1684 RequestLuma = REQ_256Bytes;
1685 } else if (segment_order_horz_contiguous_luma == 0) {
1686 RequestLuma = REQ_128BytesNonContiguous;
1687 } else {
1688 RequestLuma = REQ_128BytesContiguous;
1689 }
1690 if (req128_horz_wc_c == 0) {
1691 RequestChroma = REQ_256Bytes;
1692 } else if (segment_order_horz_contiguous_chroma == 0) {
1693 RequestChroma = REQ_128BytesNonContiguous;
1694 } else {
1695 RequestChroma = REQ_128BytesContiguous;
1696 }
1697 } else {
1698 if (req128_vert_wc_l == 0) {
1699 RequestLuma = REQ_256Bytes;
1700 } else if (segment_order_vert_contiguous_luma == 0) {
1701 RequestLuma = REQ_128BytesNonContiguous;
1702 } else {
1703 RequestLuma = REQ_128BytesContiguous;
1704 }
1705 if (req128_vert_wc_c == 0) {
1706 RequestChroma = REQ_256Bytes;
1707 } else if (segment_order_vert_contiguous_chroma == 0) {
1708 RequestChroma = REQ_128BytesNonContiguous;
1709 } else {
1710 RequestChroma = REQ_128BytesContiguous;
1711 }
1712 }
1713
1714 if (RequestLuma == REQ_256Bytes) {
1715 *MaxUncompressedBlockLuma = 256;
1716 *MaxCompressedBlockLuma = 256;
1717 *IndependentBlockLuma = 0;
1718 } else if (RequestLuma == REQ_128BytesContiguous) {
1719 *MaxUncompressedBlockLuma = 256;
1720 *MaxCompressedBlockLuma = 128;
1721 *IndependentBlockLuma = 128;
1722 } else {
1723 *MaxUncompressedBlockLuma = 256;
1724 *MaxCompressedBlockLuma = 64;
1725 *IndependentBlockLuma = 64;
1726 }
1727
1728 if (RequestChroma == REQ_256Bytes) {
1729 *MaxUncompressedBlockChroma = 256;
1730 *MaxCompressedBlockChroma = 256;
1731 *IndependentBlockChroma = 0;
1732 } else if (RequestChroma == REQ_128BytesContiguous) {
1733 *MaxUncompressedBlockChroma = 256;
1734 *MaxCompressedBlockChroma = 128;
1735 *IndependentBlockChroma = 128;
1736 } else {
1737 *MaxUncompressedBlockChroma = 256;
1738 *MaxCompressedBlockChroma = 64;
1739 *IndependentBlockChroma = 64;
1740 }
1741
1742 if (DCCEnabled != true || BytePerPixelC == 0) {
1743 *MaxUncompressedBlockChroma = 0;
1744 *MaxCompressedBlockChroma = 0;
1745 *IndependentBlockChroma = 0;
1746 }
1747
1748 if (DCCEnabled != true) {
1749 *MaxUncompressedBlockLuma = 0;
1750 *MaxCompressedBlockLuma = 0;
1751 *IndependentBlockLuma = 0;
1752 }
1753 }
1754
1755 static double CalculatePrefetchSourceLines(
1756 struct display_mode_lib *mode_lib,
1757 double VRatio,
1758 double vtaps,
1759 bool Interlace,
1760 bool ProgressiveToInterlaceUnitInOPP,
1761 unsigned int SwathHeight,
1762 unsigned int ViewportYStart,
1763 double *VInitPreFill,
1764 unsigned int *MaxNumSwath)
1765 {
1766 struct vba_vars_st *v = &mode_lib->vba;
1767 unsigned int MaxPartialSwath;
1768
1769 if (ProgressiveToInterlaceUnitInOPP)
1770 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1771 else
1772 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1773
1774 if (!v->IgnoreViewportPositioning) {
1775
1776 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1777
1778 if (*VInitPreFill > 1.0)
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1780 else
1781 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1782 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1783
1784 } else {
1785
1786 if (ViewportYStart != 0)
1787 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1788
1789 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1790
1791 if (*VInitPreFill > 1.0)
1792 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1793 else
1794 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1795 }
1796
1797 #ifdef __DML_VBA_DEBUG__
1798 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1799 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1800 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1801 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1802 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1803 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1804 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1805 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1806 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1807 #endif
1808 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1809 }
1810
1811 static unsigned int CalculateVMAndRowBytes(
1812 struct display_mode_lib *mode_lib,
1813 bool DCCEnable,
1814 unsigned int BlockHeight256Bytes,
1815 unsigned int BlockWidth256Bytes,
1816 enum source_format_class SourcePixelFormat,
1817 unsigned int SurfaceTiling,
1818 unsigned int BytePerPixel,
1819 enum scan_direction_class ScanDirection,
1820 unsigned int SwathWidth,
1821 unsigned int ViewportHeight,
1822 bool GPUVMEnable,
1823 bool HostVMEnable,
1824 unsigned int HostVMMaxNonCachedPageTableLevels,
1825 unsigned int GPUVMMinPageSize,
1826 unsigned int HostVMMinPageSize,
1827 unsigned int PTEBufferSizeInRequests,
1828 unsigned int Pitch,
1829 unsigned int DCCMetaPitch,
1830 unsigned int *MacroTileWidth,
1831 unsigned int *MetaRowByte,
1832 unsigned int *PixelPTEBytesPerRow,
1833 bool *PTEBufferSizeNotExceeded,
1834 int *dpte_row_width_ub,
1835 unsigned int *dpte_row_height,
1836 unsigned int *MetaRequestWidth,
1837 unsigned int *MetaRequestHeight,
1838 unsigned int *meta_row_width,
1839 unsigned int *meta_row_height,
1840 int *vm_group_bytes,
1841 unsigned int *dpte_group_bytes,
1842 unsigned int *PixelPTEReqWidth,
1843 unsigned int *PixelPTEReqHeight,
1844 unsigned int *PTERequestSize,
1845 int *DPDE0BytesFrame,
1846 int *MetaPTEBytesFrame)
1847 {
1848 struct vba_vars_st *v = &mode_lib->vba;
1849 unsigned int MPDEBytesFrame;
1850 unsigned int DCCMetaSurfaceBytes;
1851 unsigned int MacroTileSizeBytes;
1852 unsigned int MacroTileHeight;
1853 unsigned int ExtraDPDEBytesFrame;
1854 unsigned int PDEAndMetaPTEBytesFrame;
1855 unsigned int PixelPTEReqHeightPTEs = 0;
1856 unsigned int HostVMDynamicLevels = 0;
1857 double FractionOfPTEReturnDrop;
1858
1859 if (GPUVMEnable == true && HostVMEnable == true) {
1860 if (HostVMMinPageSize < 2048) {
1861 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1862 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1864 } else {
1865 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1866 }
1867 }
1868
1869 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1870 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1871 if (ScanDirection != dm_vert) {
1872 *meta_row_height = *MetaRequestHeight;
1873 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1874 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1875 } else {
1876 *meta_row_height = *MetaRequestWidth;
1877 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1878 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1879 }
1880 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1881 if (GPUVMEnable == true) {
1882 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1883 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1884 } else {
1885 *MetaPTEBytesFrame = 0;
1886 MPDEBytesFrame = 0;
1887 }
1888
1889 if (DCCEnable != true) {
1890 *MetaPTEBytesFrame = 0;
1891 MPDEBytesFrame = 0;
1892 *MetaRowByte = 0;
1893 }
1894
1895 if (SurfaceTiling == dm_sw_linear) {
1896 MacroTileSizeBytes = 256;
1897 MacroTileHeight = BlockHeight256Bytes;
1898 } else {
1899 MacroTileSizeBytes = 65536;
1900 MacroTileHeight = 16 * BlockHeight256Bytes;
1901 }
1902 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1903
1904 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1905 if (ScanDirection != dm_vert) {
1906 *DPDE0BytesFrame = 64
1907 * (dml_ceil(
1908 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1909 / (8 * 2097152),
1910 1) + 1);
1911 } else {
1912 *DPDE0BytesFrame = 64
1913 * (dml_ceil(
1914 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1915 / (8 * 2097152),
1916 1) + 1);
1917 }
1918 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1919 } else {
1920 *DPDE0BytesFrame = 0;
1921 ExtraDPDEBytesFrame = 0;
1922 }
1923
1924 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1925
1926 #ifdef __DML_VBA_DEBUG__
1927 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1928 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1929 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1930 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1931 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1932 #endif
1933
1934 if (HostVMEnable == true) {
1935 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1936 }
1937 #ifdef __DML_VBA_DEBUG__
1938 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1939 #endif
1940
1941 if (SurfaceTiling == dm_sw_linear) {
1942 PixelPTEReqHeightPTEs = 1;
1943 *PixelPTEReqHeight = 1;
1944 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1945 *PTERequestSize = 64;
1946 FractionOfPTEReturnDrop = 0;
1947 } else if (MacroTileSizeBytes == 4096) {
1948 PixelPTEReqHeightPTEs = 1;
1949 *PixelPTEReqHeight = MacroTileHeight;
1950 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1951 *PTERequestSize = 64;
1952 if (ScanDirection != dm_vert)
1953 FractionOfPTEReturnDrop = 0;
1954 else
1955 FractionOfPTEReturnDrop = 7 / 8;
1956 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1957 PixelPTEReqHeightPTEs = 16;
1958 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1959 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1960 *PTERequestSize = 128;
1961 FractionOfPTEReturnDrop = 0;
1962 } else {
1963 PixelPTEReqHeightPTEs = 1;
1964 *PixelPTEReqHeight = MacroTileHeight;
1965 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1966 *PTERequestSize = 64;
1967 FractionOfPTEReturnDrop = 0;
1968 }
1969
1970 if (SurfaceTiling == dm_sw_linear) {
1971 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1972 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1973 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1974 } else if (ScanDirection != dm_vert) {
1975 *dpte_row_height = *PixelPTEReqHeight;
1976 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1977 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1978 } else {
1979 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1980 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1981 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1982 }
1983
1984 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1985 *PTEBufferSizeNotExceeded = true;
1986 } else {
1987 *PTEBufferSizeNotExceeded = false;
1988 }
1989
1990 if (GPUVMEnable != true) {
1991 *PixelPTEBytesPerRow = 0;
1992 *PTEBufferSizeNotExceeded = true;
1993 }
1994
1995 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1996
1997 if (HostVMEnable == true) {
1998 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1999 }
2000
2001 if (HostVMEnable == true) {
2002 *vm_group_bytes = 512;
2003 *dpte_group_bytes = 512;
2004 } else if (GPUVMEnable == true) {
2005 *vm_group_bytes = 2048;
2006 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2007 *dpte_group_bytes = 512;
2008 } else {
2009 *dpte_group_bytes = 2048;
2010 }
2011 } else {
2012 *vm_group_bytes = 0;
2013 *dpte_group_bytes = 0;
2014 }
2015 return PDEAndMetaPTEBytesFrame;
2016 }
2017
2018 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2019 {
2020 struct vba_vars_st *v = &mode_lib->vba;
2021 unsigned int j, k;
2022 double HostVMInefficiencyFactor = 1.0;
2023 bool NoChromaPlanes = true;
2024 int ReorderBytes;
2025 double VMDataOnlyReturnBW;
2026 double MaxTotalRDBandwidth = 0;
2027 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2028
2029 v->WritebackDISPCLK = 0.0;
2030 v->DISPCLKWithRamping = 0;
2031 v->DISPCLKWithoutRamping = 0;
2032 v->GlobalDPPCLK = 0.0;
2033 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2034 {
2035 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2036 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2037 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2039
2040 if (v->HostVMEnable != true) {
2041 v->ReturnBW = dml_min(
2042 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2043 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2044 } else {
2045 v->ReturnBW = dml_min(
2046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2048 }
2049 }
2050 /* End DAL custom code */
2051
2052 // DISPCLK and DPPCLK Calculation
2053 //
2054 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2055 if (v->WritebackEnable[k]) {
2056 v->WritebackDISPCLK = dml_max(
2057 v->WritebackDISPCLK,
2058 dml314_CalculateWriteBackDISPCLK(
2059 v->WritebackPixelFormat[k],
2060 v->PixelClock[k],
2061 v->WritebackHRatio[k],
2062 v->WritebackVRatio[k],
2063 v->WritebackHTaps[k],
2064 v->WritebackVTaps[k],
2065 v->WritebackSourceWidth[k],
2066 v->WritebackDestinationWidth[k],
2067 v->HTotal[k],
2068 v->WritebackLineBufferSize));
2069 }
2070 }
2071
2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2073 if (v->HRatio[k] > 1) {
2074 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2075 v->MaxDCHUBToPSCLThroughput,
2076 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2077 } else {
2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2079 }
2080
2081 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2082 * dml_max(
2083 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2084 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2085
2086 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2087 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2088 }
2089
2090 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2091 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2092 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2093 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2094 } else {
2095 if (v->HRatioChroma[k] > 1) {
2096 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2097 v->MaxDCHUBToPSCLThroughput,
2098 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2099 } else {
2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2101 }
2102 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2103 * dml_max3(
2104 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2105 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2106 1.0);
2107
2108 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2109 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2110 }
2111
2112 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2113 }
2114 }
2115
2116 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2117 if (v->BlendingAndTiming[k] != k)
2118 continue;
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2120 v->DISPCLKWithRamping = dml_max(
2121 v->DISPCLKWithRamping,
2122 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2123 * (1 + v->DISPCLKRampingMargin / 100));
2124 v->DISPCLKWithoutRamping = dml_max(
2125 v->DISPCLKWithoutRamping,
2126 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2127 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2128 v->DISPCLKWithRamping = dml_max(
2129 v->DISPCLKWithRamping,
2130 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2131 * (1 + v->DISPCLKRampingMargin / 100));
2132 v->DISPCLKWithoutRamping = dml_max(
2133 v->DISPCLKWithoutRamping,
2134 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2135 } else {
2136 v->DISPCLKWithRamping = dml_max(
2137 v->DISPCLKWithRamping,
2138 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2139 v->DISPCLKWithoutRamping = dml_max(
2140 v->DISPCLKWithoutRamping,
2141 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2142 }
2143 }
2144
2145 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2146 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2147
2148 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2149 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2150 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2151 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2152 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2153 v->DISPCLKDPPCLKVCOSpeed);
2154 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2155 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2156 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2157 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2158 } else {
2159 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2160 }
2161 v->DISPCLK = v->DISPCLK_calculated;
2162 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2163
2164 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2165 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2166 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2167 }
2168 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2169 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2170 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2171 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2172 }
2173
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2176 }
2177
2178 // Urgent and B P-State/DRAM Clock Change Watermark
2179 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2180 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2181
2182 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2183 CalculateBytePerPixelAnd256BBlockSizes(
2184 v->SourcePixelFormat[k],
2185 v->SurfaceTiling[k],
2186 &v->BytePerPixelY[k],
2187 &v->BytePerPixelC[k],
2188 &v->BytePerPixelDETY[k],
2189 &v->BytePerPixelDETC[k],
2190 &v->BlockHeight256BytesY[k],
2191 &v->BlockHeight256BytesC[k],
2192 &v->BlockWidth256BytesY[k],
2193 &v->BlockWidth256BytesC[k]);
2194 }
2195
2196 CalculateSwathWidth(
2197 false,
2198 v->NumberOfActivePlanes,
2199 v->SourcePixelFormat,
2200 v->SourceScan,
2201 v->ViewportWidth,
2202 v->ViewportHeight,
2203 v->SurfaceWidthY,
2204 v->SurfaceWidthC,
2205 v->SurfaceHeightY,
2206 v->SurfaceHeightC,
2207 v->ODMCombineEnabled,
2208 v->BytePerPixelY,
2209 v->BytePerPixelC,
2210 v->BlockHeight256BytesY,
2211 v->BlockHeight256BytesC,
2212 v->BlockWidth256BytesY,
2213 v->BlockWidth256BytesC,
2214 v->BlendingAndTiming,
2215 v->HActive,
2216 v->HRatio,
2217 v->DPPPerPlane,
2218 v->SwathWidthSingleDPPY,
2219 v->SwathWidthSingleDPPC,
2220 v->SwathWidthY,
2221 v->SwathWidthC,
2222 v->dummyinteger3,
2223 v->dummyinteger4,
2224 v->swath_width_luma_ub,
2225 v->swath_width_chroma_ub);
2226
2227 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2228 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2229 * v->VRatio[k];
2230 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2231 * v->VRatioChroma[k];
2232 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2233 }
2234
2235 // DCFCLK Deep Sleep
2236 CalculateDCFCLKDeepSleep(
2237 mode_lib,
2238 v->NumberOfActivePlanes,
2239 v->BytePerPixelY,
2240 v->BytePerPixelC,
2241 v->VRatio,
2242 v->VRatioChroma,
2243 v->SwathWidthY,
2244 v->SwathWidthC,
2245 v->DPPPerPlane,
2246 v->HRatio,
2247 v->HRatioChroma,
2248 v->PixelClock,
2249 v->PSCL_THROUGHPUT_LUMA,
2250 v->PSCL_THROUGHPUT_CHROMA,
2251 v->DPPCLK,
2252 v->ReadBandwidthPlaneLuma,
2253 v->ReadBandwidthPlaneChroma,
2254 v->ReturnBusWidth,
2255 &v->DCFCLKDeepSleep);
2256
2257 // DSCCLK
2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2259 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2260 v->DSCCLK_calculated[k] = 0.0;
2261 } else {
2262 if (v->OutputFormat[k] == dm_420)
2263 v->DSCFormatFactor = 2;
2264 else if (v->OutputFormat[k] == dm_444)
2265 v->DSCFormatFactor = 1;
2266 else if (v->OutputFormat[k] == dm_n422)
2267 v->DSCFormatFactor = 2;
2268 else
2269 v->DSCFormatFactor = 1;
2270 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2273 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2276 else
2277 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2278 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2279 }
2280 }
2281
2282 // DSC Delay
2283 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2284 double BPP = v->OutputBpp[k];
2285
2286 if (v->DSCEnabled[k] && BPP != 0) {
2287 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2288 v->DSCDelay[k] = dscceComputeDelay(
2289 v->DSCInputBitPerComponent[k],
2290 BPP,
2291 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2292 v->NumberOfDSCSlices[k],
2293 v->OutputFormat[k],
2294 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2295 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2296 v->DSCDelay[k] = 2
2297 * (dscceComputeDelay(
2298 v->DSCInputBitPerComponent[k],
2299 BPP,
2300 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2301 v->NumberOfDSCSlices[k] / 2.0,
2302 v->OutputFormat[k],
2303 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2304 } else {
2305 v->DSCDelay[k] = 4
2306 * (dscceComputeDelay(
2307 v->DSCInputBitPerComponent[k],
2308 BPP,
2309 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2310 v->NumberOfDSCSlices[k] / 4.0,
2311 v->OutputFormat[k],
2312 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2313 }
2314 v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelay[k] / v->HActive[k], 1);
2315 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2316 } else {
2317 v->DSCDelay[k] = 0;
2318 }
2319 }
2320
2321 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2322 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2323 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2324 v->DSCDelay[k] = v->DSCDelay[j];
2325
2326 // Prefetch
2327 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2328 unsigned int PDEAndMetaPTEBytesFrameY;
2329 unsigned int PixelPTEBytesPerRowY;
2330 unsigned int MetaRowByteY;
2331 unsigned int MetaRowByteC;
2332 unsigned int PDEAndMetaPTEBytesFrameC;
2333 unsigned int PixelPTEBytesPerRowC;
2334 bool PTEBufferSizeNotExceededY;
2335 bool PTEBufferSizeNotExceededC;
2336
2337 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2338 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2339 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2340 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2341 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2342 } else {
2343 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2344 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2345 }
2346
2347 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2348 mode_lib,
2349 v->DCCEnable[k],
2350 v->BlockHeight256BytesC[k],
2351 v->BlockWidth256BytesC[k],
2352 v->SourcePixelFormat[k],
2353 v->SurfaceTiling[k],
2354 v->BytePerPixelC[k],
2355 v->SourceScan[k],
2356 v->SwathWidthC[k],
2357 v->ViewportHeightChroma[k],
2358 v->GPUVMEnable,
2359 v->HostVMEnable,
2360 v->HostVMMaxNonCachedPageTableLevels,
2361 v->GPUVMMinPageSize,
2362 v->HostVMMinPageSize,
2363 v->PTEBufferSizeInRequestsForChroma,
2364 v->PitchC[k],
2365 v->DCCMetaPitchC[k],
2366 &v->MacroTileWidthC[k],
2367 &MetaRowByteC,
2368 &PixelPTEBytesPerRowC,
2369 &PTEBufferSizeNotExceededC,
2370 &v->dpte_row_width_chroma_ub[k],
2371 &v->dpte_row_height_chroma[k],
2372 &v->meta_req_width_chroma[k],
2373 &v->meta_req_height_chroma[k],
2374 &v->meta_row_width_chroma[k],
2375 &v->meta_row_height_chroma[k],
2376 &v->dummyinteger1,
2377 &v->dummyinteger2,
2378 &v->PixelPTEReqWidthC[k],
2379 &v->PixelPTEReqHeightC[k],
2380 &v->PTERequestSizeC[k],
2381 &v->dpde0_bytes_per_frame_ub_c[k],
2382 &v->meta_pte_bytes_per_frame_ub_c[k]);
2383
2384 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2385 mode_lib,
2386 v->VRatioChroma[k],
2387 v->VTAPsChroma[k],
2388 v->Interlace[k],
2389 v->ProgressiveToInterlaceUnitInOPP,
2390 v->SwathHeightC[k],
2391 v->ViewportYStartC[k],
2392 &v->VInitPreFillC[k],
2393 &v->MaxNumSwathC[k]);
2394 } else {
2395 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2396 v->PTEBufferSizeInRequestsForChroma = 0;
2397 PixelPTEBytesPerRowC = 0;
2398 PDEAndMetaPTEBytesFrameC = 0;
2399 MetaRowByteC = 0;
2400 v->MaxNumSwathC[k] = 0;
2401 v->PrefetchSourceLinesC[k] = 0;
2402 }
2403
2404 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2405 mode_lib,
2406 v->DCCEnable[k],
2407 v->BlockHeight256BytesY[k],
2408 v->BlockWidth256BytesY[k],
2409 v->SourcePixelFormat[k],
2410 v->SurfaceTiling[k],
2411 v->BytePerPixelY[k],
2412 v->SourceScan[k],
2413 v->SwathWidthY[k],
2414 v->ViewportHeight[k],
2415 v->GPUVMEnable,
2416 v->HostVMEnable,
2417 v->HostVMMaxNonCachedPageTableLevels,
2418 v->GPUVMMinPageSize,
2419 v->HostVMMinPageSize,
2420 v->PTEBufferSizeInRequestsForLuma,
2421 v->PitchY[k],
2422 v->DCCMetaPitchY[k],
2423 &v->MacroTileWidthY[k],
2424 &MetaRowByteY,
2425 &PixelPTEBytesPerRowY,
2426 &PTEBufferSizeNotExceededY,
2427 &v->dpte_row_width_luma_ub[k],
2428 &v->dpte_row_height[k],
2429 &v->meta_req_width[k],
2430 &v->meta_req_height[k],
2431 &v->meta_row_width[k],
2432 &v->meta_row_height[k],
2433 &v->vm_group_bytes[k],
2434 &v->dpte_group_bytes[k],
2435 &v->PixelPTEReqWidthY[k],
2436 &v->PixelPTEReqHeightY[k],
2437 &v->PTERequestSizeY[k],
2438 &v->dpde0_bytes_per_frame_ub_l[k],
2439 &v->meta_pte_bytes_per_frame_ub_l[k]);
2440
2441 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2442 mode_lib,
2443 v->VRatio[k],
2444 v->vtaps[k],
2445 v->Interlace[k],
2446 v->ProgressiveToInterlaceUnitInOPP,
2447 v->SwathHeightY[k],
2448 v->ViewportYStartY[k],
2449 &v->VInitPreFillY[k],
2450 &v->MaxNumSwathY[k]);
2451 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2452 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2453 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2454
2455 CalculateRowBandwidth(
2456 v->GPUVMEnable,
2457 v->SourcePixelFormat[k],
2458 v->VRatio[k],
2459 v->VRatioChroma[k],
2460 v->DCCEnable[k],
2461 v->HTotal[k] / v->PixelClock[k],
2462 MetaRowByteY,
2463 MetaRowByteC,
2464 v->meta_row_height[k],
2465 v->meta_row_height_chroma[k],
2466 PixelPTEBytesPerRowY,
2467 PixelPTEBytesPerRowC,
2468 v->dpte_row_height[k],
2469 v->dpte_row_height_chroma[k],
2470 &v->meta_row_bw[k],
2471 &v->dpte_row_bw[k]);
2472 }
2473
2474 v->TotalDCCActiveDPP = 0;
2475 v->TotalActiveDPP = 0;
2476 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2477 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2478 if (v->DCCEnable[k])
2479 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2480 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2481 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2482 NoChromaPlanes = false;
2483 }
2484
2485 ReorderBytes = v->NumberOfChannels
2486 * dml_max3(
2487 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2488 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2489 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2490
2491 VMDataOnlyReturnBW = dml_min(
2492 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2493 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2494 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2495 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2496
2497 #ifdef __DML_VBA_DEBUG__
2498 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2499 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2500 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2501 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2502 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2503 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2504 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2505 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2506 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2507 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2508 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2509 #endif
2510
2511 if (v->GPUVMEnable && v->HostVMEnable)
2512 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2513
2514 v->UrgentExtraLatency = CalculateExtraLatency(
2515 v->RoundTripPingLatencyCycles,
2516 ReorderBytes,
2517 v->DCFCLK,
2518 v->TotalActiveDPP,
2519 v->PixelChunkSizeInKByte,
2520 v->TotalDCCActiveDPP,
2521 v->MetaChunkSize,
2522 v->ReturnBW,
2523 v->GPUVMEnable,
2524 v->HostVMEnable,
2525 v->NumberOfActivePlanes,
2526 v->DPPPerPlane,
2527 v->dpte_group_bytes,
2528 HostVMInefficiencyFactor,
2529 v->HostVMMinPageSize,
2530 v->HostVMMaxNonCachedPageTableLevels);
2531
2532 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2533
2534 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2535 if (v->BlendingAndTiming[k] == k) {
2536 if (v->WritebackEnable[k] == true) {
2537 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2538 + CalculateWriteBackDelay(
2539 v->WritebackPixelFormat[k],
2540 v->WritebackHRatio[k],
2541 v->WritebackVRatio[k],
2542 v->WritebackVTaps[k],
2543 v->WritebackDestinationWidth[k],
2544 v->WritebackDestinationHeight[k],
2545 v->WritebackSourceHeight[k],
2546 v->HTotal[k]) / v->DISPCLK;
2547 } else
2548 v->WritebackDelay[v->VoltageLevel][k] = 0;
2549 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2550 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2551 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2552 v->WritebackDelay[v->VoltageLevel][k],
2553 v->WritebackLatency
2554 + CalculateWriteBackDelay(
2555 v->WritebackPixelFormat[j],
2556 v->WritebackHRatio[j],
2557 v->WritebackVRatio[j],
2558 v->WritebackVTaps[j],
2559 v->WritebackDestinationWidth[j],
2560 v->WritebackDestinationHeight[j],
2561 v->WritebackSourceHeight[j],
2562 v->HTotal[k]) / v->DISPCLK);
2563 }
2564 }
2565 }
2566 }
2567
2568 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2569 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2570 if (v->BlendingAndTiming[k] == j)
2571 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2572
2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2574 v->MaxVStartupLines[k] =
2575 CalculateMaxVStartup(
2576 v->VTotal[k],
2577 v->VActive[k],
2578 v->VBlankNom[k],
2579 v->HTotal[k],
2580 v->PixelClock[k],
2581 v->ProgressiveToInterlaceUnitInOPP,
2582 v->Interlace[k],
2583 v->ip.VBlankNomDefaultUS,
2584 v->WritebackDelay[v->VoltageLevel][k]);
2585
2586 #ifdef __DML_VBA_DEBUG__
2587 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2588 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2589 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2590 #endif
2591 }
2592
2593 v->MaximumMaxVStartupLines = 0;
2594 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2595 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2596
2597 // VBA_DELTA
2598 // We don't really care to iterate between the various prefetch modes
2599 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2600
2601 v->UrgentLatency = CalculateUrgentLatency(
2602 v->UrgentLatencyPixelDataOnly,
2603 v->UrgentLatencyPixelMixedWithVMData,
2604 v->UrgentLatencyVMDataOnly,
2605 v->DoUrgentLatencyAdjustment,
2606 v->UrgentLatencyAdjustmentFabricClockComponent,
2607 v->UrgentLatencyAdjustmentFabricClockReference,
2608 v->FabricClock);
2609
2610 v->FractionOfUrgentBandwidth = 0.0;
2611 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2612
2613 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2614
2615 do {
2616 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2617 bool DestinationLineTimesForPrefetchLessThan2 = false;
2618 bool VRatioPrefetchMoreThan4 = false;
2619 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2620
2621 MaxTotalRDBandwidth = 0;
2622
2623 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2624
2625 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2626 Pipe myPipe;
2627
2628 myPipe.DPPCLK = v->DPPCLK[k];
2629 myPipe.DISPCLK = v->DISPCLK;
2630 myPipe.PixelClock = v->PixelClock[k];
2631 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2632 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2633 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2634 myPipe.VRatio = v->VRatio[k];
2635 myPipe.VRatioChroma = v->VRatioChroma[k];
2636 myPipe.SourceScan = v->SourceScan[k];
2637 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2638 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2639 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2640 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2641 myPipe.InterlaceEnable = v->Interlace[k];
2642 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2643 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2644 myPipe.HTotal = v->HTotal[k];
2645 myPipe.DCCEnable = v->DCCEnable[k];
2646 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2647 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2648 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2649 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2650 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2651 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2652 v->ErrorResult[k] = CalculatePrefetchSchedule(
2653 mode_lib,
2654 HostVMInefficiencyFactor,
2655 &myPipe,
2656 v->DSCDelay[k],
2657 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2658 v->DPPCLKDelaySCL,
2659 v->DPPCLKDelaySCLLBOnly,
2660 v->DPPCLKDelayCNVCCursor,
2661 v->DISPCLKDelaySubtotal,
2662 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2663 v->OutputFormat[k],
2664 v->MaxInterDCNTileRepeaters,
2665 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2666 v->MaxVStartupLines[k],
2667 v->GPUVMMaxPageTableLevels,
2668 v->GPUVMEnable,
2669 v->HostVMEnable,
2670 v->HostVMMaxNonCachedPageTableLevels,
2671 v->HostVMMinPageSize,
2672 v->DynamicMetadataEnable[k],
2673 v->DynamicMetadataVMEnabled,
2674 v->DynamicMetadataLinesBeforeActiveRequired[k],
2675 v->DynamicMetadataTransmittedBytes[k],
2676 v->UrgentLatency,
2677 v->UrgentExtraLatency,
2678 v->TCalc,
2679 v->PDEAndMetaPTEBytesFrame[k],
2680 v->MetaRowByte[k],
2681 v->PixelPTEBytesPerRow[k],
2682 v->PrefetchSourceLinesY[k],
2683 v->SwathWidthY[k],
2684 v->VInitPreFillY[k],
2685 v->MaxNumSwathY[k],
2686 v->PrefetchSourceLinesC[k],
2687 v->SwathWidthC[k],
2688 v->VInitPreFillC[k],
2689 v->MaxNumSwathC[k],
2690 v->swath_width_luma_ub[k],
2691 v->swath_width_chroma_ub[k],
2692 v->SwathHeightY[k],
2693 v->SwathHeightC[k],
2694 TWait,
2695 &v->DSTXAfterScaler[k],
2696 &v->DSTYAfterScaler[k],
2697 &v->DestinationLinesForPrefetch[k],
2698 &v->PrefetchBandwidth[k],
2699 &v->DestinationLinesToRequestVMInVBlank[k],
2700 &v->DestinationLinesToRequestRowInVBlank[k],
2701 &v->VRatioPrefetchY[k],
2702 &v->VRatioPrefetchC[k],
2703 &v->RequiredPrefetchPixDataBWLuma[k],
2704 &v->RequiredPrefetchPixDataBWChroma[k],
2705 &v->NotEnoughTimeForDynamicMetadata[k],
2706 &v->Tno_bw[k],
2707 &v->prefetch_vmrow_bw[k],
2708 &v->Tdmdl_vm[k],
2709 &v->Tdmdl[k],
2710 &v->TSetup[k],
2711 &v->VUpdateOffsetPix[k],
2712 &v->VUpdateWidthPix[k],
2713 &v->VReadyOffsetPix[k]);
2714
2715 #ifdef __DML_VBA_DEBUG__
2716 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2717 #endif
2718 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2719 }
2720
2721 v->NoEnoughUrgentLatencyHiding = false;
2722 v->NoEnoughUrgentLatencyHidingPre = false;
2723
2724 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2725 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2726 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2727 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2728 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2729
2730 CalculateUrgentBurstFactor(
2731 v->swath_width_luma_ub[k],
2732 v->swath_width_chroma_ub[k],
2733 v->SwathHeightY[k],
2734 v->SwathHeightC[k],
2735 v->HTotal[k] / v->PixelClock[k],
2736 v->UrgentLatency,
2737 v->CursorBufferSize,
2738 v->CursorWidth[k][0],
2739 v->CursorBPP[k][0],
2740 v->VRatio[k],
2741 v->VRatioChroma[k],
2742 v->BytePerPixelDETY[k],
2743 v->BytePerPixelDETC[k],
2744 v->DETBufferSizeY[k],
2745 v->DETBufferSizeC[k],
2746 &v->UrgBurstFactorCursor[k],
2747 &v->UrgBurstFactorLuma[k],
2748 &v->UrgBurstFactorChroma[k],
2749 &v->NoUrgentLatencyHiding[k]);
2750
2751 CalculateUrgentBurstFactor(
2752 v->swath_width_luma_ub[k],
2753 v->swath_width_chroma_ub[k],
2754 v->SwathHeightY[k],
2755 v->SwathHeightC[k],
2756 v->HTotal[k] / v->PixelClock[k],
2757 v->UrgentLatency,
2758 v->CursorBufferSize,
2759 v->CursorWidth[k][0],
2760 v->CursorBPP[k][0],
2761 v->VRatioPrefetchY[k],
2762 v->VRatioPrefetchC[k],
2763 v->BytePerPixelDETY[k],
2764 v->BytePerPixelDETC[k],
2765 v->DETBufferSizeY[k],
2766 v->DETBufferSizeC[k],
2767 &v->UrgBurstFactorCursorPre[k],
2768 &v->UrgBurstFactorLumaPre[k],
2769 &v->UrgBurstFactorChromaPre[k],
2770 &v->NoUrgentLatencyHidingPre[k]);
2771
2772 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2773 + dml_max3(
2774 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2775 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2776 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2777 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2778 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2779 v->DPPPerPlane[k]
2780 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2781 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2782 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2783
2784 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2785 + dml_max3(
2786 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2787 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2788 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2789 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2790 + v->cursor_bw_pre[k]);
2791
2792 #ifdef __DML_VBA_DEBUG__
2793 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2794 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2795 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2796 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2797 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2798
2799 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2800 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2801
2802 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2803 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2804 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2805 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2806 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2807 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2808 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2809 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2810 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2811 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2812 #endif
2813
2814 if (v->DestinationLinesForPrefetch[k] < 2)
2815 DestinationLineTimesForPrefetchLessThan2 = true;
2816
2817 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2818 VRatioPrefetchMoreThan4 = true;
2819
2820 if (v->NoUrgentLatencyHiding[k] == true)
2821 v->NoEnoughUrgentLatencyHiding = true;
2822
2823 if (v->NoUrgentLatencyHidingPre[k] == true)
2824 v->NoEnoughUrgentLatencyHidingPre = true;
2825 }
2826
2827 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2828
2829 #ifdef __DML_VBA_DEBUG__
2830 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2831 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2832 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2833 #endif
2834
2835 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2836 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2837 v->PrefetchModeSupported = true;
2838 else {
2839 v->PrefetchModeSupported = false;
2840 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2841 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2842 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2843 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2844 }
2845
2846 // PREVIOUS_ERROR
2847 // This error result check was done after the PrefetchModeSupported. So we will
2848 // still try to calculate flip schedule even prefetch mode not supported
2849 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2850 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2851 v->PrefetchModeSupported = false;
2852 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2853 }
2854 }
2855
2856 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2857 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2859 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2860 - dml_max(
2861 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2862 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2863 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2864 v->DPPPerPlane[k]
2865 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2866 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2867 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2868 }
2869
2870 v->TotImmediateFlipBytes = 0;
2871 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2872 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2873 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2874 }
2875 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2876 CalculateFlipSchedule(
2877 mode_lib,
2878 k,
2879 HostVMInefficiencyFactor,
2880 v->UrgentExtraLatency,
2881 v->UrgentLatency,
2882 v->PDEAndMetaPTEBytesFrame[k],
2883 v->MetaRowByte[k],
2884 v->PixelPTEBytesPerRow[k]);
2885 }
2886
2887 v->total_dcn_read_bw_with_flip = 0.0;
2888 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2889 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2890 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2891 + dml_max3(
2892 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2893 v->DPPPerPlane[k] * v->final_flip_bw[k]
2894 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2895 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2896 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2897 v->DPPPerPlane[k]
2898 * (v->final_flip_bw[k]
2899 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2900 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2901 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2902 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2903 + dml_max3(
2904 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2905 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2906 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2907 v->DPPPerPlane[k]
2908 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2909 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2910 }
2911 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2912
2913 v->ImmediateFlipSupported = true;
2914 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2915 #ifdef __DML_VBA_DEBUG__
2916 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2917 #endif
2918 v->ImmediateFlipSupported = false;
2919 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2920 }
2921 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2922 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2923 #ifdef __DML_VBA_DEBUG__
2924 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2925 #endif
2926 v->ImmediateFlipSupported = false;
2927 }
2928 }
2929 } else {
2930 v->ImmediateFlipSupported = false;
2931 }
2932
2933 v->PrefetchAndImmediateFlipSupported =
2934 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2935 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2936 v->ImmediateFlipSupported)) ? true : false;
2937 #ifdef __DML_VBA_DEBUG__
2938 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2939 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2940 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2941 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2942 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2943 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2944 #endif
2945 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2946
2947 v->VStartupLines = v->VStartupLines + 1;
2948 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2949 ASSERT(v->PrefetchAndImmediateFlipSupported);
2950
2951 // Unbounded Request Enabled
2952 CalculateUnboundedRequestAndCompressedBufferSize(
2953 v->DETBufferSizeInKByte[0],
2954 v->ConfigReturnBufferSizeInKByte,
2955 v->UseUnboundedRequesting,
2956 v->TotalActiveDPP,
2957 NoChromaPlanes,
2958 v->MaxNumDPP,
2959 v->CompressedBufferSegmentSizeInkByte,
2960 v->Output,
2961 &v->UnboundedRequestEnabled,
2962 &v->CompressedBufferSizeInkByte);
2963
2964 //Watermarks and NB P-State/DRAM Clock Change Support
2965 {
2966 enum clock_change_support DRAMClockChangeSupport; // dummy
2967
2968 CalculateWatermarksAndDRAMSpeedChangeSupport(
2969 mode_lib,
2970 PrefetchMode,
2971 v->DCFCLK,
2972 v->ReturnBW,
2973 v->UrgentLatency,
2974 v->UrgentExtraLatency,
2975 v->SOCCLK,
2976 v->DCFCLKDeepSleep,
2977 v->DETBufferSizeY,
2978 v->DETBufferSizeC,
2979 v->SwathHeightY,
2980 v->SwathHeightC,
2981 v->SwathWidthY,
2982 v->SwathWidthC,
2983 v->DPPPerPlane,
2984 v->BytePerPixelDETY,
2985 v->BytePerPixelDETC,
2986 v->UnboundedRequestEnabled,
2987 v->CompressedBufferSizeInkByte,
2988 &DRAMClockChangeSupport,
2989 &v->StutterExitWatermark,
2990 &v->StutterEnterPlusExitWatermark,
2991 &v->Z8StutterExitWatermark,
2992 &v->Z8StutterEnterPlusExitWatermark);
2993
2994 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2995 if (v->WritebackEnable[k] == true) {
2996 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2997 0,
2998 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2999 } else {
3000 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3001 }
3002 }
3003 }
3004
3005 //Display Pipeline Delivery Time in Prefetch, Groups
3006 CalculatePixelDeliveryTimes(
3007 v->NumberOfActivePlanes,
3008 v->VRatio,
3009 v->VRatioChroma,
3010 v->VRatioPrefetchY,
3011 v->VRatioPrefetchC,
3012 v->swath_width_luma_ub,
3013 v->swath_width_chroma_ub,
3014 v->DPPPerPlane,
3015 v->HRatio,
3016 v->HRatioChroma,
3017 v->PixelClock,
3018 v->PSCL_THROUGHPUT_LUMA,
3019 v->PSCL_THROUGHPUT_CHROMA,
3020 v->DPPCLK,
3021 v->BytePerPixelC,
3022 v->SourceScan,
3023 v->NumberOfCursors,
3024 v->CursorWidth,
3025 v->CursorBPP,
3026 v->BlockWidth256BytesY,
3027 v->BlockHeight256BytesY,
3028 v->BlockWidth256BytesC,
3029 v->BlockHeight256BytesC,
3030 v->DisplayPipeLineDeliveryTimeLuma,
3031 v->DisplayPipeLineDeliveryTimeChroma,
3032 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3033 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3034 v->DisplayPipeRequestDeliveryTimeLuma,
3035 v->DisplayPipeRequestDeliveryTimeChroma,
3036 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3037 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3038 v->CursorRequestDeliveryTime,
3039 v->CursorRequestDeliveryTimePrefetch);
3040
3041 CalculateMetaAndPTETimes(
3042 v->NumberOfActivePlanes,
3043 v->GPUVMEnable,
3044 v->MetaChunkSize,
3045 v->MinMetaChunkSizeBytes,
3046 v->HTotal,
3047 v->VRatio,
3048 v->VRatioChroma,
3049 v->DestinationLinesToRequestRowInVBlank,
3050 v->DestinationLinesToRequestRowInImmediateFlip,
3051 v->DCCEnable,
3052 v->PixelClock,
3053 v->BytePerPixelY,
3054 v->BytePerPixelC,
3055 v->SourceScan,
3056 v->dpte_row_height,
3057 v->dpte_row_height_chroma,
3058 v->meta_row_width,
3059 v->meta_row_width_chroma,
3060 v->meta_row_height,
3061 v->meta_row_height_chroma,
3062 v->meta_req_width,
3063 v->meta_req_width_chroma,
3064 v->meta_req_height,
3065 v->meta_req_height_chroma,
3066 v->dpte_group_bytes,
3067 v->PTERequestSizeY,
3068 v->PTERequestSizeC,
3069 v->PixelPTEReqWidthY,
3070 v->PixelPTEReqHeightY,
3071 v->PixelPTEReqWidthC,
3072 v->PixelPTEReqHeightC,
3073 v->dpte_row_width_luma_ub,
3074 v->dpte_row_width_chroma_ub,
3075 v->DST_Y_PER_PTE_ROW_NOM_L,
3076 v->DST_Y_PER_PTE_ROW_NOM_C,
3077 v->DST_Y_PER_META_ROW_NOM_L,
3078 v->DST_Y_PER_META_ROW_NOM_C,
3079 v->TimePerMetaChunkNominal,
3080 v->TimePerChromaMetaChunkNominal,
3081 v->TimePerMetaChunkVBlank,
3082 v->TimePerChromaMetaChunkVBlank,
3083 v->TimePerMetaChunkFlip,
3084 v->TimePerChromaMetaChunkFlip,
3085 v->time_per_pte_group_nom_luma,
3086 v->time_per_pte_group_vblank_luma,
3087 v->time_per_pte_group_flip_luma,
3088 v->time_per_pte_group_nom_chroma,
3089 v->time_per_pte_group_vblank_chroma,
3090 v->time_per_pte_group_flip_chroma);
3091
3092 CalculateVMGroupAndRequestTimes(
3093 v->NumberOfActivePlanes,
3094 v->GPUVMEnable,
3095 v->GPUVMMaxPageTableLevels,
3096 v->HTotal,
3097 v->BytePerPixelC,
3098 v->DestinationLinesToRequestVMInVBlank,
3099 v->DestinationLinesToRequestVMInImmediateFlip,
3100 v->DCCEnable,
3101 v->PixelClock,
3102 v->dpte_row_width_luma_ub,
3103 v->dpte_row_width_chroma_ub,
3104 v->vm_group_bytes,
3105 v->dpde0_bytes_per_frame_ub_l,
3106 v->dpde0_bytes_per_frame_ub_c,
3107 v->meta_pte_bytes_per_frame_ub_l,
3108 v->meta_pte_bytes_per_frame_ub_c,
3109 v->TimePerVMGroupVBlank,
3110 v->TimePerVMGroupFlip,
3111 v->TimePerVMRequestVBlank,
3112 v->TimePerVMRequestFlip);
3113
3114 // Min TTUVBlank
3115 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3116 if (PrefetchMode == 0) {
3117 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3118 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3119 v->MinTTUVBlank[k] = dml_max(
3120 v->DRAMClockChangeWatermark,
3121 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3122 } else if (PrefetchMode == 1) {
3123 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3124 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3125 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3126 } else {
3127 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3128 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3129 v->MinTTUVBlank[k] = v->UrgentWatermark;
3130 }
3131 if (!v->DynamicMetadataEnable[k])
3132 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3133 }
3134
3135 // DCC Configuration
3136 v->ActiveDPPs = 0;
3137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3138 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3139 v->SourcePixelFormat[k],
3140 v->SurfaceWidthY[k],
3141 v->SurfaceWidthC[k],
3142 v->SurfaceHeightY[k],
3143 v->SurfaceHeightC[k],
3144 v->DETBufferSizeInKByte[0] * 1024,
3145 v->BlockHeight256BytesY[k],
3146 v->BlockHeight256BytesC[k],
3147 v->SurfaceTiling[k],
3148 v->BytePerPixelY[k],
3149 v->BytePerPixelC[k],
3150 v->BytePerPixelDETY[k],
3151 v->BytePerPixelDETC[k],
3152 v->SourceScan[k],
3153 &v->DCCYMaxUncompressedBlock[k],
3154 &v->DCCCMaxUncompressedBlock[k],
3155 &v->DCCYMaxCompressedBlock[k],
3156 &v->DCCCMaxCompressedBlock[k],
3157 &v->DCCYIndependentBlock[k],
3158 &v->DCCCIndependentBlock[k]);
3159 }
3160
3161 // VStartup Adjustment
3162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3163 bool isInterlaceTiming;
3164 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3165 #ifdef __DML_VBA_DEBUG__
3166 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3167 #endif
3168
3169 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3170
3171 #ifdef __DML_VBA_DEBUG__
3172 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3173 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3174 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3175 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3176 #endif
3177
3178 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3179 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3180 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3181 }
3182
3183 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3184 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3185 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3186 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3187 } else {
3188 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3189 }
3190 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3191 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3192 <= (isInterlaceTiming ?
3193 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3194 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3195 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3196 } else {
3197 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3198 }
3199 #ifdef __DML_VBA_DEBUG__
3200 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3201 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3202 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3203 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3204 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3205 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3206 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3207 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3208 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3209 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3210 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3211 #endif
3212 }
3213
3214 {
3215 //Maximum Bandwidth Used
3216 double TotalWRBandwidth = 0;
3217 double MaxPerPlaneVActiveWRBandwidth = 0;
3218 double WRBandwidth = 0;
3219
3220 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3221 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3222 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3223 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3224 } else if (v->WritebackEnable[k] == true) {
3225 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3226 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3227 }
3228 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3229 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3230 }
3231
3232 v->TotalDataReadBandwidth = 0;
3233 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3234 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3235 }
3236 }
3237 // Stutter Efficiency
3238 CalculateStutterEfficiency(
3239 mode_lib,
3240 v->CompressedBufferSizeInkByte,
3241 v->UnboundedRequestEnabled,
3242 v->ConfigReturnBufferSizeInKByte,
3243 v->MetaFIFOSizeInKEntries,
3244 v->ZeroSizeBufferEntries,
3245 v->NumberOfActivePlanes,
3246 v->ROBBufferSizeInKByte,
3247 v->TotalDataReadBandwidth,
3248 v->DCFCLK,
3249 v->ReturnBW,
3250 v->COMPBUF_RESERVED_SPACE_64B,
3251 v->COMPBUF_RESERVED_SPACE_ZS,
3252 v->SRExitTime,
3253 v->SRExitZ8Time,
3254 v->SynchronizedVBlank,
3255 v->StutterEnterPlusExitWatermark,
3256 v->Z8StutterEnterPlusExitWatermark,
3257 v->ProgressiveToInterlaceUnitInOPP,
3258 v->Interlace,
3259 v->MinTTUVBlank,
3260 v->DPPPerPlane,
3261 v->DETBufferSizeY,
3262 v->BytePerPixelY,
3263 v->BytePerPixelDETY,
3264 v->SwathWidthY,
3265 v->SwathHeightY,
3266 v->SwathHeightC,
3267 v->DCCRateLuma,
3268 v->DCCRateChroma,
3269 v->DCCFractionOfZeroSizeRequestsLuma,
3270 v->DCCFractionOfZeroSizeRequestsChroma,
3271 v->HTotal,
3272 v->VTotal,
3273 v->PixelClock,
3274 v->VRatio,
3275 v->SourceScan,
3276 v->BlockHeight256BytesY,
3277 v->BlockWidth256BytesY,
3278 v->BlockHeight256BytesC,
3279 v->BlockWidth256BytesC,
3280 v->DCCYMaxUncompressedBlock,
3281 v->DCCCMaxUncompressedBlock,
3282 v->VActive,
3283 v->DCCEnable,
3284 v->WritebackEnable,
3285 v->ReadBandwidthPlaneLuma,
3286 v->ReadBandwidthPlaneChroma,
3287 v->meta_row_bw,
3288 v->dpte_row_bw,
3289 &v->StutterEfficiencyNotIncludingVBlank,
3290 &v->StutterEfficiency,
3291 &v->NumberOfStutterBurstsPerFrame,
3292 &v->Z8StutterEfficiencyNotIncludingVBlank,
3293 &v->Z8StutterEfficiency,
3294 &v->Z8NumberOfStutterBurstsPerFrame,
3295 &v->StutterPeriod);
3296 }
3297
/*
 * Per-plane pipe configuration pass.
 *
 * For every active plane, CalculateBytePerPixelAnd256BBlockSizes() fills the
 * local bytes-per-pixel and 256-byte block dimension arrays. Those arrays are
 * then passed to CalculateSwathAndDETConfiguration() together with the
 * viewport, surface and scaling state held in mode_lib->vba.
 *
 * Among the trailing array arguments of that call, only SwathHeightY/C and
 * DETBufferSizeY/C point into mode_lib->vba; every other trailing slot gets a
 * local dummy that this function never reads afterwards.
 */
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
{
	struct vba_vars_st *v = &mode_lib->vba;
	// Display Pipe Configuration
	/* Per-plane scratch filled by CalculateBytePerPixelAnd256BBlockSizes() */
	double BytePerPixDETY[DC__NUM_DPP__MAX];
	double BytePerPixDETC[DC__NUM_DPP__MAX];
	int BytePerPixY[DC__NUM_DPP__MAX];
	int BytePerPixC[DC__NUM_DPP__MAX];
	int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
	int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
	int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
	int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
	/* Placeholders for call arguments whose values are not used here */
	double dummy1[DC__NUM_DPP__MAX];
	double dummy2[DC__NUM_DPP__MAX];
	double dummy3[DC__NUM_DPP__MAX];
	double dummy4[DC__NUM_DPP__MAX];
	int dummy5[DC__NUM_DPP__MAX];
	int dummy6[DC__NUM_DPP__MAX];
	bool dummy7[DC__NUM_DPP__MAX];
	bool dummysinglestring;

	unsigned int k;

	for (k = 0; k < v->NumberOfActivePlanes; ++k) {

		/*
		 * NOTE(review): the callee takes unsigned int pointers while the
		 * local arrays are int; this relies on pointer-sign warnings
		 * being disabled - confirm against the build flags.
		 */
		CalculateBytePerPixelAnd256BBlockSizes(
				v->SourcePixelFormat[k],
				v->SurfaceTiling[k],
				&BytePerPixY[k],
				&BytePerPixC[k],
				&BytePerPixDETY[k],
				&BytePerPixDETC[k],
				&Read256BytesBlockHeightY[k],
				&Read256BytesBlockHeightC[k],
				&Read256BytesBlockWidthY[k],
				&Read256BytesBlockWidthC[k]);
	}

	/*
	 * dummy1/dummy2 are passed uninitialised. The mode-support call site
	 * passes MaximumSwathWidthLuma/Chroma in these two slots and `true` as
	 * the first argument; presumably they are ignored when the first
	 * argument is false - TODO confirm against the callee.
	 */
	CalculateSwathAndDETConfiguration(
			false,
			v->NumberOfActivePlanes,
			v->DETBufferSizeInKByte[0],
			dummy1,
			dummy2,
			v->SourceScan,
			v->SourcePixelFormat,
			v->SurfaceTiling,
			v->ViewportWidth,
			v->ViewportHeight,
			v->SurfaceWidthY,
			v->SurfaceWidthC,
			v->SurfaceHeightY,
			v->SurfaceHeightC,
			Read256BytesBlockHeightY,
			Read256BytesBlockHeightC,
			Read256BytesBlockWidthY,
			Read256BytesBlockWidthC,
			v->ODMCombineEnabled,
			v->BlendingAndTiming,
			BytePerPixY,
			BytePerPixC,
			BytePerPixDETY,
			BytePerPixDETC,
			v->HActive,
			v->HRatio,
			v->HRatioChroma,
			v->DPPPerPlane,
			dummy5,
			dummy6,
			dummy3,
			dummy4,
			v->SwathHeightY,
			v->SwathHeightC,
			v->DETBufferSizeY,
			v->DETBufferSizeC,
			dummy7,
			&dummysinglestring);
}
3376
3377 static bool CalculateBytePerPixelAnd256BBlockSizes(
3378 enum source_format_class SourcePixelFormat,
3379 enum dm_swizzle_mode SurfaceTiling,
3380 unsigned int *BytePerPixelY,
3381 unsigned int *BytePerPixelC,
3382 double *BytePerPixelDETY,
3383 double *BytePerPixelDETC,
3384 unsigned int *BlockHeight256BytesY,
3385 unsigned int *BlockHeight256BytesC,
3386 unsigned int *BlockWidth256BytesY,
3387 unsigned int *BlockWidth256BytesC)
3388 {
3389 if (SourcePixelFormat == dm_444_64) {
3390 *BytePerPixelDETY = 8;
3391 *BytePerPixelDETC = 0;
3392 *BytePerPixelY = 8;
3393 *BytePerPixelC = 0;
3394 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3395 *BytePerPixelDETY = 4;
3396 *BytePerPixelDETC = 0;
3397 *BytePerPixelY = 4;
3398 *BytePerPixelC = 0;
3399 } else if (SourcePixelFormat == dm_444_16) {
3400 *BytePerPixelDETY = 2;
3401 *BytePerPixelDETC = 0;
3402 *BytePerPixelY = 2;
3403 *BytePerPixelC = 0;
3404 } else if (SourcePixelFormat == dm_444_8) {
3405 *BytePerPixelDETY = 1;
3406 *BytePerPixelDETC = 0;
3407 *BytePerPixelY = 1;
3408 *BytePerPixelC = 0;
3409 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3410 *BytePerPixelDETY = 4;
3411 *BytePerPixelDETC = 1;
3412 *BytePerPixelY = 4;
3413 *BytePerPixelC = 1;
3414 } else if (SourcePixelFormat == dm_420_8) {
3415 *BytePerPixelDETY = 1;
3416 *BytePerPixelDETC = 2;
3417 *BytePerPixelY = 1;
3418 *BytePerPixelC = 2;
3419 } else if (SourcePixelFormat == dm_420_12) {
3420 *BytePerPixelDETY = 2;
3421 *BytePerPixelDETC = 4;
3422 *BytePerPixelY = 2;
3423 *BytePerPixelC = 4;
3424 } else {
3425 *BytePerPixelDETY = 4.0 / 3;
3426 *BytePerPixelDETC = 8.0 / 3;
3427 *BytePerPixelY = 2;
3428 *BytePerPixelC = 4;
3429 }
3430
3431 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3432 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3433 if (SurfaceTiling == dm_sw_linear) {
3434 *BlockHeight256BytesY = 1;
3435 } else if (SourcePixelFormat == dm_444_64) {
3436 *BlockHeight256BytesY = 4;
3437 } else if (SourcePixelFormat == dm_444_8) {
3438 *BlockHeight256BytesY = 16;
3439 } else {
3440 *BlockHeight256BytesY = 8;
3441 }
3442 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3443 *BlockHeight256BytesC = 0;
3444 *BlockWidth256BytesC = 0;
3445 } else {
3446 if (SurfaceTiling == dm_sw_linear) {
3447 *BlockHeight256BytesY = 1;
3448 *BlockHeight256BytesC = 1;
3449 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3450 *BlockHeight256BytesY = 8;
3451 *BlockHeight256BytesC = 16;
3452 } else if (SourcePixelFormat == dm_420_8) {
3453 *BlockHeight256BytesY = 16;
3454 *BlockHeight256BytesC = 8;
3455 } else {
3456 *BlockHeight256BytesY = 8;
3457 *BlockHeight256BytesC = 8;
3458 }
3459 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3460 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3461 }
3462 return true;
3463 }
3464
/*
 * Wait time selected by prefetch mode:
 *   mode 0    - max(DRAM clock change + urgent, SR enter+exit, urgent)
 *   mode 1    - max(SR enter+exit, urgent)
 *   otherwise - urgent latency alone
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3475
3476 double dml314_CalculateWriteBackDISPCLK(
3477 enum source_format_class WritebackPixelFormat,
3478 double PixelClock,
3479 double WritebackHRatio,
3480 double WritebackVRatio,
3481 unsigned int WritebackHTaps,
3482 unsigned int WritebackVTaps,
3483 long WritebackSourceWidth,
3484 long WritebackDestinationWidth,
3485 unsigned int HTotal,
3486 unsigned int WritebackLineBufferSize)
3487 {
3488 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3489
3490 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3491 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3492 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3493 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3494 }
3495
3496 static double CalculateWriteBackDelay(
3497 enum source_format_class WritebackPixelFormat,
3498 double WritebackHRatio,
3499 double WritebackVRatio,
3500 unsigned int WritebackVTaps,
3501 int WritebackDestinationWidth,
3502 int WritebackDestinationHeight,
3503 int WritebackSourceHeight,
3504 unsigned int HTotal)
3505 {
3506 double CalculateWriteBackDelay;
3507 double Line_length;
3508 double Output_lines_last_notclamped;
3509 double WritebackVInit;
3510
3511 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3512 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3513 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3514 if (Output_lines_last_notclamped < 0) {
3515 CalculateWriteBackDelay = 0;
3516 } else {
3517 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3518 }
3519 return CalculateWriteBackDelay;
3520 }
3521
/*
 * Compute VUPDATE/VREADY pixel offsets and the dynamic-metadata timing terms.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay) * PixelClock)
 *   VReadyOffsetPix  - ceil(max(150/DPPCLK, repeater delay + 20/DCFClkDeepSleep
 *                      + 10/DPPCLK) * PixelClock)
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - sum of the three pixel counts above, divided by PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - half of VBlank * HTotal / PixelClock when no "lines before
 *                      active" count is given, otherwise that many lines of
 *                      HTotal / PixelClock; halved once more for interlaced
 *                      timing when the progressive-to-interlace unit is not in OPP
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles and need %f, not %d */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3565
3566 static void CalculateRowBandwidth(
3567 bool GPUVMEnable,
3568 enum source_format_class SourcePixelFormat,
3569 double VRatio,
3570 double VRatioChroma,
3571 bool DCCEnable,
3572 double LineTime,
3573 unsigned int MetaRowByteLuma,
3574 unsigned int MetaRowByteChroma,
3575 unsigned int meta_row_height_luma,
3576 unsigned int meta_row_height_chroma,
3577 unsigned int PixelPTEBytesPerRowLuma,
3578 unsigned int PixelPTEBytesPerRowChroma,
3579 unsigned int dpte_row_height_luma,
3580 unsigned int dpte_row_height_chroma,
3581 double *meta_row_bw,
3582 double *dpte_row_bw)
3583 {
3584 if (DCCEnable != true) {
3585 *meta_row_bw = 0;
3586 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3587 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3588 } else {
3589 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3590 }
3591
3592 if (GPUVMEnable != true) {
3593 *dpte_row_bw = 0;
3594 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3595 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3596 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3597 } else {
3598 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3599 }
3600 }
3601
/*
 * Immediate-flip schedule for plane k.
 *
 * Writes into mode_lib->vba for index k:
 *   DestinationLinesToRequestVMInImmediateFlip  - lines to fetch PDE/meta PTE
 *   DestinationLinesToRequestRowInImmediateFlip - lines to fetch meta/DPTE row
 *   final_flip_bw                               - resulting flip bandwidth
 *   ImmediateFlipSupportedForPipe               - whether the flip fits
 *
 * Both line counts are rounded up to a quarter of a line.
 */
static void CalculateFlipSchedule(
		struct display_mode_lib *mode_lib,
		unsigned int k,
		double HostVMInefficiencyFactor,
		double UrgentExtraLatency,
		double UrgentLatency,
		double PDEAndMetaPTEBytesPerFrame,
		double MetaRowBytes,
		double DPTEBytesPerRow)
{
	struct vba_vars_st *v = &mode_lib->vba;
	double min_row_time = 0.0;
	unsigned int HostVMDynamicLevelsTrips;
	double TimeForFetchingMetaPTEImmediateFlip;
	double TimeForFetchingRowInVBlankImmediateFlip;
	double ImmediateFlipBW = 1.0;
	double LineTime = v->HTotal[k] / v->PixelClock[k];

	/* Host VM page-table trips only count when both GPUVM and HostVM are on */
	if (v->GPUVMEnable == true && v->HostVMEnable == true) {
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMDynamicLevelsTrips = 0;
	}

	/*
	 * This plane's share of the immediate-flip bandwidth, proportional to
	 * its bytes out of TotImmediateFlipBytes. Stays at 1.0 when neither
	 * GPUVM nor DCC is enabled.
	 */
	if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
		ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
	}

	/* Time to fetch PDE/meta PTE: never less than a quarter of a line */
	if (v->GPUVMEnable == true) {
		TimeForFetchingMetaPTEImmediateFlip = dml_max3(
				v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
				UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
				LineTime / 4.0);
	} else {
		TimeForFetchingMetaPTEImmediateFlip = 0;
	}

	v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
	/* Time to fetch one meta/DPTE row: never less than a quarter of a line */
	if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
		TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
				(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
				UrgentLatency * (HostVMDynamicLevelsTrips + 1),
				LineTime / 4);
	} else {
		TimeForFetchingRowInVBlankImmediateFlip = 0;
	}

	v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;

	/*
	 * The GPUVM case is taken by the first branch, so the second branch is
	 * only reached with GPUVM off and DCC on.
	 */
	if (v->GPUVMEnable == true) {
		v->final_flip_bw[k] = dml_max(
				PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
				(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
	} else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
		v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
	} else {
		v->final_flip_bw[k] = 0;
	}

	/*
	 * min_row_time: shortest time to consume one DPTE and/or meta row,
	 * taking the chroma plane into account for two-plane formats.
	 * NOTE(review): dm_420_12 is not in this list although
	 * CalculateRowBandwidth() treats it as a chroma format - confirm intent.
	 */
	if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
		if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
			min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
		} else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
			min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
		} else {
			min_row_time = dml_min4(
					v->dpte_row_height[k] * LineTime / v->VRatio[k],
					v->meta_row_height[k] * LineTime / v->VRatio[k],
					v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
					v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
		}
	} else {
		if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
			min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
		} else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
			min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
		} else {
			min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
		}
	}

	/*
	 * Immediate flip is rejected when either line count reaches its limit
	 * (32 for VM, 16 for row) or when the PTE fetch plus two row fetches
	 * take longer than min_row_time.
	 */
	if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
		v->ImmediateFlipSupportedForPipe[k] = false;
	} else {
		v->ImmediateFlipSupportedForPipe[k] = true;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
#endif

}
3700
3701 static double TruncToValidBPP(
3702 double LinkBitRate,
3703 int Lanes,
3704 int HTotal,
3705 int HActive,
3706 double PixelClock,
3707 double DesiredBPP,
3708 bool DSCEnable,
3709 enum output_encoder_class Output,
3710 enum output_format_class Format,
3711 unsigned int DSCInputBitPerComponent,
3712 int DSCSlices,
3713 int AudioRate,
3714 int AudioLayout,
3715 enum odm_combine_mode ODMCombine)
3716 {
3717 double MaxLinkBPP;
3718 int MinDSCBPP;
3719 double MaxDSCBPP;
3720 int NonDSCBPP0;
3721 int NonDSCBPP1;
3722 int NonDSCBPP2;
3723
3724 if (Format == dm_420) {
3725 NonDSCBPP0 = 12;
3726 NonDSCBPP1 = 15;
3727 NonDSCBPP2 = 18;
3728 MinDSCBPP = 6;
3729 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3730 } else if (Format == dm_444) {
3731 NonDSCBPP0 = 24;
3732 NonDSCBPP1 = 30;
3733 NonDSCBPP2 = 36;
3734 MinDSCBPP = 8;
3735 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3736 } else {
3737
3738 NonDSCBPP0 = 16;
3739 NonDSCBPP1 = 20;
3740 NonDSCBPP2 = 24;
3741
3742 if (Format == dm_n422) {
3743 MinDSCBPP = 7;
3744 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3745 } else {
3746 MinDSCBPP = 8;
3747 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3748 }
3749 }
3750
3751 if (DSCEnable && Output == dm_dp) {
3752 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3753 } else {
3754 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3755 }
3756
3757 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3758 MaxLinkBPP = 16;
3759 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3760 MaxLinkBPP = 32;
3761 }
3762
3763 if (DesiredBPP == 0) {
3764 if (DSCEnable) {
3765 if (MaxLinkBPP < MinDSCBPP) {
3766 return BPP_INVALID;
3767 } else if (MaxLinkBPP >= MaxDSCBPP) {
3768 return MaxDSCBPP;
3769 } else {
3770 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3771 }
3772 } else {
3773 if (MaxLinkBPP >= NonDSCBPP2) {
3774 return NonDSCBPP2;
3775 } else if (MaxLinkBPP >= NonDSCBPP1) {
3776 return NonDSCBPP1;
3777 } else if (MaxLinkBPP >= NonDSCBPP0) {
3778 return 16.0;
3779 } else {
3780 return BPP_INVALID;
3781 }
3782 }
3783 } else {
3784 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3785 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3786 return BPP_INVALID;
3787 } else {
3788 return DesiredBPP;
3789 }
3790 }
3791 return BPP_INVALID;
3792 }
3793
/*
 * Run CalculatePrefetchSchedule() for plane k using the [i][j]-indexed
 * mode-support state.
 *
 * Gathers the per-plane clock, scaling, timing and format values into a local
 * Pipe descriptor, then calls CalculatePrefetchSchedule() and stores its
 * return value in v->NoTimeForPrefetch[i][j][k]. Further results are written
 * through the pointer arguments at the end of the call, into per-plane ([k])
 * and per-state ([i][j][k]) fields of mode_lib->vba.
 *
 * i and j presumably are the voltage-state index and the second mode-support
 * index used by the caller's loops - TODO confirm against the call site.
 */
static noinline void CalculatePrefetchSchedulePerPlane(
		struct display_mode_lib *mode_lib,
		double HostVMInefficiencyFactor,
		int i,
		unsigned int j,
		unsigned int k)
{
	struct vba_vars_st *v = &mode_lib->vba;
	Pipe myPipe;

	/* Clocks and scaling for this state/plane */
	myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
	myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
	myPipe.PixelClock = v->PixelClock[k];
	myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
	myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
	myPipe.ScalerEnabled = v->ScalerEnabled[k];
	myPipe.VRatio = mode_lib->vba.VRatio[k];
	myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];

	/* Surface layout, timing and format */
	myPipe.SourceScan = v->SourceScan[k];
	myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
	myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
	myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
	myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
	myPipe.InterlaceEnable = v->Interlace[k];
	myPipe.NumberOfCursors = v->NumberOfCursors[k];
	myPipe.VBlank = v->VTotal[k] - v->VActive[k];
	myPipe.HTotal = v->HTotal[k];
	myPipe.DCCEnable = v->DCCEnable[k];
	/* true for either 2:1 or 4:1 ODM combine in this state */
	myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
		|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
	myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
	myPipe.BytePerPixelY = v->BytePerPixelY[k];
	myPipe.BytePerPixelC = v->BytePerPixelC[k];
	myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
	v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
		mode_lib,
		HostVMInefficiencyFactor,
		&myPipe,
		v->DSCDelayPerState[i][k],
		v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
		v->DPPCLKDelaySCL,
		v->DPPCLKDelaySCLLBOnly,
		v->DPPCLKDelayCNVCCursor,
		v->DISPCLKDelaySubtotal,
		v->SwathWidthYThisState[k] / v->HRatio[k],
		v->OutputFormat[k],
		v->MaxInterDCNTileRepeaters,
		/* VStartup passed in is the smaller of the global and per-state maximum */
		dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
		v->MaximumVStartup[i][j][k],
		v->GPUVMMaxPageTableLevels,
		v->GPUVMEnable,
		v->HostVMEnable,
		v->HostVMMaxNonCachedPageTableLevels,
		v->HostVMMinPageSize,
		v->DynamicMetadataEnable[k],
		v->DynamicMetadataVMEnabled,
		v->DynamicMetadataLinesBeforeActiveRequired[k],
		v->DynamicMetadataTransmittedBytes[k],
		v->UrgLatency[i],
		v->ExtraLatency,
		v->TimeCalc,
		v->PDEAndMetaPTEBytesPerFrame[i][j][k],
		v->MetaRowBytes[i][j][k],
		v->DPTEBytesPerRow[i][j][k],
		v->PrefetchLinesY[i][j][k],
		v->SwathWidthYThisState[k],
		v->PrefillY[k],
		v->MaxNumSwY[k],
		v->PrefetchLinesC[i][j][k],
		v->SwathWidthCThisState[k],
		v->PrefillC[k],
		v->MaxNumSwC[k],
		v->swath_width_luma_ub_this_state[k],
		v->swath_width_chroma_ub_this_state[k],
		v->SwathHeightYThisState[k],
		v->SwathHeightCThisState[k],
		v->TWait,
		/* everything below is passed by address */
		&v->DSTXAfterScaler[k],
		&v->DSTYAfterScaler[k],
		&v->LineTimesForPrefetch[k],
		&v->PrefetchBW[k],
		&v->LinesForMetaPTE[k],
		&v->LinesForMetaAndDPTERow[k],
		&v->VRatioPreY[i][j][k],
		&v->VRatioPreC[i][j][k],
		&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
		&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
		&v->NoTimeForDynamicMetadata[i][j][k],
		&v->Tno_bw[k],
		&v->prefetch_vmrow_bw[k],
		&v->dummy7[k],
		&v->dummy8[k],
		&v->dummy13[k],
		&v->VUpdateOffsetPix[k],
		&v->VUpdateWidthPix[k],
		&v->VReadyOffsetPix[k]);
}
3892
3893 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3894 {
3895 struct vba_vars_st *v = &mode_lib->vba;
3896
3897 int i, j;
3898 unsigned int k, m;
3899 int ReorderingBytes;
3900 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3901 bool NoChroma = true;
3902 bool EnoughWritebackUnits = true;
3903 bool P2IWith420 = false;
3904 bool DSCOnlyIfNecessaryWithBPP = false;
3905 bool DSC422NativeNotSupported = false;
3906 double MaxTotalVActiveRDBandwidth;
3907 bool ViewportExceedsSurface = false;
3908 bool FMTBufferExceeded = false;
3909
3910 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3911
3912 CalculateMinAndMaxPrefetchMode(
3913 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3914 &MinPrefetchMode, &MaxPrefetchMode);
3915
3916 /*Scale Ratio, taps Support Check*/
3917
3918 v->ScaleRatioAndTapsSupport = true;
3919 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3920 if (v->ScalerEnabled[k] == false
3921 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3922 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3923 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3924 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3925 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3926 v->ScaleRatioAndTapsSupport = false;
3927 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3928 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3929 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3930 || v->VRatio[k] > v->vtaps[k]
3931 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3932 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3933 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3934 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3935 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3936 || v->HRatioChroma[k] > v->MaxHSCLRatio
3937 || v->VRatioChroma[k] > v->MaxVSCLRatio
3938 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3939 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3940 v->ScaleRatioAndTapsSupport = false;
3941 }
3942 }
3943 /*Source Format, Pixel Format and Scan Support Check*/
3944
3945 v->SourceFormatPixelAndScanSupport = true;
3946 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3947 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3948 v->SourceFormatPixelAndScanSupport = false;
3949 }
3950 }
3951 /*Bandwidth Support Check*/
3952
3953 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3954 CalculateBytePerPixelAnd256BBlockSizes(
3955 v->SourcePixelFormat[k],
3956 v->SurfaceTiling[k],
3957 &v->BytePerPixelY[k],
3958 &v->BytePerPixelC[k],
3959 &v->BytePerPixelInDETY[k],
3960 &v->BytePerPixelInDETC[k],
3961 &v->Read256BlockHeightY[k],
3962 &v->Read256BlockHeightC[k],
3963 &v->Read256BlockWidthY[k],
3964 &v->Read256BlockWidthC[k]);
3965 }
3966 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3967 if (v->SourceScan[k] != dm_vert) {
3968 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3969 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3970 } else {
3971 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3972 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3973 }
3974 }
3975 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3976 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3977 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3978 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3979 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3980 }
3981 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3982 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3983 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3984 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3985 } else if (v->WritebackEnable[k] == true) {
3986 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3987 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3988 } else {
3989 v->WriteBandwidth[k] = 0.0;
3990 }
3991 }
3992
3993 /*Writeback Latency support check*/
3994
3995 v->WritebackLatencySupport = true;
3996 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3997 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3998 v->WritebackLatencySupport = false;
3999 }
4000 }
4001
4002 /*Writeback Mode Support Check*/
4003
4004 v->TotalNumberOfActiveWriteback = 0;
4005 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4006 if (v->WritebackEnable[k] == true) {
4007 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4008 }
4009 }
4010
4011 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4012 EnoughWritebackUnits = false;
4013 }
4014
4015 /*Writeback Scale Ratio and Taps Support Check*/
4016
4017 v->WritebackScaleRatioAndTapsSupport = true;
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 if (v->WritebackEnable[k] == true) {
4020 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4021 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4022 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4023 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4024 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4025 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4026 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4027 v->WritebackScaleRatioAndTapsSupport = false;
4028 }
4029 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4030 v->WritebackScaleRatioAndTapsSupport = false;
4031 }
4032 }
4033 }
4034 /*Maximum DISPCLK/DPPCLK Support check*/
4035
4036 v->WritebackRequiredDISPCLK = 0.0;
4037 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4038 if (v->WritebackEnable[k] == true) {
4039 v->WritebackRequiredDISPCLK = dml_max(
4040 v->WritebackRequiredDISPCLK,
4041 dml314_CalculateWriteBackDISPCLK(
4042 v->WritebackPixelFormat[k],
4043 v->PixelClock[k],
4044 v->WritebackHRatio[k],
4045 v->WritebackVRatio[k],
4046 v->WritebackHTaps[k],
4047 v->WritebackVTaps[k],
4048 v->WritebackSourceWidth[k],
4049 v->WritebackDestinationWidth[k],
4050 v->HTotal[k],
4051 v->WritebackLineBufferSize));
4052 }
4053 }
4054 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4055 if (v->HRatio[k] > 1.0) {
4056 v->PSCL_FACTOR[k] = dml_min(
4057 v->MaxDCHUBToPSCLThroughput,
4058 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4059 } else {
4060 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4061 }
4062 if (v->BytePerPixelC[k] == 0.0) {
4063 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4064 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4065 * dml_max3(
4066 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4067 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4068 1.0);
4069 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4070 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4071 }
4072 } else {
4073 if (v->HRatioChroma[k] > 1.0) {
4074 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4075 v->MaxDCHUBToPSCLThroughput,
4076 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4077 } else {
4078 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4079 }
4080 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4081 * dml_max5(
4082 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4083 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4084 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4085 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4086 1.0);
4087 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4088 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4089 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4090 }
4091 }
4092 }
4093 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4094 int MaximumSwathWidthSupportLuma;
4095 int MaximumSwathWidthSupportChroma;
4096
4097 if (v->SurfaceTiling[k] == dm_sw_linear) {
4098 MaximumSwathWidthSupportLuma = 8192.0;
4099 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4100 MaximumSwathWidthSupportLuma = 2880.0;
4101 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4102 MaximumSwathWidthSupportLuma = 3840.0;
4103 } else {
4104 MaximumSwathWidthSupportLuma = 5760.0;
4105 }
4106
4107 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4108 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4109 } else {
4110 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4111 }
4112 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4113 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4114 if (v->BytePerPixelC[k] == 0.0) {
4115 v->MaximumSwathWidthInLineBufferChroma = 0;
4116 } else {
4117 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4118 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4119 }
4120 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4121 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4122 }
4123
4124 CalculateSwathAndDETConfiguration(
4125 true,
4126 v->NumberOfActivePlanes,
4127 v->DETBufferSizeInKByte[0],
4128 v->MaximumSwathWidthLuma,
4129 v->MaximumSwathWidthChroma,
4130 v->SourceScan,
4131 v->SourcePixelFormat,
4132 v->SurfaceTiling,
4133 v->ViewportWidth,
4134 v->ViewportHeight,
4135 v->SurfaceWidthY,
4136 v->SurfaceWidthC,
4137 v->SurfaceHeightY,
4138 v->SurfaceHeightC,
4139 v->Read256BlockHeightY,
4140 v->Read256BlockHeightC,
4141 v->Read256BlockWidthY,
4142 v->Read256BlockWidthC,
4143 v->odm_combine_dummy,
4144 v->BlendingAndTiming,
4145 v->BytePerPixelY,
4146 v->BytePerPixelC,
4147 v->BytePerPixelInDETY,
4148 v->BytePerPixelInDETC,
4149 v->HActive,
4150 v->HRatio,
4151 v->HRatioChroma,
4152 v->NoOfDPPThisState,
4153 v->swath_width_luma_ub_this_state,
4154 v->swath_width_chroma_ub_this_state,
4155 v->SwathWidthYThisState,
4156 v->SwathWidthCThisState,
4157 v->SwathHeightYThisState,
4158 v->SwathHeightCThisState,
4159 v->DETBufferSizeYThisState,
4160 v->DETBufferSizeCThisState,
4161 v->SingleDPPViewportSizeSupportPerPlane,
4162 &v->ViewportSizeSupport[0][0]);
4163
4164 for (i = 0; i < v->soc.num_states; i++) {
4165 for (j = 0; j < 2; j++) {
4166 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4167 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4168 v->RequiredDISPCLK[i][j] = 0.0;
4169 v->DISPCLK_DPPCLK_Support[i][j] = true;
4170 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4171 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4172 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4173 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4174 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4175 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4176 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4177 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4178 }
4179 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4180 * (1 + v->DISPCLKRampingMargin / 100.0);
4181 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4182 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4183 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4184 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4185 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4186 }
4187 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4188 * (1 + v->DISPCLKRampingMargin / 100.0);
4189 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4190 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4191 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4192 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4193 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4194 }
4195
4196 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4197 || !(v->Output[k] == dm_dp ||
4198 v->Output[k] == dm_dp2p0 ||
4199 v->Output[k] == dm_edp)) {
4200 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4201 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4202
4203 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4204 FMTBufferExceeded = true;
4205 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4206 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4207 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4208 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4209 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4211 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4212 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4213 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4214 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4215 } else {
4216 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4217 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4218 }
4219 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4220 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4221 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4222 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4223 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4224 } else {
4225 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4226 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4227 }
4228 }
4229 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4230 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4231 if (v->Output[k] == dm_hdmi) {
4232 FMTBufferExceeded = true;
4233 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4234 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4235 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4236
4237 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4238 FMTBufferExceeded = true;
4239 } else {
4240 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4241 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4242 }
4243 }
4244 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4245 v->MPCCombine[i][j][k] = false;
4246 v->NoOfDPP[i][j][k] = 4;
4247 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4248 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4249 v->MPCCombine[i][j][k] = false;
4250 v->NoOfDPP[i][j][k] = 2;
4251 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4252 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4253 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4254 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4255 v->MPCCombine[i][j][k] = false;
4256 v->NoOfDPP[i][j][k] = 1;
4257 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4258 } else {
4259 v->MPCCombine[i][j][k] = true;
4260 v->NoOfDPP[i][j][k] = 2;
4261 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4262 }
4263 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4264 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4265 > v->MaxDppclkRoundedDownToDFSGranularity)
4266 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4267 v->DISPCLK_DPPCLK_Support[i][j] = false;
4268 }
4269 }
4270 v->TotalNumberOfActiveDPP[i][j] = 0;
4271 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4272 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4273 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4274 if (v->NoOfDPP[i][j][k] == 1)
4275 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4276 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4277 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4278 NoChroma = false;
4279 }
4280
4281 // UPTO
4282 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4283 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4284 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4285 double BWOfNonSplitPlaneOfMaximumBandwidth;
4286 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4287
4288 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4289 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4290 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4291 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4292 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4293 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4294 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4295 }
4296 }
4297 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4298 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4299 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4300 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4301 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4302 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4303 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4304 }
4305 }
4306 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4307 v->RequiredDISPCLK[i][j] = 0.0;
4308 v->DISPCLK_DPPCLK_Support[i][j] = true;
4309 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4310 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4311 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4312 v->MPCCombine[i][j][k] = true;
4313 v->NoOfDPP[i][j][k] = 2;
4314 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4315 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4316 } else {
4317 v->MPCCombine[i][j][k] = false;
4318 v->NoOfDPP[i][j][k] = 1;
4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4320 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4321 }
4322 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4323 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4324 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4325 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4326 } else {
4327 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4328 }
4329 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4330 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4331 > v->MaxDppclkRoundedDownToDFSGranularity)
4332 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4333 v->DISPCLK_DPPCLK_Support[i][j] = false;
4334 }
4335 }
4336 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4337 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4338 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4339 }
4340 }
4341 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4342 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4343 v->DISPCLK_DPPCLK_Support[i][j] = false;
4344 }
4345 }
4346 }
4347
4348 /*Total Available Pipes Support Check*/
4349
4350 for (i = 0; i < v->soc.num_states; i++) {
4351 for (j = 0; j < 2; j++) {
4352 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4353 v->TotalAvailablePipesSupport[i][j] = true;
4354 } else {
4355 v->TotalAvailablePipesSupport[i][j] = false;
4356 }
4357 }
4358 }
4359 /*Display IO and DSC Support Check*/
4360
4361 v->NonsupportedDSCInputBPC = false;
4362 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4363 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4364 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4365 v->NonsupportedDSCInputBPC = true;
4366 }
4367 }
4368
4369 /*Number Of DSC Slices*/
4370 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4371 if (v->BlendingAndTiming[k] == k) {
4372 if (v->PixelClockBackEnd[k] > 3200) {
4373 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4374 } else if (v->PixelClockBackEnd[k] > 1360) {
4375 v->NumberOfDSCSlices[k] = 8;
4376 } else if (v->PixelClockBackEnd[k] > 680) {
4377 v->NumberOfDSCSlices[k] = 4;
4378 } else if (v->PixelClockBackEnd[k] > 340) {
4379 v->NumberOfDSCSlices[k] = 2;
4380 } else {
4381 v->NumberOfDSCSlices[k] = 1;
4382 }
4383 } else {
4384 v->NumberOfDSCSlices[k] = 0;
4385 }
4386 }
4387
4388 for (i = 0; i < v->soc.num_states; i++) {
4389 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4390 v->RequiresDSC[i][k] = false;
4391 v->RequiresFEC[i][k] = false;
4392 if (v->BlendingAndTiming[k] == k) {
4393 if (v->Output[k] == dm_hdmi) {
4394 v->RequiresDSC[i][k] = false;
4395 v->RequiresFEC[i][k] = false;
4396 v->OutputBppPerState[i][k] = TruncToValidBPP(
4397 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4398 3,
4399 v->HTotal[k],
4400 v->HActive[k],
4401 v->PixelClockBackEnd[k],
4402 v->ForcedOutputLinkBPP[k],
4403 false,
4404 v->Output[k],
4405 v->OutputFormat[k],
4406 v->DSCInputBitPerComponent[k],
4407 v->NumberOfDSCSlices[k],
4408 v->AudioSampleRate[k],
4409 v->AudioSampleLayout[k],
4410 v->ODMCombineEnablePerState[i][k]);
4411 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4412 if (v->DSCEnable[k] == true) {
4413 v->RequiresDSC[i][k] = true;
4414 v->LinkDSCEnable = true;
4415 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4416 v->RequiresFEC[i][k] = true;
4417 } else {
4418 v->RequiresFEC[i][k] = false;
4419 }
4420 } else {
4421 v->RequiresDSC[i][k] = false;
4422 v->LinkDSCEnable = false;
4423 if (v->Output[k] == dm_dp2p0) {
4424 v->RequiresFEC[i][k] = true;
4425 } else {
4426 v->RequiresFEC[i][k] = false;
4427 }
4428 }
4429 if (v->Output[k] == dm_dp2p0) {
4430 v->Outbpp = BPP_INVALID;
4431 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4432 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4433 v->Outbpp = TruncToValidBPP(
4434 (1.0 - v->Downspreading / 100.0) * 10000,
4435 v->OutputLinkDPLanes[k],
4436 v->HTotal[k],
4437 v->HActive[k],
4438 v->PixelClockBackEnd[k],
4439 v->ForcedOutputLinkBPP[k],
4440 v->LinkDSCEnable,
4441 v->Output[k],
4442 v->OutputFormat[k],
4443 v->DSCInputBitPerComponent[k],
4444 v->NumberOfDSCSlices[k],
4445 v->AudioSampleRate[k],
4446 v->AudioSampleLayout[k],
4447 v->ODMCombineEnablePerState[i][k]);
4448 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4449 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4450 v->RequiresDSC[i][k] = true;
4451 v->LinkDSCEnable = true;
4452 v->Outbpp = TruncToValidBPP(
4453 (1.0 - v->Downspreading / 100.0) * 10000,
4454 v->OutputLinkDPLanes[k],
4455 v->HTotal[k],
4456 v->HActive[k],
4457 v->PixelClockBackEnd[k],
4458 v->ForcedOutputLinkBPP[k],
4459 v->LinkDSCEnable,
4460 v->Output[k],
4461 v->OutputFormat[k],
4462 v->DSCInputBitPerComponent[k],
4463 v->NumberOfDSCSlices[k],
4464 v->AudioSampleRate[k],
4465 v->AudioSampleLayout[k],
4466 v->ODMCombineEnablePerState[i][k]);
4467 }
4468 v->OutputBppPerState[i][k] = v->Outbpp;
4469 // TODO: Need some other way to handle this nonsense
4470 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4471 }
4472 if (v->Outbpp == BPP_INVALID &&
4473 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4474 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4475 v->Outbpp = TruncToValidBPP(
4476 (1.0 - v->Downspreading / 100.0) * 13500,
4477 v->OutputLinkDPLanes[k],
4478 v->HTotal[k],
4479 v->HActive[k],
4480 v->PixelClockBackEnd[k],
4481 v->ForcedOutputLinkBPP[k],
4482 v->LinkDSCEnable,
4483 v->Output[k],
4484 v->OutputFormat[k],
4485 v->DSCInputBitPerComponent[k],
4486 v->NumberOfDSCSlices[k],
4487 v->AudioSampleRate[k],
4488 v->AudioSampleLayout[k],
4489 v->ODMCombineEnablePerState[i][k]);
4490 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4491 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4492 v->RequiresDSC[i][k] = true;
4493 v->LinkDSCEnable = true;
4494 v->Outbpp = TruncToValidBPP(
4495 (1.0 - v->Downspreading / 100.0) * 13500,
4496 v->OutputLinkDPLanes[k],
4497 v->HTotal[k],
4498 v->HActive[k],
4499 v->PixelClockBackEnd[k],
4500 v->ForcedOutputLinkBPP[k],
4501 v->LinkDSCEnable,
4502 v->Output[k],
4503 v->OutputFormat[k],
4504 v->DSCInputBitPerComponent[k],
4505 v->NumberOfDSCSlices[k],
4506 v->AudioSampleRate[k],
4507 v->AudioSampleLayout[k],
4508 v->ODMCombineEnablePerState[i][k]);
4509 }
4510 v->OutputBppPerState[i][k] = v->Outbpp;
4511 // TODO: Need some other way to handle this nonsense
4512 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4513 }
4514 if (v->Outbpp == BPP_INVALID &&
4515 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4516 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4517 v->Outbpp = TruncToValidBPP(
4518 (1.0 - v->Downspreading / 100.0) * 20000,
4519 v->OutputLinkDPLanes[k],
4520 v->HTotal[k],
4521 v->HActive[k],
4522 v->PixelClockBackEnd[k],
4523 v->ForcedOutputLinkBPP[k],
4524 v->LinkDSCEnable,
4525 v->Output[k],
4526 v->OutputFormat[k],
4527 v->DSCInputBitPerComponent[k],
4528 v->NumberOfDSCSlices[k],
4529 v->AudioSampleRate[k],
4530 v->AudioSampleLayout[k],
4531 v->ODMCombineEnablePerState[i][k]);
4532 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4533 v->ForcedOutputLinkBPP[k] == 0) {
4534 v->RequiresDSC[i][k] = true;
4535 v->LinkDSCEnable = true;
4536 v->Outbpp = TruncToValidBPP(
4537 (1.0 - v->Downspreading / 100.0) * 20000,
4538 v->OutputLinkDPLanes[k],
4539 v->HTotal[k],
4540 v->HActive[k],
4541 v->PixelClockBackEnd[k],
4542 v->ForcedOutputLinkBPP[k],
4543 v->LinkDSCEnable,
4544 v->Output[k],
4545 v->OutputFormat[k],
4546 v->DSCInputBitPerComponent[k],
4547 v->NumberOfDSCSlices[k],
4548 v->AudioSampleRate[k],
4549 v->AudioSampleLayout[k],
4550 v->ODMCombineEnablePerState[i][k]);
4551 }
4552 v->OutputBppPerState[i][k] = v->Outbpp;
4553 // TODO: Need some other way to handle this nonsense
4554 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4555 }
4556 } else {
4557 v->Outbpp = BPP_INVALID;
4558 if (v->PHYCLKPerState[i] >= 270.0) {
4559 v->Outbpp = TruncToValidBPP(
4560 (1.0 - v->Downspreading / 100.0) * 2700,
4561 v->OutputLinkDPLanes[k],
4562 v->HTotal[k],
4563 v->HActive[k],
4564 v->PixelClockBackEnd[k],
4565 v->ForcedOutputLinkBPP[k],
4566 v->LinkDSCEnable,
4567 v->Output[k],
4568 v->OutputFormat[k],
4569 v->DSCInputBitPerComponent[k],
4570 v->NumberOfDSCSlices[k],
4571 v->AudioSampleRate[k],
4572 v->AudioSampleLayout[k],
4573 v->ODMCombineEnablePerState[i][k]);
4574 v->OutputBppPerState[i][k] = v->Outbpp;
4575 // TODO: Need some other way to handle this nonsense
4576 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4577 }
4578 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4579 v->Outbpp = TruncToValidBPP(
4580 (1.0 - v->Downspreading / 100.0) * 5400,
4581 v->OutputLinkDPLanes[k],
4582 v->HTotal[k],
4583 v->HActive[k],
4584 v->PixelClockBackEnd[k],
4585 v->ForcedOutputLinkBPP[k],
4586 v->LinkDSCEnable,
4587 v->Output[k],
4588 v->OutputFormat[k],
4589 v->DSCInputBitPerComponent[k],
4590 v->NumberOfDSCSlices[k],
4591 v->AudioSampleRate[k],
4592 v->AudioSampleLayout[k],
4593 v->ODMCombineEnablePerState[i][k]);
4594 v->OutputBppPerState[i][k] = v->Outbpp;
4595 // TODO: Need some other way to handle this nonsense
4596 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4597 }
4598 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4599 v->Outbpp = TruncToValidBPP(
4600 (1.0 - v->Downspreading / 100.0) * 8100,
4601 v->OutputLinkDPLanes[k],
4602 v->HTotal[k],
4603 v->HActive[k],
4604 v->PixelClockBackEnd[k],
4605 v->ForcedOutputLinkBPP[k],
4606 v->LinkDSCEnable,
4607 v->Output[k],
4608 v->OutputFormat[k],
4609 v->DSCInputBitPerComponent[k],
4610 v->NumberOfDSCSlices[k],
4611 v->AudioSampleRate[k],
4612 v->AudioSampleLayout[k],
4613 v->ODMCombineEnablePerState[i][k]);
4614 v->OutputBppPerState[i][k] = v->Outbpp;
4615 // TODO: Need some other way to handle this nonsense
4616 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4617 }
4618 }
4619 }
4620 } else {
4621 v->OutputBppPerState[i][k] = 0;
4622 }
4623 }
4624 }
4625
4626 for (i = 0; i < v->soc.num_states; i++) {
4627 v->LinkCapacitySupport[i] = true;
4628 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4629 if (v->BlendingAndTiming[k] == k
4630 && (v->Output[k] == dm_dp ||
4631 v->Output[k] == dm_edp ||
4632 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4633 v->LinkCapacitySupport[i] = false;
4634 }
4635 }
4636 }
4637
4638 // UPTO 2172
4639 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4640 if (v->BlendingAndTiming[k] == k
4641 && (v->Output[k] == dm_dp ||
4642 v->Output[k] == dm_edp ||
4643 v->Output[k] == dm_hdmi)) {
4644 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4645 P2IWith420 = true;
4646 }
4647 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4648 && !v->DSC422NativeSupport) {
4649 DSC422NativeNotSupported = true;
4650 }
4651 }
4652 }
4653
4654
4655 for (i = 0; i < v->soc.num_states; ++i) {
4656 v->ODMCombine4To1SupportCheckOK[i] = true;
4657 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4658 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4659 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4660 || v->Output[k] == dm_hdmi)) {
4661 v->ODMCombine4To1SupportCheckOK[i] = false;
4662 }
4663 }
4664 }
4665
4666 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4667
4668 for (i = 0; i < v->soc.num_states; i++) {
4669 v->NotEnoughDSCUnits[i] = false;
4670 v->TotalDSCUnitsRequired = 0.0;
4671 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4672 if (v->RequiresDSC[i][k] == true) {
4673 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4674 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4675 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4676 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4677 } else {
4678 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4679 }
4680 }
4681 }
4682 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4683 v->NotEnoughDSCUnits[i] = true;
4684 }
4685 }
4686 /*DSC Delay per state*/
4687
4688 for (i = 0; i < v->soc.num_states; i++) {
4689 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4690 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4691 v->BPP = 0.0;
4692 } else {
4693 v->BPP = v->OutputBppPerState[i][k];
4694 }
4695 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4696 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4697 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4698 v->DSCInputBitPerComponent[k],
4699 v->BPP,
4700 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4701 v->NumberOfDSCSlices[k],
4702 v->OutputFormat[k],
4703 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4704 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4705 v->DSCDelayPerState[i][k] = 2.0
4706 * (dscceComputeDelay(
4707 v->DSCInputBitPerComponent[k],
4708 v->BPP,
4709 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4710 v->NumberOfDSCSlices[k] / 2,
4711 v->OutputFormat[k],
4712 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4713 } else {
4714 v->DSCDelayPerState[i][k] = 4.0
4715 * (dscceComputeDelay(
4716 v->DSCInputBitPerComponent[k],
4717 v->BPP,
4718 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4719 v->NumberOfDSCSlices[k] / 4,
4720 v->OutputFormat[k],
4721 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4722 }
4723 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelayPerState[i][k] / v->HActive[k], 1.0);
4724 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4725 } else {
4726 v->DSCDelayPerState[i][k] = 0.0;
4727 }
4728 }
4729 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4730 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4731 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4732 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4733 }
4734 }
4735 }
4736 }
4737
4738 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4739 //
4740 for (i = 0; i < v->soc.num_states; ++i) {
4741 for (j = 0; j <= 1; ++j) {
4742 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4743 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4744 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4745 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4746 }
4747
4748 CalculateSwathAndDETConfiguration(
4749 false,
4750 v->NumberOfActivePlanes,
4751 v->DETBufferSizeInKByte[0],
4752 v->MaximumSwathWidthLuma,
4753 v->MaximumSwathWidthChroma,
4754 v->SourceScan,
4755 v->SourcePixelFormat,
4756 v->SurfaceTiling,
4757 v->ViewportWidth,
4758 v->ViewportHeight,
4759 v->SurfaceWidthY,
4760 v->SurfaceWidthC,
4761 v->SurfaceHeightY,
4762 v->SurfaceHeightC,
4763 v->Read256BlockHeightY,
4764 v->Read256BlockHeightC,
4765 v->Read256BlockWidthY,
4766 v->Read256BlockWidthC,
4767 v->ODMCombineEnableThisState,
4768 v->BlendingAndTiming,
4769 v->BytePerPixelY,
4770 v->BytePerPixelC,
4771 v->BytePerPixelInDETY,
4772 v->BytePerPixelInDETC,
4773 v->HActive,
4774 v->HRatio,
4775 v->HRatioChroma,
4776 v->NoOfDPPThisState,
4777 v->swath_width_luma_ub_this_state,
4778 v->swath_width_chroma_ub_this_state,
4779 v->SwathWidthYThisState,
4780 v->SwathWidthCThisState,
4781 v->SwathHeightYThisState,
4782 v->SwathHeightCThisState,
4783 v->DETBufferSizeYThisState,
4784 v->DETBufferSizeCThisState,
4785 v->dummystring,
4786 &v->ViewportSizeSupport[i][j]);
4787
4788 CalculateDCFCLKDeepSleep(
4789 mode_lib,
4790 v->NumberOfActivePlanes,
4791 v->BytePerPixelY,
4792 v->BytePerPixelC,
4793 v->VRatio,
4794 v->VRatioChroma,
4795 v->SwathWidthYThisState,
4796 v->SwathWidthCThisState,
4797 v->NoOfDPPThisState,
4798 v->HRatio,
4799 v->HRatioChroma,
4800 v->PixelClock,
4801 v->PSCL_FACTOR,
4802 v->PSCL_FACTOR_CHROMA,
4803 v->RequiredDPPCLKThisState,
4804 v->ReadBandwidthLuma,
4805 v->ReadBandwidthChroma,
4806 v->ReturnBusWidth,
4807 &v->ProjectedDCFCLKDeepSleep[i][j]);
4808
4809 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4810 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4811 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4812 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4813 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4814 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4815 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4816 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4817 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4818 }
4819 }
4820 }
4821
4822 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4823 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4824 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4825 }
4826
4827 for (i = 0; i < v->soc.num_states; i++) {
4828 for (j = 0; j < 2; j++) {
4829 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4830
4831 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4832 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4833 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4834 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4835 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4836 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4837 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4838 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4839 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4840 }
4841
4842 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4843 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4844 if (v->DCCEnable[k] == true) {
4845 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4846 }
4847 }
4848
4849 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4850 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4851 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4852
4853 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4854 && v->SourceScan[k] != dm_vert) {
4855 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4856 / 2;
4857 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4858 } else {
4859 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4860 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4861 }
4862
4863 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4864 mode_lib,
4865 v->DCCEnable[k],
4866 v->Read256BlockHeightC[k],
4867 v->Read256BlockWidthC[k],
4868 v->SourcePixelFormat[k],
4869 v->SurfaceTiling[k],
4870 v->BytePerPixelC[k],
4871 v->SourceScan[k],
4872 v->SwathWidthCThisState[k],
4873 v->ViewportHeightChroma[k],
4874 v->GPUVMEnable,
4875 v->HostVMEnable,
4876 v->HostVMMaxNonCachedPageTableLevels,
4877 v->GPUVMMinPageSize,
4878 v->HostVMMinPageSize,
4879 v->PTEBufferSizeInRequestsForChroma,
4880 v->PitchC[k],
4881 0.0,
4882 &v->MacroTileWidthC[k],
4883 &v->MetaRowBytesC,
4884 &v->DPTEBytesPerRowC,
4885 &v->PTEBufferSizeNotExceededC[i][j][k],
4886 &v->dummyinteger7,
4887 &v->dpte_row_height_chroma[k],
4888 &v->dummyinteger28,
4889 &v->dummyinteger26,
4890 &v->dummyinteger23,
4891 &v->meta_row_height_chroma[k],
4892 &v->dummyinteger8,
4893 &v->dummyinteger9,
4894 &v->dummyinteger19,
4895 &v->dummyinteger20,
4896 &v->dummyinteger17,
4897 &v->dummyinteger10,
4898 &v->dummyinteger11);
4899
4900 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4901 mode_lib,
4902 v->VRatioChroma[k],
4903 v->VTAPsChroma[k],
4904 v->Interlace[k],
4905 v->ProgressiveToInterlaceUnitInOPP,
4906 v->SwathHeightCThisState[k],
4907 v->ViewportYStartC[k],
4908 &v->PrefillC[k],
4909 &v->MaxNumSwC[k]);
4910 } else {
4911 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4912 v->PTEBufferSizeInRequestsForChroma = 0;
4913 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4914 v->MetaRowBytesC = 0.0;
4915 v->DPTEBytesPerRowC = 0.0;
4916 v->PrefetchLinesC[i][j][k] = 0.0;
4917 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4918 }
4919 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4920 mode_lib,
4921 v->DCCEnable[k],
4922 v->Read256BlockHeightY[k],
4923 v->Read256BlockWidthY[k],
4924 v->SourcePixelFormat[k],
4925 v->SurfaceTiling[k],
4926 v->BytePerPixelY[k],
4927 v->SourceScan[k],
4928 v->SwathWidthYThisState[k],
4929 v->ViewportHeight[k],
4930 v->GPUVMEnable,
4931 v->HostVMEnable,
4932 v->HostVMMaxNonCachedPageTableLevels,
4933 v->GPUVMMinPageSize,
4934 v->HostVMMinPageSize,
4935 v->PTEBufferSizeInRequestsForLuma,
4936 v->PitchY[k],
4937 v->DCCMetaPitchY[k],
4938 &v->MacroTileWidthY[k],
4939 &v->MetaRowBytesY,
4940 &v->DPTEBytesPerRowY,
4941 &v->PTEBufferSizeNotExceededY[i][j][k],
4942 &v->dummyinteger7,
4943 &v->dpte_row_height[k],
4944 &v->dummyinteger29,
4945 &v->dummyinteger27,
4946 &v->dummyinteger24,
4947 &v->meta_row_height[k],
4948 &v->dummyinteger25,
4949 &v->dpte_group_bytes[k],
4950 &v->dummyinteger21,
4951 &v->dummyinteger22,
4952 &v->dummyinteger18,
4953 &v->dummyinteger5,
4954 &v->dummyinteger6);
4955 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4956 mode_lib,
4957 v->VRatio[k],
4958 v->vtaps[k],
4959 v->Interlace[k],
4960 v->ProgressiveToInterlaceUnitInOPP,
4961 v->SwathHeightYThisState[k],
4962 v->ViewportYStartY[k],
4963 &v->PrefillY[k],
4964 &v->MaxNumSwY[k]);
4965 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4966 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4967 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4968
4969 CalculateRowBandwidth(
4970 v->GPUVMEnable,
4971 v->SourcePixelFormat[k],
4972 v->VRatio[k],
4973 v->VRatioChroma[k],
4974 v->DCCEnable[k],
4975 v->HTotal[k] / v->PixelClock[k],
4976 v->MetaRowBytesY,
4977 v->MetaRowBytesC,
4978 v->meta_row_height[k],
4979 v->meta_row_height_chroma[k],
4980 v->DPTEBytesPerRowY,
4981 v->DPTEBytesPerRowC,
4982 v->dpte_row_height[k],
4983 v->dpte_row_height_chroma[k],
4984 &v->meta_row_bandwidth[i][j][k],
4985 &v->dpte_row_bandwidth[i][j][k]);
4986 }
4987 /*
4988 * DCCMetaBufferSizeSupport(i, j) = True
4989 * For k = 0 To NumberOfActivePlanes - 1
4990 * If MetaRowBytes(i, j, k) > 24064 Then
4991 * DCCMetaBufferSizeSupport(i, j) = False
4992 * End If
4993 * Next k
4994 */
4995 v->DCCMetaBufferSizeSupport[i][j] = true;
4996 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4997 if (v->MetaRowBytes[i][j][k] > 24064)
4998 v->DCCMetaBufferSizeSupport[i][j] = false;
4999 }
5000 v->UrgLatency[i] = CalculateUrgentLatency(
5001 v->UrgentLatencyPixelDataOnly,
5002 v->UrgentLatencyPixelMixedWithVMData,
5003 v->UrgentLatencyVMDataOnly,
5004 v->DoUrgentLatencyAdjustment,
5005 v->UrgentLatencyAdjustmentFabricClockComponent,
5006 v->UrgentLatencyAdjustmentFabricClockReference,
5007 v->FabricClockPerState[i]);
5008
5009 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5010 CalculateUrgentBurstFactor(
5011 v->swath_width_luma_ub_this_state[k],
5012 v->swath_width_chroma_ub_this_state[k],
5013 v->SwathHeightYThisState[k],
5014 v->SwathHeightCThisState[k],
5015 v->HTotal[k] / v->PixelClock[k],
5016 v->UrgLatency[i],
5017 v->CursorBufferSize,
5018 v->CursorWidth[k][0],
5019 v->CursorBPP[k][0],
5020 v->VRatio[k],
5021 v->VRatioChroma[k],
5022 v->BytePerPixelInDETY[k],
5023 v->BytePerPixelInDETC[k],
5024 v->DETBufferSizeYThisState[k],
5025 v->DETBufferSizeCThisState[k],
5026 &v->UrgentBurstFactorCursor[k],
5027 &v->UrgentBurstFactorLuma[k],
5028 &v->UrgentBurstFactorChroma[k],
5029 &NotUrgentLatencyHiding[k]);
5030 }
5031
5032 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5033 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5034 if (NotUrgentLatencyHiding[k]) {
5035 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5036 }
5037 }
5038
5039 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5040 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5041 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5042 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5043 }
5044
5045 v->TotalVActivePixelBandwidth[i][j] = 0;
5046 v->TotalVActiveCursorBandwidth[i][j] = 0;
5047 v->TotalMetaRowBandwidth[i][j] = 0;
5048 v->TotalDPTERowBandwidth[i][j] = 0;
5049 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5050 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5051 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5052 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5053 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5054 }
5055 }
5056 }
5057
5058 //Calculate Return BW
5059 for (i = 0; i < v->soc.num_states; ++i) {
5060 for (j = 0; j <= 1; ++j) {
5061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5062 if (v->BlendingAndTiming[k] == k) {
5063 if (v->WritebackEnable[k] == true) {
5064 v->WritebackDelayTime[k] = v->WritebackLatency
5065 + CalculateWriteBackDelay(
5066 v->WritebackPixelFormat[k],
5067 v->WritebackHRatio[k],
5068 v->WritebackVRatio[k],
5069 v->WritebackVTaps[k],
5070 v->WritebackDestinationWidth[k],
5071 v->WritebackDestinationHeight[k],
5072 v->WritebackSourceHeight[k],
5073 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5074 } else {
5075 v->WritebackDelayTime[k] = 0.0;
5076 }
5077 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5078 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5079 v->WritebackDelayTime[k] = dml_max(
5080 v->WritebackDelayTime[k],
5081 v->WritebackLatency
5082 + CalculateWriteBackDelay(
5083 v->WritebackPixelFormat[m],
5084 v->WritebackHRatio[m],
5085 v->WritebackVRatio[m],
5086 v->WritebackVTaps[m],
5087 v->WritebackDestinationWidth[m],
5088 v->WritebackDestinationHeight[m],
5089 v->WritebackSourceHeight[m],
5090 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5091 }
5092 }
5093 }
5094 }
5095 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5096 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5097 if (v->BlendingAndTiming[k] == m) {
5098 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5099 }
5100 }
5101 }
5102 v->MaxMaxVStartup[i][j] = 0;
5103 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5104 v->MaximumVStartup[i][j][k] =
5105 CalculateMaxVStartup(
5106 v->VTotal[k],
5107 v->VActive[k],
5108 v->VBlankNom[k],
5109 v->HTotal[k],
5110 v->PixelClock[k],
5111 v->ProgressiveToInterlaceUnitInOPP,
5112 v->Interlace[k],
5113 v->ip.VBlankNomDefaultUS,
5114 v->WritebackDelayTime[k]);
5115 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5116 }
5117 }
5118 }
5119
5120 ReorderingBytes = v->NumberOfChannels
5121 * dml_max3(
5122 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5123 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5124 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5125
5126 for (i = 0; i < v->soc.num_states; ++i) {
5127 for (j = 0; j <= 1; ++j) {
5128 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5129 }
5130 }
5131
5132 if (v->UseMinimumRequiredDCFCLK == true)
5133 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5134
5135 for (i = 0; i < v->soc.num_states; ++i) {
5136 for (j = 0; j <= 1; ++j) {
5137 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5138 v->ReturnBusWidth * v->DCFCLKState[i][j],
5139 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5140 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5141 double PixelDataOnlyReturnBWPerState = dml_min(
5142 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5143 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5144 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5145 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5146 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5147
5148 if (v->HostVMEnable != true) {
5149 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5150 } else {
5151 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5152 }
5153 }
5154 }
5155
5156 //Re-ordering Buffer Support Check
5157 for (i = 0; i < v->soc.num_states; ++i) {
5158 for (j = 0; j <= 1; ++j) {
5159 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5160 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5161 v->ROBSupport[i][j] = true;
5162 } else {
5163 v->ROBSupport[i][j] = false;
5164 }
5165 }
5166 }
5167
5168 //Vertical Active BW support check
5169
5170 MaxTotalVActiveRDBandwidth = 0;
5171 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5172 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5173 }
5174
5175 for (i = 0; i < v->soc.num_states; ++i) {
5176 for (j = 0; j <= 1; ++j) {
5177 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5178 dml_min(
5179 v->ReturnBusWidth * v->DCFCLKState[i][j],
5180 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5181 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5182 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5183 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5184
5185 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5186 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5187 } else {
5188 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5189 }
5190 }
5191 }
5192
5193 v->UrgentLatency = CalculateUrgentLatency(
5194 v->UrgentLatencyPixelDataOnly,
5195 v->UrgentLatencyPixelMixedWithVMData,
5196 v->UrgentLatencyVMDataOnly,
5197 v->DoUrgentLatencyAdjustment,
5198 v->UrgentLatencyAdjustmentFabricClockComponent,
5199 v->UrgentLatencyAdjustmentFabricClockReference,
5200 v->FabricClock);
5201 //Prefetch Check
5202 for (i = 0; i < v->soc.num_states; ++i) {
5203 for (j = 0; j <= 1; ++j) {
5204 double VMDataOnlyReturnBWPerState;
5205 double HostVMInefficiencyFactor = 1;
5206 int NextPrefetchModeState = MinPrefetchMode;
5207 bool UnboundedRequestEnabledThisState = false;
5208 int CompressedBufferSizeInkByteThisState = 0;
5209 double dummy;
5210
5211 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5212
5213 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5214 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5215 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5216 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5217 }
5218
5219 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5220 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5221 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5222 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5223 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5224 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5225 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5226 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5227 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5228 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5229 }
5230
5231 VMDataOnlyReturnBWPerState = dml_min(
5232 dml_min(
5233 v->ReturnBusWidth * v->DCFCLKState[i][j],
5234 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5235 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5236 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5237 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5238 if (v->GPUVMEnable && v->HostVMEnable)
5239 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5240
5241 v->ExtraLatency = CalculateExtraLatency(
5242 v->RoundTripPingLatencyCycles,
5243 ReorderingBytes,
5244 v->DCFCLKState[i][j],
5245 v->TotalNumberOfActiveDPP[i][j],
5246 v->PixelChunkSizeInKByte,
5247 v->TotalNumberOfDCCActiveDPP[i][j],
5248 v->MetaChunkSize,
5249 v->ReturnBWPerState[i][j],
5250 v->GPUVMEnable,
5251 v->HostVMEnable,
5252 v->NumberOfActivePlanes,
5253 v->NoOfDPPThisState,
5254 v->dpte_group_bytes,
5255 HostVMInefficiencyFactor,
5256 v->HostVMMinPageSize,
5257 v->HostVMMaxNonCachedPageTableLevels);
5258
5259 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5260 do {
5261 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5262 v->MaxVStartup = v->NextMaxVStartup;
5263
5264 v->TWait = CalculateTWait(
5265 v->PrefetchModePerState[i][j],
5266 v->DRAMClockChangeLatency,
5267 v->UrgLatency[i],
5268 v->SREnterPlusExitTime);
5269
5270 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5271 CalculatePrefetchSchedulePerPlane(mode_lib,
5272 HostVMInefficiencyFactor,
5273 i, j, k);
5274 }
5275
5276 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5277 CalculateUrgentBurstFactor(
5278 v->swath_width_luma_ub_this_state[k],
5279 v->swath_width_chroma_ub_this_state[k],
5280 v->SwathHeightYThisState[k],
5281 v->SwathHeightCThisState[k],
5282 v->HTotal[k] / v->PixelClock[k],
5283 v->UrgLatency[i],
5284 v->CursorBufferSize,
5285 v->CursorWidth[k][0],
5286 v->CursorBPP[k][0],
5287 v->VRatioPreY[i][j][k],
5288 v->VRatioPreC[i][j][k],
5289 v->BytePerPixelInDETY[k],
5290 v->BytePerPixelInDETC[k],
5291 v->DETBufferSizeYThisState[k],
5292 v->DETBufferSizeCThisState[k],
5293 &v->UrgentBurstFactorCursorPre[k],
5294 &v->UrgentBurstFactorLumaPre[k],
5295 &v->UrgentBurstFactorChromaPre[k],
5296 &v->NotUrgentLatencyHidingPre[k]);
5297 }
5298
5299 v->MaximumReadBandwidthWithPrefetch = 0.0;
5300 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5301 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5302 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5303
5304 v->MaximumReadBandwidthWithPrefetch =
5305 v->MaximumReadBandwidthWithPrefetch
5306 + dml_max3(
5307 v->VActivePixelBandwidth[i][j][k]
5308 + v->VActiveCursorBandwidth[i][j][k]
5309 + v->NoOfDPP[i][j][k]
5310 * (v->meta_row_bandwidth[i][j][k]
5311 + v->dpte_row_bandwidth[i][j][k]),
5312 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5313 v->NoOfDPP[i][j][k]
5314 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5315 * v->UrgentBurstFactorLumaPre[k]
5316 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5317 * v->UrgentBurstFactorChromaPre[k])
5318 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5319 }
5320
5321 v->NotEnoughUrgentLatencyHidingPre = false;
5322 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5323 if (v->NotUrgentLatencyHidingPre[k] == true) {
5324 v->NotEnoughUrgentLatencyHidingPre = true;
5325 }
5326 }
5327
5328 v->PrefetchSupported[i][j] = true;
5329 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5330 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5331 v->PrefetchSupported[i][j] = false;
5332 }
5333 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5334 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5335 || v->NoTimeForPrefetch[i][j][k] == true) {
5336 v->PrefetchSupported[i][j] = false;
5337 }
5338 }
5339
5340 v->DynamicMetadataSupported[i][j] = true;
5341 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5342 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5343 v->DynamicMetadataSupported[i][j] = false;
5344 }
5345 }
5346
5347 v->VRatioInPrefetchSupported[i][j] = true;
5348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5349 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5350 v->VRatioInPrefetchSupported[i][j] = false;
5351 }
5352 }
5353 v->AnyLinesForVMOrRowTooLarge = false;
5354 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5355 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5356 v->AnyLinesForVMOrRowTooLarge = true;
5357 }
5358 }
5359
5360 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5361
5362 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5363 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5364 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5365 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5366 - dml_max(
5367 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5368 v->NoOfDPP[i][j][k]
5369 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5370 * v->UrgentBurstFactorLumaPre[k]
5371 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5372 * v->UrgentBurstFactorChromaPre[k])
5373 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5374 }
5375 v->TotImmediateFlipBytes = 0.0;
5376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5377 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5378 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5379 + v->DPTEBytesPerRow[i][j][k]);
5380 }
5381
5382 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5383 CalculateFlipSchedule(
5384 mode_lib,
5385 k,
5386 HostVMInefficiencyFactor,
5387 v->ExtraLatency,
5388 v->UrgLatency[i],
5389 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5390 v->MetaRowBytes[i][j][k],
5391 v->DPTEBytesPerRow[i][j][k]);
5392 }
5393 v->total_dcn_read_bw_with_flip = 0.0;
5394 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5395 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5396 + dml_max3(
5397 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5398 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5399 + v->VActiveCursorBandwidth[i][j][k],
5400 v->NoOfDPP[i][j][k]
5401 * (v->final_flip_bw[k]
5402 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5403 * v->UrgentBurstFactorLumaPre[k]
5404 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5405 * v->UrgentBurstFactorChromaPre[k])
5406 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5407 }
5408 v->ImmediateFlipSupportedForState[i][j] = true;
5409 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5410 v->ImmediateFlipSupportedForState[i][j] = false;
5411 }
5412 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5413 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5414 v->ImmediateFlipSupportedForState[i][j] = false;
5415 }
5416 }
5417 } else {
5418 v->ImmediateFlipSupportedForState[i][j] = false;
5419 }
5420
5421 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5422 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5423 NextPrefetchModeState = NextPrefetchModeState + 1;
5424 } else {
5425 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5426 }
5427 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5428 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5429 && ((v->HostVMEnable == false &&
5430 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5431 || v->ImmediateFlipSupportedForState[i][j] == true))
5432 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5433
5434 CalculateUnboundedRequestAndCompressedBufferSize(
5435 v->DETBufferSizeInKByte[0],
5436 v->ConfigReturnBufferSizeInKByte,
5437 v->UseUnboundedRequesting,
5438 v->TotalNumberOfActiveDPP[i][j],
5439 NoChroma,
5440 v->MaxNumDPP,
5441 v->CompressedBufferSegmentSizeInkByte,
5442 v->Output,
5443 &UnboundedRequestEnabledThisState,
5444 &CompressedBufferSizeInkByteThisState);
5445
5446 CalculateWatermarksAndDRAMSpeedChangeSupport(
5447 mode_lib,
5448 v->PrefetchModePerState[i][j],
5449 v->DCFCLKState[i][j],
5450 v->ReturnBWPerState[i][j],
5451 v->UrgLatency[i],
5452 v->ExtraLatency,
5453 v->SOCCLKPerState[i],
5454 v->ProjectedDCFCLKDeepSleep[i][j],
5455 v->DETBufferSizeYThisState,
5456 v->DETBufferSizeCThisState,
5457 v->SwathHeightYThisState,
5458 v->SwathHeightCThisState,
5459 v->SwathWidthYThisState,
5460 v->SwathWidthCThisState,
5461 v->NoOfDPPThisState,
5462 v->BytePerPixelInDETY,
5463 v->BytePerPixelInDETC,
5464 UnboundedRequestEnabledThisState,
5465 CompressedBufferSizeInkByteThisState,
5466 &v->DRAMClockChangeSupport[i][j],
5467 &dummy,
5468 &dummy,
5469 &dummy,
5470 &dummy);
5471 }
5472 }
5473
5474 /*PTE Buffer Size Check*/
5475 for (i = 0; i < v->soc.num_states; i++) {
5476 for (j = 0; j < 2; j++) {
5477 v->PTEBufferSizeNotExceeded[i][j] = true;
5478 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5479 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5480 v->PTEBufferSizeNotExceeded[i][j] = false;
5481 }
5482 }
5483 }
5484 }
5485
5486 /*Cursor Support Check*/
5487 v->CursorSupport = true;
5488 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5489 if (v->CursorWidth[k][0] > 0.0) {
5490 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5491 v->CursorSupport = false;
5492 }
5493 }
5494 }
5495
5496 /*Valid Pitch Check*/
5497 v->PitchSupport = true;
5498 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5499 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5500 if (v->DCCEnable[k] == true) {
5501 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5502 } else {
5503 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5504 }
5505 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5506 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5507 && v->SourcePixelFormat[k] != dm_mono_8) {
5508 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5509 if (v->DCCEnable[k] == true) {
5510 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5511 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5512 64.0 * v->Read256BlockWidthC[k]);
5513 } else {
5514 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5515 }
5516 } else {
5517 v->AlignedCPitch[k] = v->PitchC[k];
5518 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5519 }
5520 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5521 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5522 v->PitchSupport = false;
5523 }
5524 }
5525
5526 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5527 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5528 ViewportExceedsSurface = true;
5529 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5530 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5531 && v->SourcePixelFormat[k] != dm_rgbe) {
5532 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5533 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5534 ViewportExceedsSurface = true;
5535 }
5536 }
5537 }
5538 }
5539
5540 /*Mode Support, Voltage State and SOC Configuration*/
5541 for (i = v->soc.num_states - 1; i >= 0; i--) {
5542 for (j = 0; j < 2; j++) {
5543 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5544 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5545 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5546 && v->DTBCLKRequiredMoreThanSupported[i] == false
5547 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5548 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5549 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5550 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5551 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5552 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5553 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5554 && ((v->HostVMEnable == false
5555 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5556 || v->ImmediateFlipSupportedForState[i][j] == true)
5557 && FMTBufferExceeded == false) {
5558 v->ModeSupport[i][j] = true;
5559 } else {
5560 v->ModeSupport[i][j] = false;
5561 }
5562 }
5563 }
5564 for (i = v->soc.num_states; i >= 0; i--) {
5565 for (j = 0; j < 2; j++) {
5566 enum dm_validation_status status = DML_VALIDATION_OK;
5567
5568 if (!v->ScaleRatioAndTapsSupport) {
5569 status = DML_FAIL_SCALE_RATIO_TAP;
5570 } else if (!v->SourceFormatPixelAndScanSupport) {
5571 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5572 } else if (!v->ViewportSizeSupport[i][j]) {
5573 status = DML_FAIL_VIEWPORT_SIZE;
5574 } else if (P2IWith420) {
5575 status = DML_FAIL_P2I_WITH_420;
5576 } else if (DSCOnlyIfNecessaryWithBPP) {
5577 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5578 } else if (DSC422NativeNotSupported) {
5579 status = DML_FAIL_NOT_DSC422_NATIVE;
5580 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5581 status = DML_FAIL_ODM_COMBINE4TO1;
5582 } else if (v->NotEnoughDSCUnits[i]) {
5583 status = DML_FAIL_NOT_ENOUGH_DSC;
5584 } else if (!v->ROBSupport[i][j]) {
5585 status = DML_FAIL_REORDERING_BUFFER;
5586 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5587 status = DML_FAIL_DISPCLK_DPPCLK;
5588 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5589 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5590 } else if (!EnoughWritebackUnits) {
5591 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5592 } else if (!v->WritebackLatencySupport) {
5593 status = DML_FAIL_WRITEBACK_LATENCY;
5594 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5595 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5596 } else if (!v->CursorSupport) {
5597 status = DML_FAIL_CURSOR_SUPPORT;
5598 } else if (!v->PitchSupport) {
5599 status = DML_FAIL_PITCH_SUPPORT;
5600 } else if (ViewportExceedsSurface) {
5601 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5602 } else if (!v->PrefetchSupported[i][j]) {
5603 status = DML_FAIL_PREFETCH_SUPPORT;
5604 } else if (!v->DynamicMetadataSupported[i][j]) {
5605 status = DML_FAIL_DYNAMIC_METADATA;
5606 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5607 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5608 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5609 status = DML_FAIL_V_RATIO_PREFETCH;
5610 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5611 status = DML_FAIL_PTE_BUFFER_SIZE;
5612 } else if (v->NonsupportedDSCInputBPC) {
5613 status = DML_FAIL_DSC_INPUT_BPC;
5614 } else if ((v->HostVMEnable
5615 && !v->ImmediateFlipSupportedForState[i][j])) {
5616 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5617 } else if (FMTBufferExceeded) {
5618 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5619 }
5620 mode_lib->vba.ValidationStatus[i] = status;
5621 }
5622 }
5623
5624 {
5625 unsigned int MaximumMPCCombine = 0;
5626
5627 for (i = v->soc.num_states; i >= 0; i--) {
5628 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5629 v->VoltageLevel = i;
5630 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5631 if (v->ModeSupport[i][0] == true) {
5632 MaximumMPCCombine = 0;
5633 } else {
5634 MaximumMPCCombine = 1;
5635 }
5636 }
5637 }
5638 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5639 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5640 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5641 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5642 }
5643 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5644 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5645 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5646 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5647 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5648 v->maxMpcComb = MaximumMPCCombine;
5649 }
5650 }
5651
5652 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5653 struct display_mode_lib *mode_lib,
5654 unsigned int PrefetchMode,
5655 double DCFCLK,
5656 double ReturnBW,
5657 double UrgentLatency,
5658 double ExtraLatency,
5659 double SOCCLK,
5660 double DCFCLKDeepSleep,
5661 unsigned int DETBufferSizeY[],
5662 unsigned int DETBufferSizeC[],
5663 unsigned int SwathHeightY[],
5664 unsigned int SwathHeightC[],
5665 double SwathWidthY[],
5666 double SwathWidthC[],
5667 unsigned int DPPPerPlane[],
5668 double BytePerPixelDETY[],
5669 double BytePerPixelDETC[],
5670 bool UnboundedRequestEnabled,
5671 unsigned int CompressedBufferSizeInkByte,
5672 enum clock_change_support *DRAMClockChangeSupport,
5673 double *StutterExitWatermark,
5674 double *StutterEnterPlusExitWatermark,
5675 double *Z8StutterExitWatermark,
5676 double *Z8StutterEnterPlusExitWatermark)
5677 {
5678 struct vba_vars_st *v = &mode_lib->vba;
5679 double EffectiveLBLatencyHidingY;
5680 double EffectiveLBLatencyHidingC;
5681 double LinesInDETY[DC__NUM_DPP__MAX];
5682 double LinesInDETC;
5683 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5684 unsigned int LinesInDETCRoundedDownToSwath;
5685 double FullDETBufferingTimeY;
5686 double FullDETBufferingTimeC;
5687 double ActiveDRAMClockChangeLatencyMarginY;
5688 double ActiveDRAMClockChangeLatencyMarginC;
5689 double WritebackDRAMClockChangeLatencyMargin;
5690 double PlaneWithMinActiveDRAMClockChangeMargin;
5691 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5692 double WritebackDRAMClockChangeLatencyHiding;
5693 double TotalPixelBW = 0.0;
5694 int k, j;
5695
5696 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5697
5698 #ifdef __DML_VBA_DEBUG__
5699 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5700 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5701 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5702 #endif
5703
5704 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5705
5706 #ifdef __DML_VBA_DEBUG__
5707 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5708 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5709 #endif
5710
5711 v->TotalActiveWriteback = 0;
5712 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5713 if (v->WritebackEnable[k] == true) {
5714 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5715 }
5716 }
5717
5718 if (v->TotalActiveWriteback <= 1) {
5719 v->WritebackUrgentWatermark = v->WritebackLatency;
5720 } else {
5721 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5722 }
5723
5724 if (v->TotalActiveWriteback <= 1) {
5725 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5726 } else {
5727 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5728 }
5729
5730 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5731 TotalPixelBW = TotalPixelBW
5732 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5733 / (v->HTotal[k] / v->PixelClock[k]);
5734 }
5735
5736 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5737 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5738
5739 v->LBLatencyHidingSourceLinesY = dml_min(
5740 (double) v->MaxLineBufferLines,
5741 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5742
5743 v->LBLatencyHidingSourceLinesC = dml_min(
5744 (double) v->MaxLineBufferLines,
5745 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5746
5747 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5748
5749 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5750
5751 if (UnboundedRequestEnabled) {
5752 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5753 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5754 }
5755
5756 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5757 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5758 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5759 if (BytePerPixelDETC[k] > 0) {
5760 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5761 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5762 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5763 } else {
5764 LinesInDETC = 0;
5765 FullDETBufferingTimeC = 999999;
5766 }
5767
5768 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5769 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5770
5771 if (v->NumberOfActivePlanes > 1) {
5772 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5773 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5774 }
5775
5776 if (BytePerPixelDETC[k] > 0) {
5777 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5778 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5779
5780 if (v->NumberOfActivePlanes > 1) {
5781 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5782 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5783 }
5784 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5785 } else {
5786 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5787 }
5788
5789 if (v->WritebackEnable[k] == true) {
5790 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5791 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5792 if (v->WritebackPixelFormat[k] == dm_444_64) {
5793 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5794 }
5795 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5796 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5797 }
5798 }
5799
5800 v->MinActiveDRAMClockChangeMargin = 999999;
5801 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5802 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5803 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5804 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5805 if (v->BlendingAndTiming[k] == k) {
5806 PlaneWithMinActiveDRAMClockChangeMargin = k;
5807 } else {
5808 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5809 if (v->BlendingAndTiming[k] == j) {
5810 PlaneWithMinActiveDRAMClockChangeMargin = j;
5811 }
5812 }
5813 }
5814 }
5815 }
5816
5817 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5818
5819 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5820 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5821 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5822 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5823 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5824 }
5825 }
5826
5827 v->TotalNumberOfActiveOTG = 0;
5828
5829 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5830 if (v->BlendingAndTiming[k] == k) {
5831 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5832 }
5833 }
5834
5835 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5836 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5837 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5838 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5839 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5840 } else {
5841 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5842 }
5843
5844 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5845 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5846 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5847 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5848
5849 #ifdef __DML_VBA_DEBUG__
5850 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5851 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5852 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5853 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5854 #endif
5855 }
5856
5857 static void CalculateDCFCLKDeepSleep(
5858 struct display_mode_lib *mode_lib,
5859 unsigned int NumberOfActivePlanes,
5860 int BytePerPixelY[],
5861 int BytePerPixelC[],
5862 double VRatio[],
5863 double VRatioChroma[],
5864 double SwathWidthY[],
5865 double SwathWidthC[],
5866 unsigned int DPPPerPlane[],
5867 double HRatio[],
5868 double HRatioChroma[],
5869 double PixelClock[],
5870 double PSCL_THROUGHPUT[],
5871 double PSCL_THROUGHPUT_CHROMA[],
5872 double DPPCLK[],
5873 double ReadBandwidthLuma[],
5874 double ReadBandwidthChroma[],
5875 int ReturnBusWidth,
5876 double *DCFCLKDeepSleep)
5877 {
5878 struct vba_vars_st *v = &mode_lib->vba;
5879 double DisplayPipeLineDeliveryTimeLuma;
5880 double DisplayPipeLineDeliveryTimeChroma;
5881 double ReadBandwidth = 0.0;
5882 int k;
5883
5884 for (k = 0; k < NumberOfActivePlanes; ++k) {
5885
5886 if (VRatio[k] <= 1) {
5887 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5888 } else {
5889 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5890 }
5891 if (BytePerPixelC[k] == 0) {
5892 DisplayPipeLineDeliveryTimeChroma = 0;
5893 } else {
5894 if (VRatioChroma[k] <= 1) {
5895 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5896 } else {
5897 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5898 }
5899 }
5900
5901 if (BytePerPixelC[k] > 0) {
5902 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5903 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5904 } else {
5905 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5906 }
5907 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5908
5909 }
5910
5911 for (k = 0; k < NumberOfActivePlanes; ++k) {
5912 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5913 }
5914
5915 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5916
5917 for (k = 0; k < NumberOfActivePlanes; ++k) {
5918 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5919 }
5920 }
5921
/*
 * Derive the urgent burst factors for the cursor, luma and chroma of one plane.
 *
 * Each factor is BufferTime / (BufferTime - UrgentLatency), where BufferTime is
 * how long the cursor buffer, or the swath-aligned contents of the DET, last
 * at the relevant vertical ratio.  When BufferTime does not exceed
 * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * raised.  A vertical ratio that is not positive yields a factor of 1.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding - always written (cleared on entry).
 *   *UrgentBurstFactorLuma        - always written.
 *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines held by the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the buffered time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so the
		 * buffered time must be scaled by VRatioC rather than the luma
		 * VRatio (which previously left VRatioC unused).
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5995
5996 static void CalculatePixelDeliveryTimes(
5997 unsigned int NumberOfActivePlanes,
5998 double VRatio[],
5999 double VRatioChroma[],
6000 double VRatioPrefetchY[],
6001 double VRatioPrefetchC[],
6002 unsigned int swath_width_luma_ub[],
6003 unsigned int swath_width_chroma_ub[],
6004 unsigned int DPPPerPlane[],
6005 double HRatio[],
6006 double HRatioChroma[],
6007 double PixelClock[],
6008 double PSCL_THROUGHPUT[],
6009 double PSCL_THROUGHPUT_CHROMA[],
6010 double DPPCLK[],
6011 int BytePerPixelC[],
6012 enum scan_direction_class SourceScan[],
6013 unsigned int NumberOfCursors[],
6014 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6015 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6016 unsigned int BlockWidth256BytesY[],
6017 unsigned int BlockHeight256BytesY[],
6018 unsigned int BlockWidth256BytesC[],
6019 unsigned int BlockHeight256BytesC[],
6020 double DisplayPipeLineDeliveryTimeLuma[],
6021 double DisplayPipeLineDeliveryTimeChroma[],
6022 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6023 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6024 double DisplayPipeRequestDeliveryTimeLuma[],
6025 double DisplayPipeRequestDeliveryTimeChroma[],
6026 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6027 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6028 double CursorRequestDeliveryTime[],
6029 double CursorRequestDeliveryTimePrefetch[])
6030 {
6031 double req_per_swath_ub;
6032 int k;
6033
6034 for (k = 0; k < NumberOfActivePlanes; ++k) {
6035 if (VRatio[k] <= 1) {
6036 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6037 } else {
6038 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6039 }
6040
6041 if (BytePerPixelC[k] == 0) {
6042 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6043 } else {
6044 if (VRatioChroma[k] <= 1) {
6045 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6046 } else {
6047 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6048 }
6049 }
6050
6051 if (VRatioPrefetchY[k] <= 1) {
6052 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6053 } else {
6054 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6055 }
6056
6057 if (BytePerPixelC[k] == 0) {
6058 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6059 } else {
6060 if (VRatioPrefetchC[k] <= 1) {
6061 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6062 } else {
6063 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6064 }
6065 }
6066 }
6067
6068 for (k = 0; k < NumberOfActivePlanes; ++k) {
6069 if (SourceScan[k] != dm_vert) {
6070 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6071 } else {
6072 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6073 }
6074 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6075 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6076 if (BytePerPixelC[k] == 0) {
6077 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6078 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6079 } else {
6080 if (SourceScan[k] != dm_vert) {
6081 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6082 } else {
6083 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6084 }
6085 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6086 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6087 }
6088 #ifdef __DML_VBA_DEBUG__
6089 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6090 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6091 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6092 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6093 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6094 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6095 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6096 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6097 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6098 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6099 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6100 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6101 #endif
6102 }
6103
6104 for (k = 0; k < NumberOfActivePlanes; ++k) {
6105 int cursor_req_per_width;
6106
6107 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6108 if (NumberOfCursors[k] > 0) {
6109 if (VRatio[k] <= 1) {
6110 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6111 } else {
6112 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6113 }
6114 if (VRatioPrefetchY[k] <= 1) {
6115 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6116 } else {
6117 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6118 }
6119 } else {
6120 CursorRequestDeliveryTime[k] = 0;
6121 CursorRequestDeliveryTimePrefetch[k] = 0;
6122 }
6123 #ifdef __DML_VBA_DEBUG__
6124 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6125 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6126 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6127 #endif
6128 }
6129 }
6130
6131 static void CalculateMetaAndPTETimes(
6132 int NumberOfActivePlanes,
6133 bool GPUVMEnable,
6134 int MetaChunkSize,
6135 int MinMetaChunkSizeBytes,
6136 int HTotal[],
6137 double VRatio[],
6138 double VRatioChroma[],
6139 double DestinationLinesToRequestRowInVBlank[],
6140 double DestinationLinesToRequestRowInImmediateFlip[],
6141 bool DCCEnable[],
6142 double PixelClock[],
6143 int BytePerPixelY[],
6144 int BytePerPixelC[],
6145 enum scan_direction_class SourceScan[],
6146 int dpte_row_height[],
6147 int dpte_row_height_chroma[],
6148 int meta_row_width[],
6149 int meta_row_width_chroma[],
6150 int meta_row_height[],
6151 int meta_row_height_chroma[],
6152 int meta_req_width[],
6153 int meta_req_width_chroma[],
6154 int meta_req_height[],
6155 int meta_req_height_chroma[],
6156 int dpte_group_bytes[],
6157 int PTERequestSizeY[],
6158 int PTERequestSizeC[],
6159 int PixelPTEReqWidthY[],
6160 int PixelPTEReqHeightY[],
6161 int PixelPTEReqWidthC[],
6162 int PixelPTEReqHeightC[],
6163 int dpte_row_width_luma_ub[],
6164 int dpte_row_width_chroma_ub[],
6165 double DST_Y_PER_PTE_ROW_NOM_L[],
6166 double DST_Y_PER_PTE_ROW_NOM_C[],
6167 double DST_Y_PER_META_ROW_NOM_L[],
6168 double DST_Y_PER_META_ROW_NOM_C[],
6169 double TimePerMetaChunkNominal[],
6170 double TimePerChromaMetaChunkNominal[],
6171 double TimePerMetaChunkVBlank[],
6172 double TimePerChromaMetaChunkVBlank[],
6173 double TimePerMetaChunkFlip[],
6174 double TimePerChromaMetaChunkFlip[],
6175 double time_per_pte_group_nom_luma[],
6176 double time_per_pte_group_vblank_luma[],
6177 double time_per_pte_group_flip_luma[],
6178 double time_per_pte_group_nom_chroma[],
6179 double time_per_pte_group_vblank_chroma[],
6180 double time_per_pte_group_flip_chroma[])
6181 {
6182 unsigned int meta_chunk_width;
6183 unsigned int min_meta_chunk_width;
6184 unsigned int meta_chunk_per_row_int;
6185 unsigned int meta_row_remainder;
6186 unsigned int meta_chunk_threshold;
6187 unsigned int meta_chunks_per_row_ub;
6188 unsigned int meta_chunk_width_chroma;
6189 unsigned int min_meta_chunk_width_chroma;
6190 unsigned int meta_chunk_per_row_int_chroma;
6191 unsigned int meta_row_remainder_chroma;
6192 unsigned int meta_chunk_threshold_chroma;
6193 unsigned int meta_chunks_per_row_ub_chroma;
6194 unsigned int dpte_group_width_luma;
6195 unsigned int dpte_groups_per_row_luma_ub;
6196 unsigned int dpte_group_width_chroma;
6197 unsigned int dpte_groups_per_row_chroma_ub;
6198 int k;
6199
6200 for (k = 0; k < NumberOfActivePlanes; ++k) {
6201 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6202 if (BytePerPixelC[k] == 0) {
6203 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6204 } else {
6205 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6206 }
6207 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6208 if (BytePerPixelC[k] == 0) {
6209 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6210 } else {
6211 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6212 }
6213 }
6214
6215 for (k = 0; k < NumberOfActivePlanes; ++k) {
6216 if (DCCEnable[k] == true) {
6217 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6218 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6219 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6220 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6221 if (SourceScan[k] != dm_vert) {
6222 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6223 } else {
6224 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6225 }
6226 if (meta_row_remainder <= meta_chunk_threshold) {
6227 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6228 } else {
6229 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6230 }
6231 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6232 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6233 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6234 if (BytePerPixelC[k] == 0) {
6235 TimePerChromaMetaChunkNominal[k] = 0;
6236 TimePerChromaMetaChunkVBlank[k] = 0;
6237 TimePerChromaMetaChunkFlip[k] = 0;
6238 } else {
6239 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6240 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6241 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6242 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6243 if (SourceScan[k] != dm_vert) {
6244 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6245 } else {
6246 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6247 }
6248 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6249 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6250 } else {
6251 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6252 }
6253 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6254 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6255 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6256 }
6257 } else {
6258 TimePerMetaChunkNominal[k] = 0;
6259 TimePerMetaChunkVBlank[k] = 0;
6260 TimePerMetaChunkFlip[k] = 0;
6261 TimePerChromaMetaChunkNominal[k] = 0;
6262 TimePerChromaMetaChunkVBlank[k] = 0;
6263 TimePerChromaMetaChunkFlip[k] = 0;
6264 }
6265 }
6266
6267 for (k = 0; k < NumberOfActivePlanes; ++k) {
6268 if (GPUVMEnable == true) {
6269 if (SourceScan[k] != dm_vert) {
6270 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6271 } else {
6272 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6273 }
6274 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6275 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6276 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6277 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6278 if (BytePerPixelC[k] == 0) {
6279 time_per_pte_group_nom_chroma[k] = 0;
6280 time_per_pte_group_vblank_chroma[k] = 0;
6281 time_per_pte_group_flip_chroma[k] = 0;
6282 } else {
6283 if (SourceScan[k] != dm_vert) {
6284 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6285 } else {
6286 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6287 }
6288 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6289 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6290 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6291 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6292 }
6293 } else {
6294 time_per_pte_group_nom_luma[k] = 0;
6295 time_per_pte_group_vblank_luma[k] = 0;
6296 time_per_pte_group_flip_luma[k] = 0;
6297 time_per_pte_group_nom_chroma[k] = 0;
6298 time_per_pte_group_vblank_chroma[k] = 0;
6299 time_per_pte_group_flip_chroma[k] = 0;
6300 }
6301 }
6302 }
6303
/*
 * Compute, per active plane, the time available per VM group and per VM
 * request during vblank and during an immediate flip.
 *
 * The four outputs are 0 unless GPUVMEnable is set and the plane either has
 * DCC enabled or GPUVMMaxPageTableLevels > 1.  Otherwise each output is
 * (destination lines * HTotal / PixelClock) divided by the number of groups
 * (byte counts divided by vm_group_bytes, rounded up) or requests (byte counts
 * divided by 64).  Which byte counts contribute:
 *   - DCC disabled:                    dpde0 only
 *   - DCC enabled, one page-table level: meta PTE only
 *   - DCC enabled, more levels:        dpde0 + meta PTE (+1 group, or +2 with chroma)
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 * All four results are halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int group_count;
		int request_count;
		bool has_chroma;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;

		if (DCCEnable[k] == false) {
			/* Only the dpde0 bytes are fetched. */
			if (has_chroma) {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Only the meta PTE bytes are fetched. */
			if (has_chroma) {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Both dpde0 and meta PTE bytes are fetched. */
			if (has_chroma) {
				group_count = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				group_count = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / group_count;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / group_count;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / request_count;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / request_count;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
			TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
			TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
			TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
		}
	}
}
6403
6404 static void CalculateStutterEfficiency(
6405 struct display_mode_lib *mode_lib,
6406 int CompressedBufferSizeInkByte,
6407 bool UnboundedRequestEnabled,
6408 int ConfigReturnBufferSizeInKByte,
6409 int MetaFIFOSizeInKEntries,
6410 int ZeroSizeBufferEntries,
6411 int NumberOfActivePlanes,
6412 int ROBBufferSizeInKByte,
6413 double TotalDataReadBandwidth,
6414 double DCFCLK,
6415 double ReturnBW,
6416 double COMPBUF_RESERVED_SPACE_64B,
6417 double COMPBUF_RESERVED_SPACE_ZS,
6418 double SRExitTime,
6419 double SRExitZ8Time,
6420 bool SynchronizedVBlank,
6421 double Z8StutterEnterPlusExitWatermark,
6422 double StutterEnterPlusExitWatermark,
6423 bool ProgressiveToInterlaceUnitInOPP,
6424 bool Interlace[],
6425 double MinTTUVBlank[],
6426 int DPPPerPlane[],
6427 unsigned int DETBufferSizeY[],
6428 int BytePerPixelY[],
6429 double BytePerPixelDETY[],
6430 double SwathWidthY[],
6431 int SwathHeightY[],
6432 int SwathHeightC[],
6433 double NetDCCRateLuma[],
6434 double NetDCCRateChroma[],
6435 double DCCFractionOfZeroSizeRequestsLuma[],
6436 double DCCFractionOfZeroSizeRequestsChroma[],
6437 int HTotal[],
6438 int VTotal[],
6439 double PixelClock[],
6440 double VRatio[],
6441 enum scan_direction_class SourceScan[],
6442 int BlockHeight256BytesY[],
6443 int BlockWidth256BytesY[],
6444 int BlockHeight256BytesC[],
6445 int BlockWidth256BytesC[],
6446 int DCCYMaxUncompressedBlock[],
6447 int DCCCMaxUncompressedBlock[],
6448 int VActive[],
6449 bool DCCEnable[],
6450 bool WritebackEnable[],
6451 double ReadBandwidthPlaneLuma[],
6452 double ReadBandwidthPlaneChroma[],
6453 double meta_row_bw[],
6454 double dpte_row_bw[],
6455 double *StutterEfficiencyNotIncludingVBlank,
6456 double *StutterEfficiency,
6457 int *NumberOfStutterBurstsPerFrame,
6458 double *Z8StutterEfficiencyNotIncludingVBlank,
6459 double *Z8StutterEfficiency,
6460 int *Z8NumberOfStutterBurstsPerFrame,
6461 double *StutterPeriod)
6462 {
6463 struct vba_vars_st *v = &mode_lib->vba;
6464
6465 double DETBufferingTimeY;
6466 double SwathWidthYCriticalPlane = 0;
6467 double VActiveTimeCriticalPlane = 0;
6468 double FrameTimeCriticalPlane = 0;
6469 int BytePerPixelYCriticalPlane = 0;
6470 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6471 double MinTTUVBlankCriticalPlane = 0;
6472 double TotalCompressedReadBandwidth;
6473 double TotalRowReadBandwidth;
6474 double AverageDCCCompressionRate;
6475 double EffectiveCompressedBufferSize;
6476 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6477 double StutterBurstTime;
6478 int TotalActiveWriteback;
6479 double LinesInDETY;
6480 double LinesInDETYRoundedDownToSwath;
6481 double MaximumEffectiveCompressionLuma;
6482 double MaximumEffectiveCompressionChroma;
6483 double TotalZeroSizeRequestReadBandwidth;
6484 double TotalZeroSizeCompressedReadBandwidth;
6485 double AverageDCCZeroSizeFraction;
6486 double AverageZeroSizeCompressionRate;
6487 int TotalNumberOfActiveOTG = 0;
6488 double LastStutterPeriod = 0.0;
6489 double LastZ8StutterPeriod = 0.0;
6490 int k;
6491
6492 TotalZeroSizeRequestReadBandwidth = 0;
6493 TotalZeroSizeCompressedReadBandwidth = 0;
6494 TotalRowReadBandwidth = 0;
6495 TotalCompressedReadBandwidth = 0;
6496
6497 for (k = 0; k < NumberOfActivePlanes; ++k) {
6498 if (DCCEnable[k] == true) {
6499 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6500 || DCCYMaxUncompressedBlock[k] < 256) {
6501 MaximumEffectiveCompressionLuma = 2;
6502 } else {
6503 MaximumEffectiveCompressionLuma = 4;
6504 }
6505 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6506 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6507 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6508 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6509 if (ReadBandwidthPlaneChroma[k] > 0) {
6510 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6511 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6512 MaximumEffectiveCompressionChroma = 2;
6513 } else {
6514 MaximumEffectiveCompressionChroma = 4;
6515 }
6516 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6517 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6518 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6519 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6520 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6521 }
6522 } else {
6523 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6524 }
6525 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6526 }
6527
6528 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6529 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6530
6531 #ifdef __DML_VBA_DEBUG__
6532 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6533 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6534 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6535 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6536 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6537 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6538 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6539 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6540 #endif
6541
6542 if (AverageDCCZeroSizeFraction == 1) {
6543 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6544 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6545 } else if (AverageDCCZeroSizeFraction > 0) {
6546 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6547 EffectiveCompressedBufferSize = dml_min(
6548 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6549 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6550 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6551 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6552 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6553 dml_print(
6554 "DML::%s: min 2 = %f\n",
6555 __func__,
6556 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6557 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6558 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6559 } else {
6560 EffectiveCompressedBufferSize = dml_min(
6561 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6562 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6563 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6564 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6565 }
6566
6567 #ifdef __DML_VBA_DEBUG__
6568 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6569 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6570 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6571 #endif
6572
6573 *StutterPeriod = 0;
6574 for (k = 0; k < NumberOfActivePlanes; ++k) {
6575 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6576 / BytePerPixelDETY[k] / SwathWidthY[k];
6577 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6578 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6579 #ifdef __DML_VBA_DEBUG__
6580 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6581 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6582 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6583 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6584 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6585 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6586 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6587 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6588 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6589 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6590 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6591 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6592 #endif
6593
6594 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6595 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6596
6597 *StutterPeriod = DETBufferingTimeY;
6598 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6599 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6600 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6601 SwathWidthYCriticalPlane = SwathWidthY[k];
6602 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6603 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6604
6605 #ifdef __DML_VBA_DEBUG__
6606 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6607 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6608 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6609 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6610 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6611 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6612 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6613 #endif
6614 }
6615 }
6616
6617 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6618 #ifdef __DML_VBA_DEBUG__
6619 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6620 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6621 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6622 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6623 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6624 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6625 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6626 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6627 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6628 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6629 #endif
6630
6631 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6632 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6633 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6634 #ifdef __DML_VBA_DEBUG__
6635 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6636 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6637 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6638 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6639 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6640 #endif
6641 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6642
6643 dml_print(
6644 "DML::%s: Time to finish residue swath=%f\n",
6645 __func__,
6646 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6647
6648 TotalActiveWriteback = 0;
6649 for (k = 0; k < NumberOfActivePlanes; ++k) {
6650 if (WritebackEnable[k]) {
6651 TotalActiveWriteback = TotalActiveWriteback + 1;
6652 }
6653 }
6654
6655 if (TotalActiveWriteback == 0) {
6656 #ifdef __DML_VBA_DEBUG__
6657 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6658 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6659 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6660 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6661 #endif
6662 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6663 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6664 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6665 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6666 } else {
6667 *StutterEfficiencyNotIncludingVBlank = 0.;
6668 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6669 *NumberOfStutterBurstsPerFrame = 0;
6670 *Z8NumberOfStutterBurstsPerFrame = 0;
6671 }
6672 #ifdef __DML_VBA_DEBUG__
6673 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6674 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6675 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6676 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6677 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6678 #endif
6679
6680 for (k = 0; k < NumberOfActivePlanes; ++k) {
6681 if (v->BlendingAndTiming[k] == k) {
6682 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6683 }
6684 }
6685
6686 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6687 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6688
6689 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6690 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6691 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6692 } else {
6693 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6694 }
6695 } else {
6696 *StutterEfficiency = 0;
6697 }
6698
6699 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6700 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6701 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6702 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6703 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6704 } else {
6705 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6706 }
6707 } else {
6708 *Z8StutterEfficiency = 0.;
6709 }
6710
6711 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6712 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6713 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6714 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6715 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6716 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6717 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6718 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6719 }
6720
6721 static void CalculateSwathAndDETConfiguration(
6722 bool ForceSingleDPP,
6723 int NumberOfActivePlanes,
6724 unsigned int DETBufferSizeInKByte,
6725 double MaximumSwathWidthLuma[],
6726 double MaximumSwathWidthChroma[],
6727 enum scan_direction_class SourceScan[],
6728 enum source_format_class SourcePixelFormat[],
6729 enum dm_swizzle_mode SurfaceTiling[],
6730 int ViewportWidth[],
6731 int ViewportHeight[],
6732 int SurfaceWidthY[],
6733 int SurfaceWidthC[],
6734 int SurfaceHeightY[],
6735 int SurfaceHeightC[],
6736 int Read256BytesBlockHeightY[],
6737 int Read256BytesBlockHeightC[],
6738 int Read256BytesBlockWidthY[],
6739 int Read256BytesBlockWidthC[],
6740 enum odm_combine_mode ODMCombineEnabled[],
6741 int BlendingAndTiming[],
6742 int BytePerPixY[],
6743 int BytePerPixC[],
6744 double BytePerPixDETY[],
6745 double BytePerPixDETC[],
6746 int HActive[],
6747 double HRatio[],
6748 double HRatioChroma[],
6749 int DPPPerPlane[],
6750 int swath_width_luma_ub[],
6751 int swath_width_chroma_ub[],
6752 double SwathWidth[],
6753 double SwathWidthChroma[],
6754 int SwathHeightY[],
6755 int SwathHeightC[],
6756 unsigned int DETBufferSizeY[],
6757 unsigned int DETBufferSizeC[],
6758 bool ViewportSizeSupportPerPlane[],
6759 bool *ViewportSizeSupport)
6760 {
6761 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6762 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6763 int MinimumSwathHeightY;
6764 int MinimumSwathHeightC;
6765 int RoundedUpMaxSwathSizeBytesY;
6766 int RoundedUpMaxSwathSizeBytesC;
6767 int RoundedUpMinSwathSizeBytesY;
6768 int RoundedUpMinSwathSizeBytesC;
6769 int RoundedUpSwathSizeBytesY;
6770 int RoundedUpSwathSizeBytesC;
6771 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6772 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6773 int k;
6774
6775 CalculateSwathWidth(
6776 ForceSingleDPP,
6777 NumberOfActivePlanes,
6778 SourcePixelFormat,
6779 SourceScan,
6780 ViewportWidth,
6781 ViewportHeight,
6782 SurfaceWidthY,
6783 SurfaceWidthC,
6784 SurfaceHeightY,
6785 SurfaceHeightC,
6786 ODMCombineEnabled,
6787 BytePerPixY,
6788 BytePerPixC,
6789 Read256BytesBlockHeightY,
6790 Read256BytesBlockHeightC,
6791 Read256BytesBlockWidthY,
6792 Read256BytesBlockWidthC,
6793 BlendingAndTiming,
6794 HActive,
6795 HRatio,
6796 DPPPerPlane,
6797 SwathWidthSingleDPP,
6798 SwathWidthSingleDPPChroma,
6799 SwathWidth,
6800 SwathWidthChroma,
6801 MaximumSwathHeightY,
6802 MaximumSwathHeightC,
6803 swath_width_luma_ub,
6804 swath_width_chroma_ub);
6805
6806 *ViewportSizeSupport = true;
6807 for (k = 0; k < NumberOfActivePlanes; ++k) {
6808 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6809 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6810 if (SurfaceTiling[k] == dm_sw_linear
6811 || (SourcePixelFormat[k] == dm_444_64
6812 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6813 && SourceScan[k] != dm_vert)) {
6814 MinimumSwathHeightY = MaximumSwathHeightY[k];
6815 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6816 MinimumSwathHeightY = MaximumSwathHeightY[k];
6817 } else {
6818 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6819 }
6820 MinimumSwathHeightC = MaximumSwathHeightC[k];
6821 } else {
6822 if (SurfaceTiling[k] == dm_sw_linear) {
6823 MinimumSwathHeightY = MaximumSwathHeightY[k];
6824 MinimumSwathHeightC = MaximumSwathHeightC[k];
6825 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6826 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6827 MinimumSwathHeightC = MaximumSwathHeightC[k];
6828 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6829 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6830 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6831 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6832 MinimumSwathHeightY = MaximumSwathHeightY[k];
6833 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6834 } else {
6835 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6836 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6837 }
6838 }
6839
6840 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6841 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6842 if (SourcePixelFormat[k] == dm_420_10) {
6843 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6844 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6845 }
6846 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6847 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6848 if (SourcePixelFormat[k] == dm_420_10) {
6849 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6850 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6851 }
6852
6853 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6854 SwathHeightY[k] = MaximumSwathHeightY[k];
6855 SwathHeightC[k] = MaximumSwathHeightC[k];
6856 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6857 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6858 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6859 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6860 SwathHeightY[k] = MinimumSwathHeightY;
6861 SwathHeightC[k] = MaximumSwathHeightC[k];
6862 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6863 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6864 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6865 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6866 SwathHeightY[k] = MaximumSwathHeightY[k];
6867 SwathHeightC[k] = MinimumSwathHeightC;
6868 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6869 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6870 } else {
6871 SwathHeightY[k] = MinimumSwathHeightY;
6872 SwathHeightC[k] = MinimumSwathHeightC;
6873 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6874 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6875 }
6876 {
6877 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6878
6879 if (SwathHeightC[k] == 0) {
6880 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6881 DETBufferSizeC[k] = 0;
6882 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6883 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6884 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6885 } else {
6886 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6887 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6888 }
6889
6890 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6891 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6892 *ViewportSizeSupport = false;
6893 ViewportSizeSupportPerPlane[k] = false;
6894 } else {
6895 ViewportSizeSupportPerPlane[k] = true;
6896 }
6897 }
6898 }
6899 }
6900
6901 static void CalculateSwathWidth(
6902 bool ForceSingleDPP,
6903 int NumberOfActivePlanes,
6904 enum source_format_class SourcePixelFormat[],
6905 enum scan_direction_class SourceScan[],
6906 int ViewportWidth[],
6907 int ViewportHeight[],
6908 int SurfaceWidthY[],
6909 int SurfaceWidthC[],
6910 int SurfaceHeightY[],
6911 int SurfaceHeightC[],
6912 enum odm_combine_mode ODMCombineEnabled[],
6913 int BytePerPixY[],
6914 int BytePerPixC[],
6915 int Read256BytesBlockHeightY[],
6916 int Read256BytesBlockHeightC[],
6917 int Read256BytesBlockWidthY[],
6918 int Read256BytesBlockWidthC[],
6919 int BlendingAndTiming[],
6920 int HActive[],
6921 double HRatio[],
6922 int DPPPerPlane[],
6923 double SwathWidthSingleDPPY[],
6924 double SwathWidthSingleDPPC[],
6925 double SwathWidthY[],
6926 double SwathWidthC[],
6927 int MaximumSwathHeightY[],
6928 int MaximumSwathHeightC[],
6929 int swath_width_luma_ub[],
6930 int swath_width_chroma_ub[])
6931 {
6932 enum odm_combine_mode MainPlaneODMCombine;
6933 int j, k;
6934
6935 #ifdef __DML_VBA_DEBUG__
6936 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6937 #endif
6938
6939 for (k = 0; k < NumberOfActivePlanes; ++k) {
6940 if (SourceScan[k] != dm_vert) {
6941 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6942 } else {
6943 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6944 }
6945
6946 #ifdef __DML_VBA_DEBUG__
6947 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6948 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6949 #endif
6950
6951 MainPlaneODMCombine = ODMCombineEnabled[k];
6952 for (j = 0; j < NumberOfActivePlanes; ++j) {
6953 if (BlendingAndTiming[k] == j) {
6954 MainPlaneODMCombine = ODMCombineEnabled[j];
6955 }
6956 }
6957
6958 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6959 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6960 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6961 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6962 else if (DPPPerPlane[k] == 2)
6963 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6964 else
6965 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6966
6967 #ifdef __DML_VBA_DEBUG__
6968 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6969 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6970 #endif
6971
6972 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6973 SwathWidthC[k] = SwathWidthY[k] / 2;
6974 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6975 } else {
6976 SwathWidthC[k] = SwathWidthY[k];
6977 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6978 }
6979
6980 if (ForceSingleDPP == true) {
6981 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6982 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6983 }
6984 {
6985 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6986 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6987
6988 #ifdef __DML_VBA_DEBUG__
6989 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6990 #endif
6991
6992 if (SourceScan[k] != dm_vert) {
6993 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6994 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6995 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6996 if (BytePerPixC[k] > 0) {
6997 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6998
6999 swath_width_chroma_ub[k] = dml_min(
7000 surface_width_ub_c,
7001 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7002 } else {
7003 swath_width_chroma_ub[k] = 0;
7004 }
7005 } else {
7006 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7007 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7008 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7009 if (BytePerPixC[k] > 0) {
7010 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7011
7012 swath_width_chroma_ub[k] = dml_min(
7013 surface_height_ub_c,
7014 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7015 } else {
7016 swath_width_chroma_ub[k] = 0;
7017 }
7018 }
7019 }
7020 }
7021 }
7022
7023 static double CalculateExtraLatency(
7024 int RoundTripPingLatencyCycles,
7025 int ReorderingBytes,
7026 double DCFCLK,
7027 int TotalNumberOfActiveDPP,
7028 int PixelChunkSizeInKByte,
7029 int TotalNumberOfDCCActiveDPP,
7030 int MetaChunkSize,
7031 double ReturnBW,
7032 bool GPUVMEnable,
7033 bool HostVMEnable,
7034 int NumberOfActivePlanes,
7035 int NumberOfDPP[],
7036 int dpte_group_bytes[],
7037 double HostVMInefficiencyFactor,
7038 double HostVMMinPageSize,
7039 int HostVMMaxNonCachedPageTableLevels)
7040 {
7041 double ExtraLatencyBytes;
7042 double ExtraLatency;
7043
7044 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7045 ReorderingBytes,
7046 TotalNumberOfActiveDPP,
7047 PixelChunkSizeInKByte,
7048 TotalNumberOfDCCActiveDPP,
7049 MetaChunkSize,
7050 GPUVMEnable,
7051 HostVMEnable,
7052 NumberOfActivePlanes,
7053 NumberOfDPP,
7054 dpte_group_bytes,
7055 HostVMInefficiencyFactor,
7056 HostVMMinPageSize,
7057 HostVMMaxNonCachedPageTableLevels);
7058
7059 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7060
7061 #ifdef __DML_VBA_DEBUG__
7062 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7063 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7064 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7065 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7066 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7067 #endif
7068
7069 return ExtraLatency;
7070 }
7071
/*
 * CalculateExtraLatencyBytes() - number of bytes contributing to the extra
 * latency.
 *
 * The base amount is ReorderingBytes plus one pixel chunk per active DPP and
 * one meta chunk per DCC-active DPP (chunk sizes are multiplied by 1024).
 * When GPUVMEnable is set, each plane adds
 *   NumberOfDPP * dpte_group_bytes * (1 + 8 * levels) * HostVMInefficiencyFactor
 * where "levels" is 0 unless both GPUVM and HostVM are enabled, in which case
 * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never below
 * 0) depending on whether HostVMMinPageSize is below 2048, below 1048576, or
 * neither.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int DynamicLevels = 0;
	double TotalBytes;
	int plane;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic only exists when GPUVM is enabled. */
	if (GPUVMEnable != true)
		return TotalBytes;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		TotalBytes = TotalBytes + NumberOfDPP[plane] * dpte_group_bytes[plane] * (1 + 8 * DynamicLevels) * HostVMInefficiencyFactor;

	return TotalBytes;
}
7109
/*
 * CalculateUrgentLatency() - largest of the three urgent-latency inputs,
 * optionally adjusted for the fabric clock.
 *
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCase = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCase;

	return WorstCase + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7126
/*
 * UseMinimumDCFCLK - fill v->DCFCLKState[i][j] for every voltage state i
 * (0 .. v->soc.num_states - 1) and both values of the second index j (0, 1).
 *
 * For each (i, j) the function derives a DCFCLK needed for average bandwidth
 * and one needed for peak (prefetch) bandwidth, takes the larger of the two,
 * multiplies it by 1.05 and caps the result at v->DCFCLKPerState[i].
 *
 * @mode_lib: display mode library instance; its vba member supplies every
 *            input read here and receives the DCFCLKState[][] results.
 * @MaxPrefetchMode: forwarded unchanged to CalculateTWait().
 * @ReorderingBytes: forwarded unchanged to CalculateExtraLatencyBytes().
 */
7127 static noinline_for_stack void UseMinimumDCFCLK(
7128 struct display_mode_lib *mode_lib,
7129 int MaxPrefetchMode,
7130 int ReorderingBytes)
7131 {
7132 struct vba_vars_st *v = &mode_lib->vba;
/* dummy1..dummy3 only receive outputs of CalculateVupdateAndDynamicMetadataParameters() that are not read here. */
7133 int dummy1, i, j, k;
7134 double NormalEfficiency, dummy2, dummy3;
7135 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7136
/* Convert the percentage into a fraction; it is used as a divisor throughout. */
7137 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7138 for (i = 0; i < v->soc.num_states; ++i) {
7139 for (j = 0; j <= 1; ++j) {
7140 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7141 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7142 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7143 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7144 double MinimumTWait;
7145 double NonDPTEBandwidth;
7146 double DPTEBandwidth;
7147 double DCFCLKRequiredForAverageBandwidth;
7148 double ExtraLatencyBytes;
7149 double ExtraLatencyCycles;
7150 double DCFCLKRequiredForPeakBandwidth;
7151 int NoOfDPPState[DC__NUM_DPP__MAX];
7152 double MinimumTvmPlus2Tr0;
7153
/* Sum over planes of NoOfDPP * DPTEBytesPerRow / (15.75 * HTotal / PixelClock). */
7154 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7155 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7156 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7157 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7158 }
7159
/*
 * Local copy of the per-plane DPP counts for this (i, j); passed as a flat
 * array to CalculateExtraLatencyBytes() and reused as a divisor/multiplier below.
 * NOTE(review): this loop (and the summation loop further down) is bounded by
 * "k <= NumberOfActivePlanes - 1" while the other plane loops use
 * "k < NumberOfActivePlanes". They match for one or more planes; if the field
 * is an unsigned type and zero, the subtraction would wrap - confirm the
 * field's type and that zero planes cannot reach here.
 */
7160 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7161 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7162
7163 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7164 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
/*
 * Use the figure accumulated above when HostVM is enabled or entry 0 of
 * ImmediateFlipRequirement demands immediate flip; otherwise use the stored
 * TotalDPTERowBandwidth for this state.
 */
7165 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7166 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
/* Average-bandwidth requirement: dml_max3() of the deep-sleep DCFCLK and two bandwidth-derived clocks. */
7167 DCFCLKRequiredForAverageBandwidth = dml_max3(
7168 v->ProjectedDCFCLKDeepSleep[i][j],
7169 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7170 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7171 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7172
/* HostVMInefficiencyFactor is passed as the constant 1 here. */
7173 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7174 ReorderingBytes,
7175 v->TotalNumberOfActiveDPP[i][j],
7176 v->PixelChunkSizeInKByte,
7177 v->TotalNumberOfDCCActiveDPP[i][j],
7178 v->MetaChunkSize,
7179 v->GPUVMEnable,
7180 v->HostVMEnable,
7181 v->NumberOfActivePlanes,
7182 NoOfDPPState,
7183 v->dpte_group_bytes,
7184 1,
7185 v->HostVMMinPageSize,
7186 v->HostVMMaxNonCachedPageTableLevels);
/* Fixed round-trip/arbiter cycles plus the cycles needed to return ExtraLatencyBytes. */
7187 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
/* Per-plane pass: fills the four per-plane arrays declared above. */
7188 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7189 double DCFCLKCyclesRequiredInPrefetch;
7190 double ExpectedPrefetchBWAcceleration;
7191 double PrefetchTime;
7192
7193 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7194 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
/* The PDE/meta-PTE term is only counted when GPUVMMaxPageTableLevels > 2. */
7195 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7196 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7197 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7198 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7199 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7200 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7201 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
/* Non-zero only when GPUVM, this plane's dynamic metadata and DynamicMetadataVMEnabled are all set. */
7202 DynamicMetadataVMExtraLatency[k] =
7203 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7204 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7205 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7206 - v->UrgLatency[i]
7207 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7208 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7209 - DynamicMetadataVMExtraLatency[k];
7210
/* With no positive prefetch time left, the plane is charged the full DCFCLK of this state. */
7211 if (PrefetchTime > 0) {
7212 double ExpectedVRatioPrefetch;
7213
7214 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7215 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7216 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7217 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
/* Same condition that selected DPTEBandwidth above: add the DPTE share on top. */
7218 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7219 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7220 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7221 }
7222 } else {
7223 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7224 }
/*
 * Planes with dynamic metadata: raise the per-plane requirement so that
 * ExtraLatencyCycles fit in the remaining time, or fall back to the state's
 * full DCFCLK when no time remains (this overwrites the value set above).
 */
7225 if (v->DynamicMetadataEnable[k] == true) {
7226 double TSetupPipe;
7227 double TdmbfPipe;
7228 double TdmsksPipe;
7229 double TdmecPipe;
7230 double AllowedTimeForUrgentExtraLatency;
7231
7232 CalculateVupdateAndDynamicMetadataParameters(
7233 v->MaxInterDCNTileRepeaters,
7234 v->RequiredDPPCLK[i][j][k],
7235 v->RequiredDISPCLK[i][j],
7236 v->ProjectedDCFCLKDeepSleep[i][j],
7237 v->PixelClock[k],
7238 v->HTotal[k],
7239 v->VTotal[k] - v->VActive[k],
7240 v->DynamicMetadataTransmittedBytes[k],
7241 v->DynamicMetadataLinesBeforeActiveRequired[k],
7242 v->Interlace[k],
7243 v->ProgressiveToInterlaceUnitInOPP,
7244 &TSetupPipe,
7245 &TdmbfPipe,
7246 &TdmecPipe,
7247 &TdmsksPipe,
7248 &dummy1,
7249 &dummy2,
7250 &dummy3);
7251 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7252 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7253 if (AllowedTimeForUrgentExtraLatency > 0) {
7254 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7255 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7256 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7257 } else {
7258 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7259 }
7260 }
7261 }
/* Peak requirement starts as the sum of the per-plane requirements. */
7262 DCFCLKRequiredForPeakBandwidth = 0;
7263 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7264 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7265
/* Zero when GPUVM is off; otherwise UrgLatency scaled by a page-table-level count that depends on HostVM. */
7266 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7267 * (v->GPUVMEnable == true ?
7268 (v->HostVMEnable == true ?
7269 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7270 0);
/*
 * Second per-plane pass: if the time available does not exceed the minimum
 * needed, set the peak requirement to the state's DCFCLK; otherwise combine
 * the running value with two latency-derived clocks through dml_max3().
 */
7271 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7272 double MaximumTvmPlus2Tr0PlusTsw;
7273
7274 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7275 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7276 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7277 } else {
7278 DCFCLKRequiredForPeakBandwidth = dml_max3(
7279 DCFCLKRequiredForPeakBandwidth,
7280 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7281 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7282 }
7283 }
/* Larger of the two requirements, times 1.05, capped at the state's DCFCLK. */
7284 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7285 }
7286 }
7287 }
7288
7289 static void CalculateUnboundedRequestAndCompressedBufferSize(
7290 unsigned int DETBufferSizeInKByte,
7291 int ConfigReturnBufferSizeInKByte,
7292 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7293 int TotalActiveDPP,
7294 bool NoChromaPlanes,
7295 int MaxNumDPP,
7296 int CompressedBufferSegmentSizeInkByteFinal,
7297 enum output_encoder_class *Output,
7298 bool *UnboundedRequestEnabled,
7299 int *CompressedBufferSizeInkByte)
7300 {
7301 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7302
7303 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7304 *CompressedBufferSizeInkByte = (
7305 *UnboundedRequestEnabled == true ?
7306 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7307 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7308 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7309
7310 #ifdef __DML_VBA_DEBUG__
7311 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7312 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7313 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7314 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7315 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7316 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7317 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7318 #endif
7319 }
7320
7321 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7322 {
7323 bool ret_val = false;
7324
7325 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7326 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7327 ret_val = false;
7328 return ret_val;
7329 }
7330
/*
 * CalculateMaxVStartup - compute the largest VStartup value, capped at 1023.
 *
 * The usable vertical blank is the smaller of (VTotal - VActive) and the
 * nominal blank, where the nominal blank is VBlankNom, or - when VBlankNom
 * is 0 - VBlankNomDefaultUS divided by the line time (HTotal / PixelClock)
 * and floored to whole lines.
 *
 * For interlaced timing without ProgressiveTointerlaceUnitinOPP the result
 * is half the usable blank, floored.  Otherwise the result is the usable
 * blank minus the writeback delay expressed in whole lines (rounded up,
 * at least 1).
 *
 * NOTE(review): in the non-interlaced branch the subtraction is done in
 * double and converted to unsigned int; if the writeback line count exceeds
 * the usable blank the intermediate value is negative before conversion -
 * confirm callers cannot reach that case.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	double line_us = HTotal / PixelClock;
	unsigned int blank_lines_actual = VTotal - VActive;
	unsigned int blank_lines_default = dml_floor(VBlankNomDefaultUS / line_us, 1.0);
	unsigned int blank_lines_nominal = VBlankNom == 0 ? blank_lines_default : VBlankNom;
	unsigned int blank_lines_usable = (unsigned int) dml_min(blank_lines_actual, blank_lines_nominal);
	unsigned int max_vstartup;

	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
		max_vstartup = dml_floor(blank_lines_usable / 2.0, 1.0);
	else
		max_vstartup = blank_lines_usable - dml_max(1.0, dml_ceil(WritebackDelayTime / line_us, 1.0));

	return max_vstartup > 1023 ? 1023 : max_vstartup;
}
7359