1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "../dcn30/display_mode_vba_30.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
// NOTE(review): no user of the two BPP_* values is visible in this part of the
// file; confirm their meaning against the code that consumes them.
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
// NOTE(review): 5184 and 4096 are the same limits quoted for sliceWidth in the
// comments of dscceComputeDelay() (4096 applying to 420 mode).
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
// NOTE(review): the _KB suffix suggests kilobytes; the unit of MAX_DET_SIZE is
// not stated here - confirm.
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_15_MAX_DET_SIZE 384

// For DML-C changes that haven't been propagated to VBA yet.
// Left disabled; the #ifdef __DML_VBA_ALLOW_DELTA__ sections below therefore
// take their #else path.
//#define __DML_VBA_ALLOW_DELTA__

// Move these to ip parameters/constant

// VStartup value at which DML starts trying whether the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__ 9

// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15
61
/*
 * Bundle of per-pipe parameters. CalculatePrefetchSchedule() receives a
 * pointer to one of these as its myPipe argument, so the values do not have
 * to be passed as individual parameters.
 */
typedef struct {
	// Clock values; CalculatePrefetchSchedule() forwards these four to
	// CalculateVupdateAndDynamicMetadataParameters().
	double DPPCLK;
	double DISPCLK;
	double PixelClock;
	double DCFCLKDeepSleep;
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	// CalculatePrefetchSchedule() derives LineTime as HTotal / PixelClock.
	unsigned int HTotal;
	unsigned int DCCEnable;
	bool ODMCombineIsEnabled;
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
87
// NOTE(review): these two macros repeat, with identical values, the
// definitions of the same names near the top of this file. An identical
// redefinition is legal C, so this is harmless but redundant.
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
90
// Forward declarations of the file-local (static) helpers; their definitions
// follow further down in this file.
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
static unsigned int dscceComputeDelay(
		unsigned int bpc,
		double BPP,
		unsigned int sliceWidth,
		unsigned int numSlices,
		enum output_format_class pixelFormat,
		enum output_encoder_class Output);
static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
101 static bool CalculatePrefetchSchedule(
102 struct display_mode_lib *mode_lib,
103 double HostVMInefficiencyFactor,
104 Pipe *myPipe,
105 unsigned int DSCDelay,
106 double DPPCLKDelaySubtotalPlusCNVCFormater,
107 double DPPCLKDelaySCL,
108 double DPPCLKDelaySCLLBOnly,
109 double DPPCLKDelayCNVCCursor,
110 double DISPCLKDelaySubtotal,
111 unsigned int DPP_RECOUT_WIDTH,
112 enum output_format_class OutputFormat,
113 unsigned int MaxInterDCNTileRepeaters,
114 unsigned int VStartup,
115 unsigned int MaxVStartup,
116 unsigned int GPUVMPageTableLevels,
117 bool GPUVMEnable,
118 bool HostVMEnable,
119 unsigned int HostVMMaxNonCachedPageTableLevels,
120 double HostVMMinPageSize,
121 bool DynamicMetadataEnable,
122 bool DynamicMetadataVMEnabled,
123 int DynamicMetadataLinesBeforeActiveRequired,
124 unsigned int DynamicMetadataTransmittedBytes,
125 double UrgentLatency,
126 double UrgentExtraLatency,
127 double TCalc,
128 unsigned int PDEAndMetaPTEBytesFrame,
129 unsigned int MetaRowByte,
130 unsigned int PixelPTEBytesPerRow,
131 double PrefetchSourceLinesY,
132 unsigned int SwathWidthY,
133 double VInitPreFillY,
134 unsigned int MaxNumSwathY,
135 double PrefetchSourceLinesC,
136 unsigned int SwathWidthC,
137 double VInitPreFillC,
138 unsigned int MaxNumSwathC,
139 int swath_width_luma_ub,
140 int swath_width_chroma_ub,
141 unsigned int SwathHeightY,
142 unsigned int SwathHeightC,
143 double TWait,
144 double *DSTXAfterScaler,
145 double *DSTYAfterScaler,
146 double *DestinationLinesForPrefetch,
147 double *PrefetchBandwidth,
148 double *DestinationLinesToRequestVMInVBlank,
149 double *DestinationLinesToRequestRowInVBlank,
150 double *VRatioPrefetchY,
151 double *VRatioPrefetchC,
152 double *RequiredPrefetchPixDataBWLuma,
153 double *RequiredPrefetchPixDataBWChroma,
154 bool *NotEnoughTimeForDynamicMetadata,
155 double *Tno_bw,
156 double *prefetch_vmrow_bw,
157 double *Tdmdl_vm,
158 double *Tdmdl,
159 double *TSetup,
160 int *VUpdateOffsetPix,
161 double *VUpdateWidthPix,
162 double *VReadyOffsetPix);
163 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
164 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
165 static void CalculateDCCConfiguration(
166 bool DCCEnabled,
167 bool DCCProgrammingAssumesScanDirectionUnknown,
168 enum source_format_class SourcePixelFormat,
169 unsigned int SurfaceWidthLuma,
170 unsigned int SurfaceWidthChroma,
171 unsigned int SurfaceHeightLuma,
172 unsigned int SurfaceHeightChroma,
173 double DETBufferSize,
174 unsigned int RequestHeight256ByteLuma,
175 unsigned int RequestHeight256ByteChroma,
176 enum dm_swizzle_mode TilingFormat,
177 unsigned int BytePerPixelY,
178 unsigned int BytePerPixelC,
179 double BytePerPixelDETY,
180 double BytePerPixelDETC,
181 enum scan_direction_class ScanOrientation,
182 unsigned int *MaxUncompressedBlockLuma,
183 unsigned int *MaxUncompressedBlockChroma,
184 unsigned int *MaxCompressedBlockLuma,
185 unsigned int *MaxCompressedBlockChroma,
186 unsigned int *IndependentBlockLuma,
187 unsigned int *IndependentBlockChroma);
188 static double CalculatePrefetchSourceLines(
189 struct display_mode_lib *mode_lib,
190 double VRatio,
191 double vtaps,
192 bool Interlace,
193 bool ProgressiveToInterlaceUnitInOPP,
194 unsigned int SwathHeight,
195 unsigned int ViewportYStart,
196 double *VInitPreFill,
197 unsigned int *MaxNumSwath);
198 static unsigned int CalculateVMAndRowBytes(
199 struct display_mode_lib *mode_lib,
200 bool DCCEnable,
201 unsigned int BlockHeight256Bytes,
202 unsigned int BlockWidth256Bytes,
203 enum source_format_class SourcePixelFormat,
204 unsigned int SurfaceTiling,
205 unsigned int BytePerPixel,
206 enum scan_direction_class ScanDirection,
207 unsigned int SwathWidth,
208 unsigned int ViewportHeight,
209 bool GPUVMEnable,
210 bool HostVMEnable,
211 unsigned int HostVMMaxNonCachedPageTableLevels,
212 unsigned int GPUVMMinPageSize,
213 unsigned int HostVMMinPageSize,
214 unsigned int PTEBufferSizeInRequests,
215 unsigned int Pitch,
216 unsigned int DCCMetaPitch,
217 unsigned int *MacroTileWidth,
218 unsigned int *MetaRowByte,
219 unsigned int *PixelPTEBytesPerRow,
220 bool *PTEBufferSizeNotExceeded,
221 int *dpte_row_width_ub,
222 unsigned int *dpte_row_height,
223 unsigned int *MetaRequestWidth,
224 unsigned int *MetaRequestHeight,
225 unsigned int *meta_row_width,
226 unsigned int *meta_row_height,
227 int *vm_group_bytes,
228 unsigned int *dpte_group_bytes,
229 unsigned int *PixelPTEReqWidth,
230 unsigned int *PixelPTEReqHeight,
231 unsigned int *PTERequestSize,
232 int *DPDE0BytesFrame,
233 int *MetaPTEBytesFrame);
234 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
235 static void CalculateRowBandwidth(
236 bool GPUVMEnable,
237 enum source_format_class SourcePixelFormat,
238 double VRatio,
239 double VRatioChroma,
240 bool DCCEnable,
241 double LineTime,
242 unsigned int MetaRowByteLuma,
243 unsigned int MetaRowByteChroma,
244 unsigned int meta_row_height_luma,
245 unsigned int meta_row_height_chroma,
246 unsigned int PixelPTEBytesPerRowLuma,
247 unsigned int PixelPTEBytesPerRowChroma,
248 unsigned int dpte_row_height_luma,
249 unsigned int dpte_row_height_chroma,
250 double *meta_row_bw,
251 double *dpte_row_bw);
252
253 static void CalculateFlipSchedule(
254 struct display_mode_lib *mode_lib,
255 unsigned int k,
256 double HostVMInefficiencyFactor,
257 double UrgentExtraLatency,
258 double UrgentLatency,
259 double PDEAndMetaPTEBytesPerFrame,
260 double MetaRowBytes,
261 double DPTEBytesPerRow);
262 static double CalculateWriteBackDelay(
263 enum source_format_class WritebackPixelFormat,
264 double WritebackHRatio,
265 double WritebackVRatio,
266 unsigned int WritebackVTaps,
267 int WritebackDestinationWidth,
268 int WritebackDestinationHeight,
269 int WritebackSourceHeight,
270 unsigned int HTotal);
271
272 static void CalculateVupdateAndDynamicMetadataParameters(
273 int MaxInterDCNTileRepeaters,
274 double DPPCLK,
275 double DISPCLK,
276 double DCFClkDeepSleep,
277 double PixelClock,
278 int HTotal,
279 int VBlank,
280 int DynamicMetadataTransmittedBytes,
281 int DynamicMetadataLinesBeforeActiveRequired,
282 int InterlaceEnable,
283 bool ProgressiveToInterlaceUnitInOPP,
284 double *TSetup,
285 double *Tdmbf,
286 double *Tdmec,
287 double *Tdmsks,
288 int *VUpdateOffsetPix,
289 double *VUpdateWidthPix,
290 double *VReadyOffsetPix);
291
292 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
293 struct display_mode_lib *mode_lib,
294 unsigned int PrefetchMode,
295 double DCFCLK,
296 double ReturnBW,
297 double UrgentLatency,
298 double ExtraLatency,
299 double SOCCLK,
300 double DCFCLKDeepSleep,
301 unsigned int DETBufferSizeY[],
302 unsigned int DETBufferSizeC[],
303 unsigned int SwathHeightY[],
304 unsigned int SwathHeightC[],
305 double SwathWidthY[],
306 double SwathWidthC[],
307 unsigned int DPPPerPlane[],
308 double BytePerPixelDETY[],
309 double BytePerPixelDETC[],
310 bool UnboundedRequestEnabled,
311 int unsigned CompressedBufferSizeInkByte,
312 enum clock_change_support *DRAMClockChangeSupport,
313 double *StutterExitWatermark,
314 double *StutterEnterPlusExitWatermark,
315 double *Z8StutterExitWatermark,
316 double *Z8StutterEnterPlusExitWatermark);
317
318 static void CalculateDCFCLKDeepSleep(
319 struct display_mode_lib *mode_lib,
320 unsigned int NumberOfActivePlanes,
321 int BytePerPixelY[],
322 int BytePerPixelC[],
323 double VRatio[],
324 double VRatioChroma[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 unsigned int DPPPerPlane[],
328 double HRatio[],
329 double HRatioChroma[],
330 double PixelClock[],
331 double PSCL_THROUGHPUT[],
332 double PSCL_THROUGHPUT_CHROMA[],
333 double DPPCLK[],
334 double ReadBandwidthLuma[],
335 double ReadBandwidthChroma[],
336 int ReturnBusWidth,
337 double *DCFCLKDeepSleep);
338
339 static void CalculateUrgentBurstFactor(
340 int swath_width_luma_ub,
341 int swath_width_chroma_ub,
342 unsigned int SwathHeightY,
343 unsigned int SwathHeightC,
344 double LineTime,
345 double UrgentLatency,
346 double CursorBufferSize,
347 unsigned int CursorWidth,
348 unsigned int CursorBPP,
349 double VRatio,
350 double VRatioC,
351 double BytePerPixelInDETY,
352 double BytePerPixelInDETC,
353 double DETBufferSizeY,
354 double DETBufferSizeC,
355 double *UrgentBurstFactorCursor,
356 double *UrgentBurstFactorLuma,
357 double *UrgentBurstFactorChroma,
358 bool *NotEnoughUrgentLatencyHiding);
359
360 static void UseMinimumDCFCLK(
361 struct display_mode_lib *mode_lib,
362 int MaxPrefetchMode,
363 int ReorderingBytes);
364
365 static void CalculatePixelDeliveryTimes(
366 unsigned int NumberOfActivePlanes,
367 double VRatio[],
368 double VRatioChroma[],
369 double VRatioPrefetchY[],
370 double VRatioPrefetchC[],
371 unsigned int swath_width_luma_ub[],
372 unsigned int swath_width_chroma_ub[],
373 unsigned int DPPPerPlane[],
374 double HRatio[],
375 double HRatioChroma[],
376 double PixelClock[],
377 double PSCL_THROUGHPUT[],
378 double PSCL_THROUGHPUT_CHROMA[],
379 double DPPCLK[],
380 int BytePerPixelC[],
381 enum scan_direction_class SourceScan[],
382 unsigned int NumberOfCursors[],
383 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
384 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
385 unsigned int BlockWidth256BytesY[],
386 unsigned int BlockHeight256BytesY[],
387 unsigned int BlockWidth256BytesC[],
388 unsigned int BlockHeight256BytesC[],
389 double DisplayPipeLineDeliveryTimeLuma[],
390 double DisplayPipeLineDeliveryTimeChroma[],
391 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
392 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
393 double DisplayPipeRequestDeliveryTimeLuma[],
394 double DisplayPipeRequestDeliveryTimeChroma[],
395 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
396 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
397 double CursorRequestDeliveryTime[],
398 double CursorRequestDeliveryTimePrefetch[]);
399
400 static void CalculateMetaAndPTETimes(
401 int NumberOfActivePlanes,
402 bool GPUVMEnable,
403 int MetaChunkSize,
404 int MinMetaChunkSizeBytes,
405 int HTotal[],
406 double VRatio[],
407 double VRatioChroma[],
408 double DestinationLinesToRequestRowInVBlank[],
409 double DestinationLinesToRequestRowInImmediateFlip[],
410 bool DCCEnable[],
411 double PixelClock[],
412 int BytePerPixelY[],
413 int BytePerPixelC[],
414 enum scan_direction_class SourceScan[],
415 int dpte_row_height[],
416 int dpte_row_height_chroma[],
417 int meta_row_width[],
418 int meta_row_width_chroma[],
419 int meta_row_height[],
420 int meta_row_height_chroma[],
421 int meta_req_width[],
422 int meta_req_width_chroma[],
423 int meta_req_height[],
424 int meta_req_height_chroma[],
425 int dpte_group_bytes[],
426 int PTERequestSizeY[],
427 int PTERequestSizeC[],
428 int PixelPTEReqWidthY[],
429 int PixelPTEReqHeightY[],
430 int PixelPTEReqWidthC[],
431 int PixelPTEReqHeightC[],
432 int dpte_row_width_luma_ub[],
433 int dpte_row_width_chroma_ub[],
434 double DST_Y_PER_PTE_ROW_NOM_L[],
435 double DST_Y_PER_PTE_ROW_NOM_C[],
436 double DST_Y_PER_META_ROW_NOM_L[],
437 double DST_Y_PER_META_ROW_NOM_C[],
438 double TimePerMetaChunkNominal[],
439 double TimePerChromaMetaChunkNominal[],
440 double TimePerMetaChunkVBlank[],
441 double TimePerChromaMetaChunkVBlank[],
442 double TimePerMetaChunkFlip[],
443 double TimePerChromaMetaChunkFlip[],
444 double time_per_pte_group_nom_luma[],
445 double time_per_pte_group_vblank_luma[],
446 double time_per_pte_group_flip_luma[],
447 double time_per_pte_group_nom_chroma[],
448 double time_per_pte_group_vblank_chroma[],
449 double time_per_pte_group_flip_chroma[]);
450
451 static void CalculateVMGroupAndRequestTimes(
452 unsigned int NumberOfActivePlanes,
453 bool GPUVMEnable,
454 unsigned int GPUVMMaxPageTableLevels,
455 unsigned int HTotal[],
456 int BytePerPixelC[],
457 double DestinationLinesToRequestVMInVBlank[],
458 double DestinationLinesToRequestVMInImmediateFlip[],
459 bool DCCEnable[],
460 double PixelClock[],
461 int dpte_row_width_luma_ub[],
462 int dpte_row_width_chroma_ub[],
463 int vm_group_bytes[],
464 unsigned int dpde0_bytes_per_frame_ub_l[],
465 unsigned int dpde0_bytes_per_frame_ub_c[],
466 int meta_pte_bytes_per_frame_ub_l[],
467 int meta_pte_bytes_per_frame_ub_c[],
468 double TimePerVMGroupVBlank[],
469 double TimePerVMGroupFlip[],
470 double TimePerVMRequestVBlank[],
471 double TimePerVMRequestFlip[]);
472
473 static void CalculateStutterEfficiency(
474 struct display_mode_lib *mode_lib,
475 int CompressedBufferSizeInkByte,
476 bool UnboundedRequestEnabled,
477 int ConfigReturnBufferSizeInKByte,
478 int MetaFIFOSizeInKEntries,
479 int ZeroSizeBufferEntries,
480 int NumberOfActivePlanes,
481 int ROBBufferSizeInKByte,
482 double TotalDataReadBandwidth,
483 double DCFCLK,
484 double ReturnBW,
485 double COMPBUF_RESERVED_SPACE_64B,
486 double COMPBUF_RESERVED_SPACE_ZS,
487 double SRExitTime,
488 double SRExitZ8Time,
489 bool SynchronizedVBlank,
490 double Z8StutterEnterPlusExitWatermark,
491 double StutterEnterPlusExitWatermark,
492 bool ProgressiveToInterlaceUnitInOPP,
493 bool Interlace[],
494 double MinTTUVBlank[],
495 int DPPPerPlane[],
496 unsigned int DETBufferSizeY[],
497 int BytePerPixelY[],
498 double BytePerPixelDETY[],
499 double SwathWidthY[],
500 int SwathHeightY[],
501 int SwathHeightC[],
502 double NetDCCRateLuma[],
503 double NetDCCRateChroma[],
504 double DCCFractionOfZeroSizeRequestsLuma[],
505 double DCCFractionOfZeroSizeRequestsChroma[],
506 int HTotal[],
507 int VTotal[],
508 double PixelClock[],
509 double VRatio[],
510 enum scan_direction_class SourceScan[],
511 int BlockHeight256BytesY[],
512 int BlockWidth256BytesY[],
513 int BlockHeight256BytesC[],
514 int BlockWidth256BytesC[],
515 int DCCYMaxUncompressedBlock[],
516 int DCCCMaxUncompressedBlock[],
517 int VActive[],
518 bool DCCEnable[],
519 bool WritebackEnable[],
520 double ReadBandwidthPlaneLuma[],
521 double ReadBandwidthPlaneChroma[],
522 double meta_row_bw[],
523 double dpte_row_bw[],
524 double *StutterEfficiencyNotIncludingVBlank,
525 double *StutterEfficiency,
526 int *NumberOfStutterBurstsPerFrame,
527 double *Z8StutterEfficiencyNotIncludingVBlank,
528 double *Z8StutterEfficiency,
529 int *Z8NumberOfStutterBurstsPerFrame,
530 double *StutterPeriod);
531
532 static void CalculateSwathAndDETConfiguration(
533 bool ForceSingleDPP,
534 int NumberOfActivePlanes,
535 bool DETSharedByAllDPP,
536 unsigned int DETBufferSizeInKByte[],
537 double MaximumSwathWidthLuma[],
538 double MaximumSwathWidthChroma[],
539 enum scan_direction_class SourceScan[],
540 enum source_format_class SourcePixelFormat[],
541 enum dm_swizzle_mode SurfaceTiling[],
542 int ViewportWidth[],
543 int ViewportHeight[],
544 int SurfaceWidthY[],
545 int SurfaceWidthC[],
546 int SurfaceHeightY[],
547 int SurfaceHeightC[],
548 int Read256BytesBlockHeightY[],
549 int Read256BytesBlockHeightC[],
550 int Read256BytesBlockWidthY[],
551 int Read256BytesBlockWidthC[],
552 enum odm_combine_mode ODMCombineEnabled[],
553 int BlendingAndTiming[],
554 int BytePerPixY[],
555 int BytePerPixC[],
556 double BytePerPixDETY[],
557 double BytePerPixDETC[],
558 int HActive[],
559 double HRatio[],
560 double HRatioChroma[],
561 int DPPPerPlane[],
562 int swath_width_luma_ub[],
563 int swath_width_chroma_ub[],
564 double SwathWidth[],
565 double SwathWidthChroma[],
566 int SwathHeightY[],
567 int SwathHeightC[],
568 unsigned int DETBufferSizeY[],
569 unsigned int DETBufferSizeC[],
570 bool ViewportSizeSupportPerPlane[],
571 bool *ViewportSizeSupport);
572 static void CalculateSwathWidth(
573 bool ForceSingleDPP,
574 int NumberOfActivePlanes,
575 enum source_format_class SourcePixelFormat[],
576 enum scan_direction_class SourceScan[],
577 int ViewportWidth[],
578 int ViewportHeight[],
579 int SurfaceWidthY[],
580 int SurfaceWidthC[],
581 int SurfaceHeightY[],
582 int SurfaceHeightC[],
583 enum odm_combine_mode ODMCombineEnabled[],
584 int BytePerPixY[],
585 int BytePerPixC[],
586 int Read256BytesBlockHeightY[],
587 int Read256BytesBlockHeightC[],
588 int Read256BytesBlockWidthY[],
589 int Read256BytesBlockWidthC[],
590 int BlendingAndTiming[],
591 int HActive[],
592 double HRatio[],
593 int DPPPerPlane[],
594 double SwathWidthSingleDPPY[],
595 double SwathWidthSingleDPPC[],
596 double SwathWidthY[],
597 double SwathWidthC[],
598 int MaximumSwathHeightY[],
599 int MaximumSwathHeightC[],
600 int swath_width_luma_ub[],
601 int swath_width_chroma_ub[]);
602
603 static double CalculateExtraLatency(
604 int RoundTripPingLatencyCycles,
605 int ReorderingBytes,
606 double DCFCLK,
607 int TotalNumberOfActiveDPP,
608 int PixelChunkSizeInKByte,
609 int TotalNumberOfDCCActiveDPP,
610 int MetaChunkSize,
611 double ReturnBW,
612 bool GPUVMEnable,
613 bool HostVMEnable,
614 int NumberOfActivePlanes,
615 int NumberOfDPP[],
616 int dpte_group_bytes[],
617 double HostVMInefficiencyFactor,
618 double HostVMMinPageSize,
619 int HostVMMaxNonCachedPageTableLevels);
620
621 static double CalculateExtraLatencyBytes(
622 int ReorderingBytes,
623 int TotalNumberOfActiveDPP,
624 int PixelChunkSizeInKByte,
625 int TotalNumberOfDCCActiveDPP,
626 int MetaChunkSize,
627 bool GPUVMEnable,
628 bool HostVMEnable,
629 int NumberOfActivePlanes,
630 int NumberOfDPP[],
631 int dpte_group_bytes[],
632 double HostVMInefficiencyFactor,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635
636 static double CalculateUrgentLatency(
637 double UrgentLatencyPixelDataOnly,
638 double UrgentLatencyPixelMixedWithVMData,
639 double UrgentLatencyVMDataOnly,
640 bool DoUrgentLatencyAdjustment,
641 double UrgentLatencyAdjustmentFabricClockComponent,
642 double UrgentLatencyAdjustmentFabricClockReference,
643 double FabricClockSingle);
644
645 static void CalculateUnboundedRequestAndCompressedBufferSize(
646 unsigned int DETBufferSizeInKByte,
647 int ConfigReturnBufferSizeInKByte,
648 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
649 int TotalActiveDPP,
650 bool NoChromaPlanes,
651 int MaxNumDPP,
652 int CompressedBufferSegmentSizeInkByteFinal,
653 enum output_encoder_class *Output,
654 bool *UnboundedRequestEnabled,
655 int *CompressedBufferSizeInkByte);
656
657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
658
/*
 * dml31_recalculate - run the full recalculation sequence on mode_lib.
 *
 * Non-static entry point of this file. It calls, in this fixed order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 * Every stage receives the same mode_lib pointer; nothing is returned.
 * NOTE(review): the stages presumably read and update state inside mode_lib,
 * so the order should be treated as significant - confirm before reordering.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
#ifdef __DML_VBA_DEBUG__
	// Debug-only trace emitted right before the final stage.
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
669
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)670 static unsigned int dscceComputeDelay(
671 unsigned int bpc,
672 double BPP,
673 unsigned int sliceWidth,
674 unsigned int numSlices,
675 enum output_format_class pixelFormat,
676 enum output_encoder_class Output)
677 {
678 // valid bpc = source bits per component in the set of {8, 10, 12}
679 // valid bpp = increments of 1/16 of a bit
680 // min = 6/7/8 in N420/N422/444, respectively
681 // max = such that compression is 1:1
682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
685
686 // fixed value
687 unsigned int rcModelSize = 8192;
688
689 // N422/N420 operate at 2 pixels per clock
690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
691
692 if (pixelFormat == dm_420)
693 pixelsPerClock = 2;
694 else if (pixelFormat == dm_444)
695 pixelsPerClock = 1;
696 else if (pixelFormat == dm_n422)
697 pixelsPerClock = 2;
698 // #all other modes operate at 1 pixel per clock
699 else
700 pixelsPerClock = 1;
701
702 //initial transmit delay as per PPS
703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
704
705 //compute ssm delay
706 if (bpc == 8)
707 D = 81;
708 else if (bpc == 10)
709 D = 89;
710 else
711 D = 113;
712
713 //divide by pixel per cycle to compute slice width as seen by DSC
714 w = sliceWidth / pixelsPerClock;
715
716 //422 mode has an additional cycle of delay
717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
718 s = 0;
719 else
720 s = 1;
721
722 //main calculation for the dscce
723 ix = initalXmitDelay + 45;
724 wx = (w + 2) / 3;
725 P = 3 * wx - w;
726 l0 = ix / w;
727 a = ix + P * l0;
728 ax = (a + 2) / 3 + D + 6 + 1;
729 L = (ax + wx - 1) / wx;
730 if ((ix % w) == 0 && P != 0)
731 lstall = 1;
732 else
733 lstall = 0;
734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735
736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
737 pixels = Delay * 3 * pixelsPerClock;
738 return pixels;
739 }
740
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 {
743 unsigned int Delay = 0;
744
745 if (pixelFormat == dm_420) {
746 // sfr
747 Delay = Delay + 2;
748 // dsccif
749 Delay = Delay + 0;
750 // dscc - input deserializer
751 Delay = Delay + 3;
752 // dscc gets pixels every other cycle
753 Delay = Delay + 2;
754 // dscc - input cdc fifo
755 Delay = Delay + 12;
756 // dscc gets pixels every other cycle
757 Delay = Delay + 13;
758 // dscc - cdc uncertainty
759 Delay = Delay + 2;
760 // dscc - output cdc fifo
761 Delay = Delay + 7;
762 // dscc gets pixels every other cycle
763 Delay = Delay + 3;
764 // dscc - cdc uncertainty
765 Delay = Delay + 2;
766 // dscc - output serializer
767 Delay = Delay + 1;
768 // sft
769 Delay = Delay + 1;
770 } else if (pixelFormat == dm_n422) {
771 // sfr
772 Delay = Delay + 2;
773 // dsccif
774 Delay = Delay + 1;
775 // dscc - input deserializer
776 Delay = Delay + 5;
777 // dscc - input cdc fifo
778 Delay = Delay + 25;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 10;
783 // dscc - cdc uncertainty
784 Delay = Delay + 2;
785 // dscc - output serializer
786 Delay = Delay + 1;
787 // sft
788 Delay = Delay + 1;
789 } else {
790 // sfr
791 Delay = Delay + 2;
792 // dsccif
793 Delay = Delay + 0;
794 // dscc - input deserializer
795 Delay = Delay + 3;
796 // dscc - input cdc fifo
797 Delay = Delay + 12;
798 // dscc - cdc uncertainty
799 Delay = Delay + 2;
800 // dscc - output cdc fifo
801 Delay = Delay + 7;
802 // dscc - output serializer
803 Delay = Delay + 1;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // sft
807 Delay = Delay + 1;
808 }
809
810 return Delay;
811 }
812
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)813 static bool CalculatePrefetchSchedule(
814 struct display_mode_lib *mode_lib,
815 double HostVMInefficiencyFactor,
816 Pipe *myPipe,
817 unsigned int DSCDelay,
818 double DPPCLKDelaySubtotalPlusCNVCFormater,
819 double DPPCLKDelaySCL,
820 double DPPCLKDelaySCLLBOnly,
821 double DPPCLKDelayCNVCCursor,
822 double DISPCLKDelaySubtotal,
823 unsigned int DPP_RECOUT_WIDTH,
824 enum output_format_class OutputFormat,
825 unsigned int MaxInterDCNTileRepeaters,
826 unsigned int VStartup,
827 unsigned int MaxVStartup,
828 unsigned int GPUVMPageTableLevels,
829 bool GPUVMEnable,
830 bool HostVMEnable,
831 unsigned int HostVMMaxNonCachedPageTableLevels,
832 double HostVMMinPageSize,
833 bool DynamicMetadataEnable,
834 bool DynamicMetadataVMEnabled,
835 int DynamicMetadataLinesBeforeActiveRequired,
836 unsigned int DynamicMetadataTransmittedBytes,
837 double UrgentLatency,
838 double UrgentExtraLatency,
839 double TCalc,
840 unsigned int PDEAndMetaPTEBytesFrame,
841 unsigned int MetaRowByte,
842 unsigned int PixelPTEBytesPerRow,
843 double PrefetchSourceLinesY,
844 unsigned int SwathWidthY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 double VInitPreFillC,
850 unsigned int MaxNumSwathC,
851 int swath_width_luma_ub,
852 int swath_width_chroma_ub,
853 unsigned int SwathHeightY,
854 unsigned int SwathHeightC,
855 double TWait,
856 double *DSTXAfterScaler,
857 double *DSTYAfterScaler,
858 double *DestinationLinesForPrefetch,
859 double *PrefetchBandwidth,
860 double *DestinationLinesToRequestVMInVBlank,
861 double *DestinationLinesToRequestRowInVBlank,
862 double *VRatioPrefetchY,
863 double *VRatioPrefetchC,
864 double *RequiredPrefetchPixDataBWLuma,
865 double *RequiredPrefetchPixDataBWChroma,
866 bool *NotEnoughTimeForDynamicMetadata,
867 double *Tno_bw,
868 double *prefetch_vmrow_bw,
869 double *Tdmdl_vm,
870 double *Tdmdl,
871 double *TSetup,
872 int *VUpdateOffsetPix,
873 double *VUpdateWidthPix,
874 double *VReadyOffsetPix)
875 {
876 bool MyError = false;
877 unsigned int DPPCycles, DISPCLKCycles;
878 double DSTTotalPixelsAfterScaler;
879 double LineTime;
880 double dst_y_prefetch_equ;
881 #ifdef __DML_VBA_DEBUG__
882 double Tsw_oto;
883 #endif
884 double prefetch_bw_oto;
885 double prefetch_bw_pr;
886 double Tvm_oto;
887 double Tr0_oto;
888 double Tvm_oto_lines;
889 double Tr0_oto_lines;
890 double dst_y_prefetch_oto;
891 double TimeForFetchingMetaPTE = 0;
892 double TimeForFetchingRowInVBlank = 0;
893 double LinesToRequestPrefetchPixelData = 0;
894 unsigned int HostVMDynamicLevelsTrips;
895 double trip_to_mem;
896 double Tvm_trips;
897 double Tr0_trips;
898 double Tvm_trips_rounded;
899 double Tr0_trips_rounded;
900 double Lsw_oto;
901 double Tpre_rounded;
902 double prefetch_bw_equ;
903 double Tvm_equ;
904 double Tr0_equ;
905 double Tdmbf;
906 double Tdmec;
907 double Tdmsks;
908 double prefetch_sw_bytes;
909 double bytes_pp;
910 double dep_bytes;
911 int max_vratio_pre = 4;
912 double min_Lsw;
913 double Tsw_est1 = 0;
914 double Tsw_est3 = 0;
915 double max_Tsw = 0;
916
917 if (GPUVMEnable == true && HostVMEnable == true) {
918 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
919 } else {
920 HostVMDynamicLevelsTrips = 0;
921 }
922 #ifdef __DML_VBA_DEBUG__
923 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
924 #endif
925 CalculateVupdateAndDynamicMetadataParameters(
926 MaxInterDCNTileRepeaters,
927 myPipe->DPPCLK,
928 myPipe->DISPCLK,
929 myPipe->DCFCLKDeepSleep,
930 myPipe->PixelClock,
931 myPipe->HTotal,
932 myPipe->VBlank,
933 DynamicMetadataTransmittedBytes,
934 DynamicMetadataLinesBeforeActiveRequired,
935 myPipe->InterlaceEnable,
936 myPipe->ProgressiveToInterlaceUnitInOPP,
937 TSetup,
938 &Tdmbf,
939 &Tdmec,
940 &Tdmsks,
941 VUpdateOffsetPix,
942 VUpdateWidthPix,
943 VReadyOffsetPix);
944
945 LineTime = myPipe->HTotal / myPipe->PixelClock;
946 trip_to_mem = UrgentLatency;
947 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
948
949 #ifdef __DML_VBA_ALLOW_DELTA__
950 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
951 #else
952 if (DynamicMetadataVMEnabled == true) {
953 #endif
954 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
955 } else {
956 *Tdmdl = TWait + UrgentExtraLatency;
957 }
958
959 #ifdef __DML_VBA_ALLOW_DELTA__
960 if (DynamicMetadataEnable == false) {
961 *Tdmdl = 0.0;
962 }
963 #endif
964
965 if (DynamicMetadataEnable == true) {
966 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
967 *NotEnoughTimeForDynamicMetadata = true;
968 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
969 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
970 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
971 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
972 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
973 } else {
974 *NotEnoughTimeForDynamicMetadata = false;
975 }
976 } else {
977 *NotEnoughTimeForDynamicMetadata = false;
978 }
979
980 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
981
982 if (myPipe->ScalerEnabled)
983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
984 else
985 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
986
987 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
988
989 DISPCLKCycles = DISPCLKDelaySubtotal;
990
991 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
992 return true;
993
994 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
995
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
998 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
999 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1000 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1001 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1002 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1003 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1004 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1005 #endif
1006
1007 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1008
1009 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1010 *DSTYAfterScaler = 1;
1011 else
1012 *DSTYAfterScaler = 0;
1013
1014 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1015 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1016 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1017
1018 #ifdef __DML_VBA_DEBUG__
1019 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1020 #endif
1021
1022 MyError = false;
1023
1024 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1025 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1026 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1027
1028 #ifdef __DML_VBA_ALLOW_DELTA__
1029 if (!myPipe->DCCEnable) {
1030 Tr0_trips = 0.0;
1031 Tr0_trips_rounded = 0.0;
1032 }
1033 #endif
1034
1035 if (!GPUVMEnable) {
1036 Tvm_trips = 0.0;
1037 Tvm_trips_rounded = 0.0;
1038 }
1039
1040 if (GPUVMEnable) {
1041 if (GPUVMPageTableLevels >= 3) {
1042 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1043 } else {
1044 *Tno_bw = 0;
1045 }
1046 } else if (!myPipe->DCCEnable) {
1047 *Tno_bw = LineTime;
1048 } else {
1049 *Tno_bw = LineTime / 4;
1050 }
1051
1052 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1054 else
1055 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1056 /*rev 99*/
1057 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1058 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1059 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1060 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1061 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1062
1063 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1064 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1065 #ifdef __DML_VBA_DEBUG__
1066 Tsw_oto = Lsw_oto * LineTime;
1067 #endif
1068
1069
1070 #ifdef __DML_VBA_DEBUG__
1071 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1072 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1073 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1074 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1075 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1076 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1077 #endif
1078
1079 if (GPUVMEnable == true)
1080 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1081 else
1082 Tvm_oto = LineTime / 4.0;
1083
1084 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1085 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1086 LineTime - Tvm_oto,
1087 LineTime / 4);
1088 } else {
1089 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1090 }
1091
1092 #ifdef __DML_VBA_DEBUG__
1093 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1094 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1095 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1096 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1097 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1098 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1099 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1100 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1101 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1102 #endif
1103
1104 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1105 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1106 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1107 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1108 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
1109 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1110 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1111
1112 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1113
1114 if (prefetch_sw_bytes < dep_bytes)
1115 prefetch_sw_bytes = 2 * dep_bytes;
1116
1117 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1118 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1119 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1120 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1121 dml_print("DML: LineTime: %f\n", LineTime);
1122 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1123
1124 dml_print("DML: LineTime: %f\n", LineTime);
1125 dml_print("DML: VStartup: %d\n", VStartup);
1126 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1127 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1128 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1129 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1130 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1131 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1132 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1133 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1134 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1135 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1136 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1137
1138 *PrefetchBandwidth = 0;
1139 *DestinationLinesToRequestVMInVBlank = 0;
1140 *DestinationLinesToRequestRowInVBlank = 0;
1141 *VRatioPrefetchY = 0;
1142 *VRatioPrefetchC = 0;
1143 *RequiredPrefetchPixDataBWLuma = 0;
1144 if (dst_y_prefetch_equ > 1) {
1145 double PrefetchBandwidth1;
1146 double PrefetchBandwidth2;
1147 double PrefetchBandwidth3;
1148 double PrefetchBandwidth4;
1149
1150 if (Tpre_rounded - *Tno_bw > 0) {
1151 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1152 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1153 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1154 } else {
1155 PrefetchBandwidth1 = 0;
1156 }
1157
1158 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1159 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1160 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1161 }
1162
1163 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1164 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1165 else
1166 PrefetchBandwidth2 = 0;
1167
1168 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1169 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1170 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1171 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1172 } else {
1173 PrefetchBandwidth3 = 0;
1174 }
1175
1176 #ifdef __DML_VBA_DEBUG__
1177 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1178 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1179 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1180 #endif
1181 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1182 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1183 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1184 }
1185
1186 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1187 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1188 else
1189 PrefetchBandwidth4 = 0;
1190
1191 {
1192 bool Case1OK;
1193 bool Case2OK;
1194 bool Case3OK;
1195
1196 if (PrefetchBandwidth1 > 0) {
1197 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1198 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1199 Case1OK = true;
1200 } else {
1201 Case1OK = false;
1202 }
1203 } else {
1204 Case1OK = false;
1205 }
1206
1207 if (PrefetchBandwidth2 > 0) {
1208 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1209 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1210 Case2OK = true;
1211 } else {
1212 Case2OK = false;
1213 }
1214 } else {
1215 Case2OK = false;
1216 }
1217
1218 if (PrefetchBandwidth3 > 0) {
1219 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1220 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1221 Case3OK = true;
1222 } else {
1223 Case3OK = false;
1224 }
1225 } else {
1226 Case3OK = false;
1227 }
1228
1229 if (Case1OK) {
1230 prefetch_bw_equ = PrefetchBandwidth1;
1231 } else if (Case2OK) {
1232 prefetch_bw_equ = PrefetchBandwidth2;
1233 } else if (Case3OK) {
1234 prefetch_bw_equ = PrefetchBandwidth3;
1235 } else {
1236 prefetch_bw_equ = PrefetchBandwidth4;
1237 }
1238
1239 #ifdef __DML_VBA_DEBUG__
1240 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1241 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1242 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1243 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1244 #endif
1245
1246 if (prefetch_bw_equ > 0) {
1247 if (GPUVMEnable == true) {
1248 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1249 } else {
1250 Tvm_equ = LineTime / 4;
1251 }
1252
1253 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1254 Tr0_equ = dml_max4(
1255 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1256 Tr0_trips,
1257 (LineTime - Tvm_equ) / 2,
1258 LineTime / 4);
1259 } else {
1260 Tr0_equ = (LineTime - Tvm_equ) / 2;
1261 }
1262 } else {
1263 Tvm_equ = 0;
1264 Tr0_equ = 0;
1265 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1266 }
1267 }
1268
1269 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1270 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1271 TimeForFetchingMetaPTE = Tvm_oto;
1272 TimeForFetchingRowInVBlank = Tr0_oto;
1273 *PrefetchBandwidth = prefetch_bw_oto;
1274 } else {
1275 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1276 TimeForFetchingMetaPTE = Tvm_equ;
1277 TimeForFetchingRowInVBlank = Tr0_equ;
1278 *PrefetchBandwidth = prefetch_bw_equ;
1279 }
1280
1281 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1282
1283 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1284
1285 #ifdef __DML_VBA_ALLOW_DELTA__
1286 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1287 // See note above dated 5/30/2018
1288 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1289 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1290 #else
1291 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1292 #endif
1293
1294 #ifdef __DML_VBA_DEBUG__
1295 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1296 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1297 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1298 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1299 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1300 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1301 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1302 #endif
1303
1304 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1305
1306 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1307 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1308 #ifdef __DML_VBA_DEBUG__
1309 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1310 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1311 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1312 #endif
1313 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1314 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1315 *VRatioPrefetchY = dml_max(
1316 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1317 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1318 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1319 } else {
1320 MyError = true;
1321 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1322 *VRatioPrefetchY = 0;
1323 }
1324 #ifdef __DML_VBA_DEBUG__
1325 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1326 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1327 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1328 #endif
1329 }
1330
1331 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1332 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1333
1334 #ifdef __DML_VBA_DEBUG__
1335 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1336 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1337 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1338 #endif
1339 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1340 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1341 *VRatioPrefetchC = dml_max(
1342 *VRatioPrefetchC,
1343 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1344 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1345 } else {
1346 MyError = true;
1347 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1348 *VRatioPrefetchC = 0;
1349 }
1350 #ifdef __DML_VBA_DEBUG__
1351 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1352 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1353 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1354 #endif
1355 }
1356
1357 #ifdef __DML_VBA_DEBUG__
1358 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1359 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1360 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1361 #endif
1362
1363 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1364
1365 #ifdef __DML_VBA_DEBUG__
1366 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1367 #endif
1368
1369 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1370 / LineTime;
1371 } else {
1372 MyError = true;
1373 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1374 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1375 *VRatioPrefetchY = 0;
1376 *VRatioPrefetchC = 0;
1377 *RequiredPrefetchPixDataBWLuma = 0;
1378 *RequiredPrefetchPixDataBWChroma = 0;
1379 }
1380
1381 dml_print(
1382 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1383 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1384 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1385 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1386 dml_print(
1387 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1388 (double) LinesToRequestPrefetchPixelData * LineTime);
1389 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1390 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1391 (double) myPipe->HTotal)) * LineTime);
1392 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1393 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1394 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1395 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1396 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1397
1398 } else {
1399 MyError = true;
1400 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1401 }
1402
1403 {
1404 double prefetch_vm_bw;
1405 double prefetch_row_bw;
1406
1407 if (PDEAndMetaPTEBytesFrame == 0) {
1408 prefetch_vm_bw = 0;
1409 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1410 #ifdef __DML_VBA_DEBUG__
1411 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1412 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1413 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1414 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1415 #endif
1416 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1417 #ifdef __DML_VBA_DEBUG__
1418 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1419 #endif
1420 } else {
1421 prefetch_vm_bw = 0;
1422 MyError = true;
1423 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1424 }
1425
1426 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1427 prefetch_row_bw = 0;
1428 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1429 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1430
1431 #ifdef __DML_VBA_DEBUG__
1432 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1433 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1434 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1435 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1436 #endif
1437 } else {
1438 prefetch_row_bw = 0;
1439 MyError = true;
1440 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1441 }
1442
1443 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1444 }
1445
1446 if (MyError) {
1447 *PrefetchBandwidth = 0;
1448 *DestinationLinesToRequestVMInVBlank = 0;
1449 *DestinationLinesToRequestRowInVBlank = 0;
1450 *DestinationLinesForPrefetch = 0;
1451 *VRatioPrefetchY = 0;
1452 *VRatioPrefetchC = 0;
1453 *RequiredPrefetchPixDataBWLuma = 0;
1454 *RequiredPrefetchPixDataBWChroma = 0;
1455 }
1456
1457 return MyError;
1458 }
1459
/*
 * Re-express Clock as (4 * VCOSpeed) / N, where N is the ratio
 * (4 * VCOSpeed) / Clock passed through dml_floor() with a granularity of 1.
 *
 * dml_floor() is defined outside this file; presumably it rounds down to a
 * whole multiple of the granularity. Under that assumption, and for positive
 * inputs with Clock <= 4 * VCOSpeed, N <= (4 * VCOSpeed) / Clock and the
 * returned value is therefore never below Clock.
 *
 * NOTE(review): there is no guard here against Clock == 0, nor against
 * Clock > 4 * VCOSpeed (where the floored divider would presumably be 0);
 * callers are expected to pass sane clocks - confirm against call sites.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double QuadVCOSpeed = VCOSpeed * 4;
	const double Divider = dml_floor(QuadVCOSpeed / Clock, 1);

	return QuadVCOSpeed / Divider;
}
1464
/*
 * Re-express Clock as (4 * VCOSpeed) / N, where N is the ratio
 * (4 * VCOSpeed) / Clock passed through dml_ceil() with a granularity of 1.
 *
 * dml_ceil() is defined outside this file; presumably it rounds up to a
 * whole multiple of the granularity. Under that assumption, and for positive
 * inputs, N >= (4 * VCOSpeed) / Clock and the returned value is therefore
 * never above Clock.
 *
 * NOTE(review): there is no guard here against Clock == 0; callers are
 * expected to pass a non-zero clock - confirm against call sites.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double Divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	const double QuadVCOSpeed = VCOSpeed * 4;

	return QuadVCOSpeed / Divider;
}
1469
1470 static void CalculateDCCConfiguration(
1471 bool DCCEnabled,
1472 bool DCCProgrammingAssumesScanDirectionUnknown,
1473 enum source_format_class SourcePixelFormat,
1474 unsigned int SurfaceWidthLuma,
1475 unsigned int SurfaceWidthChroma,
1476 unsigned int SurfaceHeightLuma,
1477 unsigned int SurfaceHeightChroma,
1478 double DETBufferSize,
1479 unsigned int RequestHeight256ByteLuma,
1480 unsigned int RequestHeight256ByteChroma,
1481 enum dm_swizzle_mode TilingFormat,
1482 unsigned int BytePerPixelY,
1483 unsigned int BytePerPixelC,
1484 double BytePerPixelDETY,
1485 double BytePerPixelDETC,
1486 enum scan_direction_class ScanOrientation,
1487 unsigned int *MaxUncompressedBlockLuma,
1488 unsigned int *MaxUncompressedBlockChroma,
1489 unsigned int *MaxCompressedBlockLuma,
1490 unsigned int *MaxCompressedBlockChroma,
1491 unsigned int *IndependentBlockLuma,
1492 unsigned int *IndependentBlockChroma)
1493 {
1494 int yuv420;
1495 int horz_div_l;
1496 int horz_div_c;
1497 int vert_div_l;
1498 int vert_div_c;
1499
1500 int swath_buf_size;
1501 double detile_buf_vp_horz_limit;
1502 double detile_buf_vp_vert_limit;
1503
1504 int MAS_vp_horz_limit;
1505 int MAS_vp_vert_limit;
1506 int max_vp_horz_width;
1507 int max_vp_vert_height;
1508 int eff_surf_width_l;
1509 int eff_surf_width_c;
1510 int eff_surf_height_l;
1511 int eff_surf_height_c;
1512
1513 int full_swath_bytes_horz_wc_l;
1514 int full_swath_bytes_horz_wc_c;
1515 int full_swath_bytes_vert_wc_l;
1516 int full_swath_bytes_vert_wc_c;
1517 int req128_horz_wc_l;
1518 int req128_horz_wc_c;
1519 int req128_vert_wc_l;
1520 int req128_vert_wc_c;
1521 int segment_order_horz_contiguous_luma;
1522 int segment_order_horz_contiguous_chroma;
1523 int segment_order_vert_contiguous_luma;
1524 int segment_order_vert_contiguous_chroma;
1525
1526 typedef enum {
1527 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1528 } RequestType;
1529 RequestType RequestLuma;
1530 RequestType RequestChroma;
1531
1532 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1533 horz_div_l = 1;
1534 horz_div_c = 1;
1535 vert_div_l = 1;
1536 vert_div_c = 1;
1537
1538 if (BytePerPixelY == 1)
1539 vert_div_l = 0;
1540 if (BytePerPixelC == 1)
1541 vert_div_c = 0;
1542 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1543 horz_div_l = 0;
1544 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1545 horz_div_c = 0;
1546
1547 if (BytePerPixelC == 0) {
1548 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1549 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1550 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1551 } else {
1552 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1553 detile_buf_vp_horz_limit = (double) swath_buf_size
1554 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1555 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1556 detile_buf_vp_vert_limit = (double) swath_buf_size
1557 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1558 }
1559
1560 if (SourcePixelFormat == dm_420_10) {
1561 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1562 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1563 }
1564
1565 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1566 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1567
1568 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1569 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1570 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1571 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1572 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1573 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1574 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1575 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1576
1577 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1578 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1579 if (BytePerPixelC > 0) {
1580 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1581 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1582 } else {
1583 full_swath_bytes_horz_wc_c = 0;
1584 full_swath_bytes_vert_wc_c = 0;
1585 }
1586
1587 if (SourcePixelFormat == dm_420_10) {
1588 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1589 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1590 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1591 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1592 }
1593
1594 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1595 req128_horz_wc_l = 0;
1596 req128_horz_wc_c = 0;
1597 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1598 req128_horz_wc_l = 0;
1599 req128_horz_wc_c = 1;
1600 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1601 req128_horz_wc_l = 1;
1602 req128_horz_wc_c = 0;
1603 } else {
1604 req128_horz_wc_l = 1;
1605 req128_horz_wc_c = 1;
1606 }
1607
1608 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1609 req128_vert_wc_l = 0;
1610 req128_vert_wc_c = 0;
1611 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1612 req128_vert_wc_l = 0;
1613 req128_vert_wc_c = 1;
1614 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1615 req128_vert_wc_l = 1;
1616 req128_vert_wc_c = 0;
1617 } else {
1618 req128_vert_wc_l = 1;
1619 req128_vert_wc_c = 1;
1620 }
1621
1622 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1623 segment_order_horz_contiguous_luma = 0;
1624 } else {
1625 segment_order_horz_contiguous_luma = 1;
1626 }
1627 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1628 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1629 segment_order_vert_contiguous_luma = 0;
1630 } else {
1631 segment_order_vert_contiguous_luma = 1;
1632 }
1633 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1634 segment_order_horz_contiguous_chroma = 0;
1635 } else {
1636 segment_order_horz_contiguous_chroma = 1;
1637 }
1638 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1639 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1640 segment_order_vert_contiguous_chroma = 0;
1641 } else {
1642 segment_order_vert_contiguous_chroma = 1;
1643 }
1644
1645 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1646 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1647 RequestLuma = REQ_256Bytes;
1648 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1649 RequestLuma = REQ_128BytesNonContiguous;
1650 } else {
1651 RequestLuma = REQ_128BytesContiguous;
1652 }
1653 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1654 RequestChroma = REQ_256Bytes;
1655 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1656 RequestChroma = REQ_128BytesNonContiguous;
1657 } else {
1658 RequestChroma = REQ_128BytesContiguous;
1659 }
1660 } else if (ScanOrientation != dm_vert) {
1661 if (req128_horz_wc_l == 0) {
1662 RequestLuma = REQ_256Bytes;
1663 } else if (segment_order_horz_contiguous_luma == 0) {
1664 RequestLuma = REQ_128BytesNonContiguous;
1665 } else {
1666 RequestLuma = REQ_128BytesContiguous;
1667 }
1668 if (req128_horz_wc_c == 0) {
1669 RequestChroma = REQ_256Bytes;
1670 } else if (segment_order_horz_contiguous_chroma == 0) {
1671 RequestChroma = REQ_128BytesNonContiguous;
1672 } else {
1673 RequestChroma = REQ_128BytesContiguous;
1674 }
1675 } else {
1676 if (req128_vert_wc_l == 0) {
1677 RequestLuma = REQ_256Bytes;
1678 } else if (segment_order_vert_contiguous_luma == 0) {
1679 RequestLuma = REQ_128BytesNonContiguous;
1680 } else {
1681 RequestLuma = REQ_128BytesContiguous;
1682 }
1683 if (req128_vert_wc_c == 0) {
1684 RequestChroma = REQ_256Bytes;
1685 } else if (segment_order_vert_contiguous_chroma == 0) {
1686 RequestChroma = REQ_128BytesNonContiguous;
1687 } else {
1688 RequestChroma = REQ_128BytesContiguous;
1689 }
1690 }
1691
1692 if (RequestLuma == REQ_256Bytes) {
1693 *MaxUncompressedBlockLuma = 256;
1694 *MaxCompressedBlockLuma = 256;
1695 *IndependentBlockLuma = 0;
1696 } else if (RequestLuma == REQ_128BytesContiguous) {
1697 *MaxUncompressedBlockLuma = 256;
1698 *MaxCompressedBlockLuma = 128;
1699 *IndependentBlockLuma = 128;
1700 } else {
1701 *MaxUncompressedBlockLuma = 256;
1702 *MaxCompressedBlockLuma = 64;
1703 *IndependentBlockLuma = 64;
1704 }
1705
1706 if (RequestChroma == REQ_256Bytes) {
1707 *MaxUncompressedBlockChroma = 256;
1708 *MaxCompressedBlockChroma = 256;
1709 *IndependentBlockChroma = 0;
1710 } else if (RequestChroma == REQ_128BytesContiguous) {
1711 *MaxUncompressedBlockChroma = 256;
1712 *MaxCompressedBlockChroma = 128;
1713 *IndependentBlockChroma = 128;
1714 } else {
1715 *MaxUncompressedBlockChroma = 256;
1716 *MaxCompressedBlockChroma = 64;
1717 *IndependentBlockChroma = 64;
1718 }
1719
1720 if (DCCEnabled != true || BytePerPixelC == 0) {
1721 *MaxUncompressedBlockChroma = 0;
1722 *MaxCompressedBlockChroma = 0;
1723 *IndependentBlockChroma = 0;
1724 }
1725
1726 if (DCCEnabled != true) {
1727 *MaxUncompressedBlockLuma = 0;
1728 *MaxCompressedBlockLuma = 0;
1729 *IndependentBlockLuma = 0;
1730 }
1731 }
1732
1733 static double CalculatePrefetchSourceLines(
1734 struct display_mode_lib *mode_lib,
1735 double VRatio,
1736 double vtaps,
1737 bool Interlace,
1738 bool ProgressiveToInterlaceUnitInOPP,
1739 unsigned int SwathHeight,
1740 unsigned int ViewportYStart,
1741 double *VInitPreFill,
1742 unsigned int *MaxNumSwath)
1743 {
1744 struct vba_vars_st *v = &mode_lib->vba;
1745 unsigned int MaxPartialSwath;
1746
1747 if (ProgressiveToInterlaceUnitInOPP)
1748 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1749 else
1750 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1751
1752 if (!v->IgnoreViewportPositioning) {
1753
1754 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1755
1756 if (*VInitPreFill > 1.0)
1757 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1758 else
1759 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1760 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1761
1762 } else {
1763
1764 if (ViewportYStart != 0)
1765 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1766
1767 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1768
1769 if (*VInitPreFill > 1.0)
1770 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1771 else
1772 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1773 }
1774
1775 #ifdef __DML_VBA_DEBUG__
1776 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1777 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1778 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1779 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1780 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1781 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1782 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1783 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1784 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1785 #endif
1786 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1787 }
1788
1789 static unsigned int CalculateVMAndRowBytes(
1790 struct display_mode_lib *mode_lib,
1791 bool DCCEnable,
1792 unsigned int BlockHeight256Bytes,
1793 unsigned int BlockWidth256Bytes,
1794 enum source_format_class SourcePixelFormat,
1795 unsigned int SurfaceTiling,
1796 unsigned int BytePerPixel,
1797 enum scan_direction_class ScanDirection,
1798 unsigned int SwathWidth,
1799 unsigned int ViewportHeight,
1800 bool GPUVMEnable,
1801 bool HostVMEnable,
1802 unsigned int HostVMMaxNonCachedPageTableLevels,
1803 unsigned int GPUVMMinPageSize,
1804 unsigned int HostVMMinPageSize,
1805 unsigned int PTEBufferSizeInRequests,
1806 unsigned int Pitch,
1807 unsigned int DCCMetaPitch,
1808 unsigned int *MacroTileWidth,
1809 unsigned int *MetaRowByte,
1810 unsigned int *PixelPTEBytesPerRow,
1811 bool *PTEBufferSizeNotExceeded,
1812 int *dpte_row_width_ub,
1813 unsigned int *dpte_row_height,
1814 unsigned int *MetaRequestWidth,
1815 unsigned int *MetaRequestHeight,
1816 unsigned int *meta_row_width,
1817 unsigned int *meta_row_height,
1818 int *vm_group_bytes,
1819 unsigned int *dpte_group_bytes,
1820 unsigned int *PixelPTEReqWidth,
1821 unsigned int *PixelPTEReqHeight,
1822 unsigned int *PTERequestSize,
1823 int *DPDE0BytesFrame,
1824 int *MetaPTEBytesFrame)
1825 {
1826 struct vba_vars_st *v = &mode_lib->vba;
1827 unsigned int MPDEBytesFrame;
1828 unsigned int DCCMetaSurfaceBytes;
1829 unsigned int MacroTileSizeBytes;
1830 unsigned int MacroTileHeight;
1831 unsigned int ExtraDPDEBytesFrame;
1832 unsigned int PDEAndMetaPTEBytesFrame;
1833 unsigned int PixelPTEReqHeightPTEs = 0;
1834 unsigned int HostVMDynamicLevels = 0;
1835 double FractionOfPTEReturnDrop;
1836
1837 if (GPUVMEnable == true && HostVMEnable == true) {
1838 if (HostVMMinPageSize < 2048) {
1839 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1840 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1841 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1842 } else {
1843 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1844 }
1845 }
1846
1847 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1848 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1849 if (ScanDirection != dm_vert) {
1850 *meta_row_height = *MetaRequestHeight;
1851 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1852 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1853 } else {
1854 *meta_row_height = *MetaRequestWidth;
1855 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1856 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1857 }
1858 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1859 if (GPUVMEnable == true) {
1860 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1861 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1862 } else {
1863 *MetaPTEBytesFrame = 0;
1864 MPDEBytesFrame = 0;
1865 }
1866
1867 if (DCCEnable != true) {
1868 *MetaPTEBytesFrame = 0;
1869 MPDEBytesFrame = 0;
1870 *MetaRowByte = 0;
1871 }
1872
1873 if (SurfaceTiling == dm_sw_linear) {
1874 MacroTileSizeBytes = 256;
1875 MacroTileHeight = BlockHeight256Bytes;
1876 } else {
1877 MacroTileSizeBytes = 65536;
1878 MacroTileHeight = 16 * BlockHeight256Bytes;
1879 }
1880 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1881
1882 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1883 if (ScanDirection != dm_vert) {
1884 *DPDE0BytesFrame = 64
1885 * (dml_ceil(
1886 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1887 / (8 * 2097152),
1888 1) + 1);
1889 } else {
1890 *DPDE0BytesFrame = 64
1891 * (dml_ceil(
1892 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1893 / (8 * 2097152),
1894 1) + 1);
1895 }
1896 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1897 } else {
1898 *DPDE0BytesFrame = 0;
1899 ExtraDPDEBytesFrame = 0;
1900 }
1901
1902 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1903
1904 #ifdef __DML_VBA_DEBUG__
1905 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1906 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1907 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1908 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1909 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1910 #endif
1911
1912 if (HostVMEnable == true) {
1913 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1914 }
1915 #ifdef __DML_VBA_DEBUG__
1916 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1917 #endif
1918
1919 if (SurfaceTiling == dm_sw_linear) {
1920 PixelPTEReqHeightPTEs = 1;
1921 *PixelPTEReqHeight = 1;
1922 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1923 *PTERequestSize = 64;
1924 FractionOfPTEReturnDrop = 0;
1925 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1926 PixelPTEReqHeightPTEs = 16;
1927 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1928 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1929 *PTERequestSize = 128;
1930 FractionOfPTEReturnDrop = 0;
1931 } else {
1932 PixelPTEReqHeightPTEs = 1;
1933 *PixelPTEReqHeight = MacroTileHeight;
1934 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1935 *PTERequestSize = 64;
1936 FractionOfPTEReturnDrop = 0;
1937 }
1938
1939 if (SurfaceTiling == dm_sw_linear) {
1940 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1941 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1942 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1943 } else if (ScanDirection != dm_vert) {
1944 *dpte_row_height = *PixelPTEReqHeight;
1945 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1946 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1947 } else {
1948 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1949 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1950 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1951 }
1952
1953 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1954 *PTEBufferSizeNotExceeded = true;
1955 } else {
1956 *PTEBufferSizeNotExceeded = false;
1957 }
1958
1959 if (GPUVMEnable != true) {
1960 *PixelPTEBytesPerRow = 0;
1961 *PTEBufferSizeNotExceeded = true;
1962 }
1963
1964 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1965
1966 if (HostVMEnable == true) {
1967 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1968 }
1969
1970 if (HostVMEnable == true) {
1971 *vm_group_bytes = 512;
1972 *dpte_group_bytes = 512;
1973 } else if (GPUVMEnable == true) {
1974 *vm_group_bytes = 2048;
1975 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1976 *dpte_group_bytes = 512;
1977 } else {
1978 *dpte_group_bytes = 2048;
1979 }
1980 } else {
1981 *vm_group_bytes = 0;
1982 *dpte_group_bytes = 0;
1983 }
1984 return PDEAndMetaPTEBytesFrame;
1985 }
1986
1987 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
1988 {
1989 struct vba_vars_st *v = &mode_lib->vba;
1990 unsigned int j, k;
1991 double HostVMInefficiencyFactor = 1.0;
1992 bool NoChromaPlanes = true;
1993 int ReorderBytes;
1994 double VMDataOnlyReturnBW;
1995 double MaxTotalRDBandwidth = 0;
1996 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1997
1998 v->WritebackDISPCLK = 0.0;
1999 v->DISPCLKWithRamping = 0;
2000 v->DISPCLKWithoutRamping = 0;
2001 v->GlobalDPPCLK = 0.0;
2002 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
2003 {
2004 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2005 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2006 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2007 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2008 if (v->HostVMEnable != true) {
2009 v->ReturnBW = dml_min(
2010 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2011 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2012 } else {
2013 v->ReturnBW = dml_min(
2014 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2015 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2016 }
2017 }
2018 /* End DAL custom code */
2019
2020 // DISPCLK and DPPCLK Calculation
2021 //
2022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2023 if (v->WritebackEnable[k]) {
2024 v->WritebackDISPCLK = dml_max(
2025 v->WritebackDISPCLK,
2026 dml31_CalculateWriteBackDISPCLK(
2027 v->WritebackPixelFormat[k],
2028 v->PixelClock[k],
2029 v->WritebackHRatio[k],
2030 v->WritebackVRatio[k],
2031 v->WritebackHTaps[k],
2032 v->WritebackVTaps[k],
2033 v->WritebackSourceWidth[k],
2034 v->WritebackDestinationWidth[k],
2035 v->HTotal[k],
2036 v->WritebackLineBufferSize));
2037 }
2038 }
2039
2040 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2041 if (v->HRatio[k] > 1) {
2042 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2043 v->MaxDCHUBToPSCLThroughput,
2044 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2045 } else {
2046 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2047 }
2048
2049 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2050 * dml_max(
2051 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2052 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2053
2054 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2055 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2056 }
2057
2058 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2059 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2060 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2061 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2062 } else {
2063 if (v->HRatioChroma[k] > 1) {
2064 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2065 v->MaxDCHUBToPSCLThroughput,
2066 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2067 } else {
2068 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2069 }
2070 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2071 * dml_max3(
2072 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2073 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2074 1.0);
2075
2076 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2077 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2078 }
2079
2080 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2081 }
2082 }
2083
2084 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2085 if (v->BlendingAndTiming[k] != k)
2086 continue;
2087 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2088 v->DISPCLKWithRamping = dml_max(
2089 v->DISPCLKWithRamping,
2090 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2091 * (1 + v->DISPCLKRampingMargin / 100));
2092 v->DISPCLKWithoutRamping = dml_max(
2093 v->DISPCLKWithoutRamping,
2094 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2095 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2096 v->DISPCLKWithRamping = dml_max(
2097 v->DISPCLKWithRamping,
2098 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2099 * (1 + v->DISPCLKRampingMargin / 100));
2100 v->DISPCLKWithoutRamping = dml_max(
2101 v->DISPCLKWithoutRamping,
2102 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2103 } else {
2104 v->DISPCLKWithRamping = dml_max(
2105 v->DISPCLKWithRamping,
2106 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2107 v->DISPCLKWithoutRamping = dml_max(
2108 v->DISPCLKWithoutRamping,
2109 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2110 }
2111 }
2112
2113 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2114 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2115
2116 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2117 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2118 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2119 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2120 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2121 v->DISPCLKDPPCLKVCOSpeed);
2122 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2123 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2124 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2125 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2126 } else {
2127 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2128 }
2129 v->DISPCLK = v->DISPCLK_calculated;
2130 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2131
2132 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2133 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2134 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2135 }
2136 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2138 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2139 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2140 }
2141
2142 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2143 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2144 }
2145
2146 // Urgent and B P-State/DRAM Clock Change Watermark
2147 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2148 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2149
2150 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2151 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2152 v->SourcePixelFormat[k],
2153 v->SurfaceTiling[k],
2154 &v->BytePerPixelY[k],
2155 &v->BytePerPixelC[k],
2156 &v->BytePerPixelDETY[k],
2157 &v->BytePerPixelDETC[k],
2158 &v->BlockHeight256BytesY[k],
2159 &v->BlockHeight256BytesC[k],
2160 &v->BlockWidth256BytesY[k],
2161 &v->BlockWidth256BytesC[k]);
2162 }
2163
2164 CalculateSwathWidth(
2165 false,
2166 v->NumberOfActivePlanes,
2167 v->SourcePixelFormat,
2168 v->SourceScan,
2169 v->ViewportWidth,
2170 v->ViewportHeight,
2171 v->SurfaceWidthY,
2172 v->SurfaceWidthC,
2173 v->SurfaceHeightY,
2174 v->SurfaceHeightC,
2175 v->ODMCombineEnabled,
2176 v->BytePerPixelY,
2177 v->BytePerPixelC,
2178 v->BlockHeight256BytesY,
2179 v->BlockHeight256BytesC,
2180 v->BlockWidth256BytesY,
2181 v->BlockWidth256BytesC,
2182 v->BlendingAndTiming,
2183 v->HActive,
2184 v->HRatio,
2185 v->DPPPerPlane,
2186 v->SwathWidthSingleDPPY,
2187 v->SwathWidthSingleDPPC,
2188 v->SwathWidthY,
2189 v->SwathWidthC,
2190 v->dummyinteger3,
2191 v->dummyinteger4,
2192 v->swath_width_luma_ub,
2193 v->swath_width_chroma_ub);
2194
2195 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2196 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2197 * v->VRatio[k];
2198 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2199 * v->VRatioChroma[k];
2200 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2201 }
2202
2203 // DCFCLK Deep Sleep
2204 CalculateDCFCLKDeepSleep(
2205 mode_lib,
2206 v->NumberOfActivePlanes,
2207 v->BytePerPixelY,
2208 v->BytePerPixelC,
2209 v->VRatio,
2210 v->VRatioChroma,
2211 v->SwathWidthY,
2212 v->SwathWidthC,
2213 v->DPPPerPlane,
2214 v->HRatio,
2215 v->HRatioChroma,
2216 v->PixelClock,
2217 v->PSCL_THROUGHPUT_LUMA,
2218 v->PSCL_THROUGHPUT_CHROMA,
2219 v->DPPCLK,
2220 v->ReadBandwidthPlaneLuma,
2221 v->ReadBandwidthPlaneChroma,
2222 v->ReturnBusWidth,
2223 &v->DCFCLKDeepSleep);
2224
2225 // DSCCLK
2226 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2227 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2228 v->DSCCLK_calculated[k] = 0.0;
2229 } else {
2230 if (v->OutputFormat[k] == dm_420)
2231 v->DSCFormatFactor = 2;
2232 else if (v->OutputFormat[k] == dm_444)
2233 v->DSCFormatFactor = 1;
2234 else if (v->OutputFormat[k] == dm_n422)
2235 v->DSCFormatFactor = 2;
2236 else
2237 v->DSCFormatFactor = 1;
2238 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2239 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2240 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2241 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2242 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2243 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2244 else
2245 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2246 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2247 }
2248 }
2249
2250 // DSC Delay
2251 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2252 double BPP = v->OutputBpp[k];
2253
2254 if (v->DSCEnabled[k] && BPP != 0) {
2255 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2256 v->DSCDelay[k] = dscceComputeDelay(
2257 v->DSCInputBitPerComponent[k],
2258 BPP,
2259 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2260 v->NumberOfDSCSlices[k],
2261 v->OutputFormat[k],
2262 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2263 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2264 v->DSCDelay[k] = 2
2265 * (dscceComputeDelay(
2266 v->DSCInputBitPerComponent[k],
2267 BPP,
2268 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2269 v->NumberOfDSCSlices[k] / 2.0,
2270 v->OutputFormat[k],
2271 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2272 } else {
2273 v->DSCDelay[k] = 4
2274 * (dscceComputeDelay(
2275 v->DSCInputBitPerComponent[k],
2276 BPP,
2277 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2278 v->NumberOfDSCSlices[k] / 4.0,
2279 v->OutputFormat[k],
2280 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2281 }
2282 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2283 } else {
2284 v->DSCDelay[k] = 0;
2285 }
2286 }
2287
2288 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2289 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2290 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2291 v->DSCDelay[k] = v->DSCDelay[j];
2292
2293 // Prefetch
2294 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2295 unsigned int PDEAndMetaPTEBytesFrameY;
2296 unsigned int PixelPTEBytesPerRowY;
2297 unsigned int MetaRowByteY;
2298 unsigned int MetaRowByteC;
2299 unsigned int PDEAndMetaPTEBytesFrameC;
2300 unsigned int PixelPTEBytesPerRowC;
2301 bool PTEBufferSizeNotExceededY;
2302 bool PTEBufferSizeNotExceededC;
2303
2304 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2305 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2306 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2307 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2308 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2309 } else {
2310 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2311 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2312 }
2313
2314 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2315 mode_lib,
2316 v->DCCEnable[k],
2317 v->BlockHeight256BytesC[k],
2318 v->BlockWidth256BytesC[k],
2319 v->SourcePixelFormat[k],
2320 v->SurfaceTiling[k],
2321 v->BytePerPixelC[k],
2322 v->SourceScan[k],
2323 v->SwathWidthC[k],
2324 v->ViewportHeightChroma[k],
2325 v->GPUVMEnable,
2326 v->HostVMEnable,
2327 v->HostVMMaxNonCachedPageTableLevels,
2328 v->GPUVMMinPageSize,
2329 v->HostVMMinPageSize,
2330 v->PTEBufferSizeInRequestsForChroma,
2331 v->PitchC[k],
2332 v->DCCMetaPitchC[k],
2333 &v->MacroTileWidthC[k],
2334 &MetaRowByteC,
2335 &PixelPTEBytesPerRowC,
2336 &PTEBufferSizeNotExceededC,
2337 &v->dpte_row_width_chroma_ub[k],
2338 &v->dpte_row_height_chroma[k],
2339 &v->meta_req_width_chroma[k],
2340 &v->meta_req_height_chroma[k],
2341 &v->meta_row_width_chroma[k],
2342 &v->meta_row_height_chroma[k],
2343 &v->dummyinteger1,
2344 &v->dummyinteger2,
2345 &v->PixelPTEReqWidthC[k],
2346 &v->PixelPTEReqHeightC[k],
2347 &v->PTERequestSizeC[k],
2348 &v->dpde0_bytes_per_frame_ub_c[k],
2349 &v->meta_pte_bytes_per_frame_ub_c[k]);
2350
2351 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2352 mode_lib,
2353 v->VRatioChroma[k],
2354 v->VTAPsChroma[k],
2355 v->Interlace[k],
2356 v->ProgressiveToInterlaceUnitInOPP,
2357 v->SwathHeightC[k],
2358 v->ViewportYStartC[k],
2359 &v->VInitPreFillC[k],
2360 &v->MaxNumSwathC[k]);
2361 } else {
2362 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2363 v->PTEBufferSizeInRequestsForChroma = 0;
2364 PixelPTEBytesPerRowC = 0;
2365 PDEAndMetaPTEBytesFrameC = 0;
2366 MetaRowByteC = 0;
2367 v->MaxNumSwathC[k] = 0;
2368 v->PrefetchSourceLinesC[k] = 0;
2369 }
2370
2371 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2372 mode_lib,
2373 v->DCCEnable[k],
2374 v->BlockHeight256BytesY[k],
2375 v->BlockWidth256BytesY[k],
2376 v->SourcePixelFormat[k],
2377 v->SurfaceTiling[k],
2378 v->BytePerPixelY[k],
2379 v->SourceScan[k],
2380 v->SwathWidthY[k],
2381 v->ViewportHeight[k],
2382 v->GPUVMEnable,
2383 v->HostVMEnable,
2384 v->HostVMMaxNonCachedPageTableLevels,
2385 v->GPUVMMinPageSize,
2386 v->HostVMMinPageSize,
2387 v->PTEBufferSizeInRequestsForLuma,
2388 v->PitchY[k],
2389 v->DCCMetaPitchY[k],
2390 &v->MacroTileWidthY[k],
2391 &MetaRowByteY,
2392 &PixelPTEBytesPerRowY,
2393 &PTEBufferSizeNotExceededY,
2394 &v->dpte_row_width_luma_ub[k],
2395 &v->dpte_row_height[k],
2396 &v->meta_req_width[k],
2397 &v->meta_req_height[k],
2398 &v->meta_row_width[k],
2399 &v->meta_row_height[k],
2400 &v->vm_group_bytes[k],
2401 &v->dpte_group_bytes[k],
2402 &v->PixelPTEReqWidthY[k],
2403 &v->PixelPTEReqHeightY[k],
2404 &v->PTERequestSizeY[k],
2405 &v->dpde0_bytes_per_frame_ub_l[k],
2406 &v->meta_pte_bytes_per_frame_ub_l[k]);
2407
2408 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2409 mode_lib,
2410 v->VRatio[k],
2411 v->vtaps[k],
2412 v->Interlace[k],
2413 v->ProgressiveToInterlaceUnitInOPP,
2414 v->SwathHeightY[k],
2415 v->ViewportYStartY[k],
2416 &v->VInitPreFillY[k],
2417 &v->MaxNumSwathY[k]);
2418 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2419 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2420 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2421
2422 CalculateRowBandwidth(
2423 v->GPUVMEnable,
2424 v->SourcePixelFormat[k],
2425 v->VRatio[k],
2426 v->VRatioChroma[k],
2427 v->DCCEnable[k],
2428 v->HTotal[k] / v->PixelClock[k],
2429 MetaRowByteY,
2430 MetaRowByteC,
2431 v->meta_row_height[k],
2432 v->meta_row_height_chroma[k],
2433 PixelPTEBytesPerRowY,
2434 PixelPTEBytesPerRowC,
2435 v->dpte_row_height[k],
2436 v->dpte_row_height_chroma[k],
2437 &v->meta_row_bw[k],
2438 &v->dpte_row_bw[k]);
2439 }
2440
2441 v->TotalDCCActiveDPP = 0;
2442 v->TotalActiveDPP = 0;
2443 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2444 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2445 if (v->DCCEnable[k])
2446 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2447 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2448 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2449 NoChromaPlanes = false;
2450 }
2451
2452 ReorderBytes = v->NumberOfChannels
2453 * dml_max3(
2454 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2455 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2456 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2457
2458 VMDataOnlyReturnBW = dml_min(
2459 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2460 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2461 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2462 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2463
2464 #ifdef __DML_VBA_DEBUG__
2465 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2466 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2467 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2468 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2469 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2470 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2471 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2472 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2473 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2474 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2475 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2476 #endif
2477
2478 if (v->GPUVMEnable && v->HostVMEnable)
2479 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2480
2481 v->UrgentExtraLatency = CalculateExtraLatency(
2482 v->RoundTripPingLatencyCycles,
2483 ReorderBytes,
2484 v->DCFCLK,
2485 v->TotalActiveDPP,
2486 v->PixelChunkSizeInKByte,
2487 v->TotalDCCActiveDPP,
2488 v->MetaChunkSize,
2489 v->ReturnBW,
2490 v->GPUVMEnable,
2491 v->HostVMEnable,
2492 v->NumberOfActivePlanes,
2493 v->DPPPerPlane,
2494 v->dpte_group_bytes,
2495 HostVMInefficiencyFactor,
2496 v->HostVMMinPageSize,
2497 v->HostVMMaxNonCachedPageTableLevels);
2498
2499 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2500
2501 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2502 if (v->BlendingAndTiming[k] == k) {
2503 if (v->WritebackEnable[k] == true) {
2504 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2505 + CalculateWriteBackDelay(
2506 v->WritebackPixelFormat[k],
2507 v->WritebackHRatio[k],
2508 v->WritebackVRatio[k],
2509 v->WritebackVTaps[k],
2510 v->WritebackDestinationWidth[k],
2511 v->WritebackDestinationHeight[k],
2512 v->WritebackSourceHeight[k],
2513 v->HTotal[k]) / v->DISPCLK;
2514 } else
2515 v->WritebackDelay[v->VoltageLevel][k] = 0;
2516 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2517 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2518 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2519 v->WritebackDelay[v->VoltageLevel][k],
2520 v->WritebackLatency
2521 + CalculateWriteBackDelay(
2522 v->WritebackPixelFormat[j],
2523 v->WritebackHRatio[j],
2524 v->WritebackVRatio[j],
2525 v->WritebackVTaps[j],
2526 v->WritebackDestinationWidth[j],
2527 v->WritebackDestinationHeight[j],
2528 v->WritebackSourceHeight[j],
2529 v->HTotal[k]) / v->DISPCLK);
2530 }
2531 }
2532 }
2533 }
2534
2535 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2536 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2537 if (v->BlendingAndTiming[k] == j)
2538 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2539
2540 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2541 v->MaxVStartupLines[k] =
2542 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2543 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2544 v->VTotal[k] - v->VActive[k]
2545 - dml_max(
2546 1.0,
2547 dml_ceil(
2548 (double) v->WritebackDelay[v->VoltageLevel][k]
2549 / (v->HTotal[k] / v->PixelClock[k]),
2550 1));
2551 if (v->MaxVStartupLines[k] > 1023)
2552 v->MaxVStartupLines[k] = 1023;
2553
2554 #ifdef __DML_VBA_DEBUG__
2555 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2556 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2557 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2558 #endif
2559 }
2560
2561 v->MaximumMaxVStartupLines = 0;
2562 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2563 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2564
2565 // VBA_DELTA
2566 // We don't really care to iterate between the various prefetch modes
2567 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2568
2569 v->UrgentLatency = CalculateUrgentLatency(
2570 v->UrgentLatencyPixelDataOnly,
2571 v->UrgentLatencyPixelMixedWithVMData,
2572 v->UrgentLatencyVMDataOnly,
2573 v->DoUrgentLatencyAdjustment,
2574 v->UrgentLatencyAdjustmentFabricClockComponent,
2575 v->UrgentLatencyAdjustmentFabricClockReference,
2576 v->FabricClock);
2577
2578 v->FractionOfUrgentBandwidth = 0.0;
2579 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2580
2581 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2582
2583 do {
2584 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2585 bool DestinationLineTimesForPrefetchLessThan2 = false;
2586 bool VRatioPrefetchMoreThan4 = false;
2587 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2588 MaxTotalRDBandwidth = 0;
2589
2590 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2591
2592 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2593 Pipe myPipe;
2594
2595 myPipe.DPPCLK = v->DPPCLK[k];
2596 myPipe.DISPCLK = v->DISPCLK;
2597 myPipe.PixelClock = v->PixelClock[k];
2598 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2599 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2600 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2601 myPipe.VRatio = v->VRatio[k];
2602 myPipe.VRatioChroma = v->VRatioChroma[k];
2603 myPipe.SourceScan = v->SourceScan[k];
2604 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2605 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2606 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2607 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2608 myPipe.InterlaceEnable = v->Interlace[k];
2609 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2610 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2611 myPipe.HTotal = v->HTotal[k];
2612 myPipe.DCCEnable = v->DCCEnable[k];
2613 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2614 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2615 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2616 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2617 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2618 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2619 v->ErrorResult[k] = CalculatePrefetchSchedule(
2620 mode_lib,
2621 HostVMInefficiencyFactor,
2622 &myPipe,
2623 v->DSCDelay[k],
2624 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2625 v->DPPCLKDelaySCL,
2626 v->DPPCLKDelaySCLLBOnly,
2627 v->DPPCLKDelayCNVCCursor,
2628 v->DISPCLKDelaySubtotal,
2629 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2630 v->OutputFormat[k],
2631 v->MaxInterDCNTileRepeaters,
2632 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2633 v->MaxVStartupLines[k],
2634 v->GPUVMMaxPageTableLevels,
2635 v->GPUVMEnable,
2636 v->HostVMEnable,
2637 v->HostVMMaxNonCachedPageTableLevels,
2638 v->HostVMMinPageSize,
2639 v->DynamicMetadataEnable[k],
2640 v->DynamicMetadataVMEnabled,
2641 v->DynamicMetadataLinesBeforeActiveRequired[k],
2642 v->DynamicMetadataTransmittedBytes[k],
2643 v->UrgentLatency,
2644 v->UrgentExtraLatency,
2645 v->TCalc,
2646 v->PDEAndMetaPTEBytesFrame[k],
2647 v->MetaRowByte[k],
2648 v->PixelPTEBytesPerRow[k],
2649 v->PrefetchSourceLinesY[k],
2650 v->SwathWidthY[k],
2651 v->VInitPreFillY[k],
2652 v->MaxNumSwathY[k],
2653 v->PrefetchSourceLinesC[k],
2654 v->SwathWidthC[k],
2655 v->VInitPreFillC[k],
2656 v->MaxNumSwathC[k],
2657 v->swath_width_luma_ub[k],
2658 v->swath_width_chroma_ub[k],
2659 v->SwathHeightY[k],
2660 v->SwathHeightC[k],
2661 TWait,
2662 &v->DSTXAfterScaler[k],
2663 &v->DSTYAfterScaler[k],
2664 &v->DestinationLinesForPrefetch[k],
2665 &v->PrefetchBandwidth[k],
2666 &v->DestinationLinesToRequestVMInVBlank[k],
2667 &v->DestinationLinesToRequestRowInVBlank[k],
2668 &v->VRatioPrefetchY[k],
2669 &v->VRatioPrefetchC[k],
2670 &v->RequiredPrefetchPixDataBWLuma[k],
2671 &v->RequiredPrefetchPixDataBWChroma[k],
2672 &v->NotEnoughTimeForDynamicMetadata[k],
2673 &v->Tno_bw[k],
2674 &v->prefetch_vmrow_bw[k],
2675 &v->Tdmdl_vm[k],
2676 &v->Tdmdl[k],
2677 &v->TSetup[k],
2678 &v->VUpdateOffsetPix[k],
2679 &v->VUpdateWidthPix[k],
2680 &v->VReadyOffsetPix[k]);
2681
2682 #ifdef __DML_VBA_DEBUG__
2683 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2684 #endif
2685 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2686 }
2687
2688 v->NoEnoughUrgentLatencyHiding = false;
2689 v->NoEnoughUrgentLatencyHidingPre = false;
2690
2691 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2692 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2693 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2694 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2695 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2696
2697 CalculateUrgentBurstFactor(
2698 v->swath_width_luma_ub[k],
2699 v->swath_width_chroma_ub[k],
2700 v->SwathHeightY[k],
2701 v->SwathHeightC[k],
2702 v->HTotal[k] / v->PixelClock[k],
2703 v->UrgentLatency,
2704 v->CursorBufferSize,
2705 v->CursorWidth[k][0],
2706 v->CursorBPP[k][0],
2707 v->VRatio[k],
2708 v->VRatioChroma[k],
2709 v->BytePerPixelDETY[k],
2710 v->BytePerPixelDETC[k],
2711 v->DETBufferSizeY[k],
2712 v->DETBufferSizeC[k],
2713 &v->UrgBurstFactorCursor[k],
2714 &v->UrgBurstFactorLuma[k],
2715 &v->UrgBurstFactorChroma[k],
2716 &v->NoUrgentLatencyHiding[k]);
2717
2718 CalculateUrgentBurstFactor(
2719 v->swath_width_luma_ub[k],
2720 v->swath_width_chroma_ub[k],
2721 v->SwathHeightY[k],
2722 v->SwathHeightC[k],
2723 v->HTotal[k] / v->PixelClock[k],
2724 v->UrgentLatency,
2725 v->CursorBufferSize,
2726 v->CursorWidth[k][0],
2727 v->CursorBPP[k][0],
2728 v->VRatioPrefetchY[k],
2729 v->VRatioPrefetchC[k],
2730 v->BytePerPixelDETY[k],
2731 v->BytePerPixelDETC[k],
2732 v->DETBufferSizeY[k],
2733 v->DETBufferSizeC[k],
2734 &v->UrgBurstFactorCursorPre[k],
2735 &v->UrgBurstFactorLumaPre[k],
2736 &v->UrgBurstFactorChromaPre[k],
2737 &v->NoUrgentLatencyHidingPre[k]);
2738
2739 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2740 + dml_max3(
2741 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2742 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2743 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2744 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2745 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2746 v->DPPPerPlane[k]
2747 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2748 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2749 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2750
2751 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2752 + dml_max3(
2753 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2754 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2755 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2756 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2757 + v->cursor_bw_pre[k]);
2758
2759 #ifdef __DML_VBA_DEBUG__
2760 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2761 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2762 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2763 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2764 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2765
2766 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2767 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2768
2769 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2770 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2771 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2772 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2773 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2774 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2775 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2776 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2777 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2778 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2779 #endif
2780
2781 if (v->DestinationLinesForPrefetch[k] < 2)
2782 DestinationLineTimesForPrefetchLessThan2 = true;
2783
2784 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2785 VRatioPrefetchMoreThan4 = true;
2786
2787 if (v->NoUrgentLatencyHiding[k] == true)
2788 v->NoEnoughUrgentLatencyHiding = true;
2789
2790 if (v->NoUrgentLatencyHidingPre[k] == true)
2791 v->NoEnoughUrgentLatencyHidingPre = true;
2792 }
2793
2794 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2795
2796 #ifdef __DML_VBA_DEBUG__
2797 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2798 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2799 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2800 #endif
2801
2802 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2803 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2804 v->PrefetchModeSupported = true;
2805 else {
2806 v->PrefetchModeSupported = false;
2807 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2808 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2809 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2810 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2811 }
2812
2813 // PREVIOUS_ERROR
2814 // This error result check was done after the PrefetchModeSupported. So we will
2815 // still try to calculate flip schedule even prefetch mode not supported
2816 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2817 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2818 v->PrefetchModeSupported = false;
2819 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2820 }
2821 }
2822
2823 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2824 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2825 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2826 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2827 - dml_max(
2828 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2829 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2830 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2831 v->DPPPerPlane[k]
2832 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2833 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2834 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2835 }
2836
2837 v->TotImmediateFlipBytes = 0;
2838 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2839 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2840 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2841 }
2842 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2843 CalculateFlipSchedule(
2844 mode_lib,
2845 k,
2846 HostVMInefficiencyFactor,
2847 v->UrgentExtraLatency,
2848 v->UrgentLatency,
2849 v->PDEAndMetaPTEBytesFrame[k],
2850 v->MetaRowByte[k],
2851 v->PixelPTEBytesPerRow[k]);
2852 }
2853
2854 v->total_dcn_read_bw_with_flip = 0.0;
2855 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2856 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2857 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2858 + dml_max3(
2859 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2860 v->DPPPerPlane[k] * v->final_flip_bw[k]
2861 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2862 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2863 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2864 v->DPPPerPlane[k]
2865 * (v->final_flip_bw[k]
2866 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2867 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2868 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2869 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2870 + dml_max3(
2871 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2872 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2873 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2874 v->DPPPerPlane[k]
2875 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2876 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2877 }
2878 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2879
2880 v->ImmediateFlipSupported = true;
2881 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2882 #ifdef __DML_VBA_DEBUG__
2883 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2884 #endif
2885 v->ImmediateFlipSupported = false;
2886 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2887 }
2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2889 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2890 #ifdef __DML_VBA_DEBUG__
2891 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2892 __func__, k);
2893 #endif
2894 v->ImmediateFlipSupported = false;
2895 }
2896 }
2897 } else {
2898 v->ImmediateFlipSupported = false;
2899 }
2900
2901 v->PrefetchAndImmediateFlipSupported =
2902 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2903 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2904 v->ImmediateFlipSupported)) ? true : false;
2905 #ifdef __DML_VBA_DEBUG__
2906 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2907 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2908 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2909 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2910 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2911 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2912 #endif
2913 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2914
2915 v->VStartupLines = v->VStartupLines + 1;
2916 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2917 ASSERT(v->PrefetchAndImmediateFlipSupported);
2918
2919 // Unbounded Request Enabled
2920 CalculateUnboundedRequestAndCompressedBufferSize(
2921 v->DETBufferSizeInKByte[0],
2922 v->ConfigReturnBufferSizeInKByte,
2923 v->UseUnboundedRequesting,
2924 v->TotalActiveDPP,
2925 NoChromaPlanes,
2926 v->MaxNumDPP,
2927 v->CompressedBufferSegmentSizeInkByte,
2928 v->Output,
2929 &v->UnboundedRequestEnabled,
2930 &v->CompressedBufferSizeInkByte);
2931
2932 //Watermarks and NB P-State/DRAM Clock Change Support
2933 {
2934 enum clock_change_support DRAMClockChangeSupport; // dummy
2935 CalculateWatermarksAndDRAMSpeedChangeSupport(
2936 mode_lib,
2937 PrefetchMode,
2938 v->DCFCLK,
2939 v->ReturnBW,
2940 v->UrgentLatency,
2941 v->UrgentExtraLatency,
2942 v->SOCCLK,
2943 v->DCFCLKDeepSleep,
2944 v->DETBufferSizeY,
2945 v->DETBufferSizeC,
2946 v->SwathHeightY,
2947 v->SwathHeightC,
2948 v->SwathWidthY,
2949 v->SwathWidthC,
2950 v->DPPPerPlane,
2951 v->BytePerPixelDETY,
2952 v->BytePerPixelDETC,
2953 v->UnboundedRequestEnabled,
2954 v->CompressedBufferSizeInkByte,
2955 &DRAMClockChangeSupport,
2956 &v->StutterExitWatermark,
2957 &v->StutterEnterPlusExitWatermark,
2958 &v->Z8StutterExitWatermark,
2959 &v->Z8StutterEnterPlusExitWatermark);
2960
2961 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2962 if (v->WritebackEnable[k] == true) {
2963 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2964 0,
2965 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2966 } else {
2967 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2968 }
2969 }
2970 }
2971
2972 //Display Pipeline Delivery Time in Prefetch, Groups
2973 CalculatePixelDeliveryTimes(
2974 v->NumberOfActivePlanes,
2975 v->VRatio,
2976 v->VRatioChroma,
2977 v->VRatioPrefetchY,
2978 v->VRatioPrefetchC,
2979 v->swath_width_luma_ub,
2980 v->swath_width_chroma_ub,
2981 v->DPPPerPlane,
2982 v->HRatio,
2983 v->HRatioChroma,
2984 v->PixelClock,
2985 v->PSCL_THROUGHPUT_LUMA,
2986 v->PSCL_THROUGHPUT_CHROMA,
2987 v->DPPCLK,
2988 v->BytePerPixelC,
2989 v->SourceScan,
2990 v->NumberOfCursors,
2991 v->CursorWidth,
2992 v->CursorBPP,
2993 v->BlockWidth256BytesY,
2994 v->BlockHeight256BytesY,
2995 v->BlockWidth256BytesC,
2996 v->BlockHeight256BytesC,
2997 v->DisplayPipeLineDeliveryTimeLuma,
2998 v->DisplayPipeLineDeliveryTimeChroma,
2999 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3000 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3001 v->DisplayPipeRequestDeliveryTimeLuma,
3002 v->DisplayPipeRequestDeliveryTimeChroma,
3003 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3004 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3005 v->CursorRequestDeliveryTime,
3006 v->CursorRequestDeliveryTimePrefetch);
3007
3008 CalculateMetaAndPTETimes(
3009 v->NumberOfActivePlanes,
3010 v->GPUVMEnable,
3011 v->MetaChunkSize,
3012 v->MinMetaChunkSizeBytes,
3013 v->HTotal,
3014 v->VRatio,
3015 v->VRatioChroma,
3016 v->DestinationLinesToRequestRowInVBlank,
3017 v->DestinationLinesToRequestRowInImmediateFlip,
3018 v->DCCEnable,
3019 v->PixelClock,
3020 v->BytePerPixelY,
3021 v->BytePerPixelC,
3022 v->SourceScan,
3023 v->dpte_row_height,
3024 v->dpte_row_height_chroma,
3025 v->meta_row_width,
3026 v->meta_row_width_chroma,
3027 v->meta_row_height,
3028 v->meta_row_height_chroma,
3029 v->meta_req_width,
3030 v->meta_req_width_chroma,
3031 v->meta_req_height,
3032 v->meta_req_height_chroma,
3033 v->dpte_group_bytes,
3034 v->PTERequestSizeY,
3035 v->PTERequestSizeC,
3036 v->PixelPTEReqWidthY,
3037 v->PixelPTEReqHeightY,
3038 v->PixelPTEReqWidthC,
3039 v->PixelPTEReqHeightC,
3040 v->dpte_row_width_luma_ub,
3041 v->dpte_row_width_chroma_ub,
3042 v->DST_Y_PER_PTE_ROW_NOM_L,
3043 v->DST_Y_PER_PTE_ROW_NOM_C,
3044 v->DST_Y_PER_META_ROW_NOM_L,
3045 v->DST_Y_PER_META_ROW_NOM_C,
3046 v->TimePerMetaChunkNominal,
3047 v->TimePerChromaMetaChunkNominal,
3048 v->TimePerMetaChunkVBlank,
3049 v->TimePerChromaMetaChunkVBlank,
3050 v->TimePerMetaChunkFlip,
3051 v->TimePerChromaMetaChunkFlip,
3052 v->time_per_pte_group_nom_luma,
3053 v->time_per_pte_group_vblank_luma,
3054 v->time_per_pte_group_flip_luma,
3055 v->time_per_pte_group_nom_chroma,
3056 v->time_per_pte_group_vblank_chroma,
3057 v->time_per_pte_group_flip_chroma);
3058
3059 CalculateVMGroupAndRequestTimes(
3060 v->NumberOfActivePlanes,
3061 v->GPUVMEnable,
3062 v->GPUVMMaxPageTableLevels,
3063 v->HTotal,
3064 v->BytePerPixelC,
3065 v->DestinationLinesToRequestVMInVBlank,
3066 v->DestinationLinesToRequestVMInImmediateFlip,
3067 v->DCCEnable,
3068 v->PixelClock,
3069 v->dpte_row_width_luma_ub,
3070 v->dpte_row_width_chroma_ub,
3071 v->vm_group_bytes,
3072 v->dpde0_bytes_per_frame_ub_l,
3073 v->dpde0_bytes_per_frame_ub_c,
3074 v->meta_pte_bytes_per_frame_ub_l,
3075 v->meta_pte_bytes_per_frame_ub_c,
3076 v->TimePerVMGroupVBlank,
3077 v->TimePerVMGroupFlip,
3078 v->TimePerVMRequestVBlank,
3079 v->TimePerVMRequestFlip);
3080
3081 // Min TTUVBlank
3082 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3083 if (PrefetchMode == 0) {
3084 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3085 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3086 v->MinTTUVBlank[k] = dml_max(
3087 v->DRAMClockChangeWatermark,
3088 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3089 } else if (PrefetchMode == 1) {
3090 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3091 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3092 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3093 } else {
3094 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3095 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3096 v->MinTTUVBlank[k] = v->UrgentWatermark;
3097 }
3098 if (!v->DynamicMetadataEnable[k])
3099 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3100 }
3101
3102 // DCC Configuration
3103 v->ActiveDPPs = 0;
3104 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3105 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3106 v->SourcePixelFormat[k],
3107 v->SurfaceWidthY[k],
3108 v->SurfaceWidthC[k],
3109 v->SurfaceHeightY[k],
3110 v->SurfaceHeightC[k],
3111 v->DETBufferSizeInKByte[k] * 1024,
3112 v->BlockHeight256BytesY[k],
3113 v->BlockHeight256BytesC[k],
3114 v->SurfaceTiling[k],
3115 v->BytePerPixelY[k],
3116 v->BytePerPixelC[k],
3117 v->BytePerPixelDETY[k],
3118 v->BytePerPixelDETC[k],
3119 v->SourceScan[k],
3120 &v->DCCYMaxUncompressedBlock[k],
3121 &v->DCCCMaxUncompressedBlock[k],
3122 &v->DCCYMaxCompressedBlock[k],
3123 &v->DCCCMaxCompressedBlock[k],
3124 &v->DCCYIndependentBlock[k],
3125 &v->DCCCIndependentBlock[k]);
3126 }
3127
3128 // VStartup Adjustment
3129 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3130 bool isInterlaceTiming;
3131 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3132 #ifdef __DML_VBA_DEBUG__
3133 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3134 #endif
3135
3136 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3137
3138 #ifdef __DML_VBA_DEBUG__
3139 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3140 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3141 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3142 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3143 #endif
3144
3145 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3146 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3147 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3148 }
3149
3150 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3151
3152 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3153 - v->VFrontPorch[k])
3154 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3155 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3156
3157 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3158
3159 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3160 <= (isInterlaceTiming ?
3161 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3162 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3163 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3164 } else {
3165 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3166 }
3167 #ifdef __DML_VBA_DEBUG__
3168 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3169 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3170 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3171 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3172 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3173 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3174 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3175 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3176 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3177 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3178 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3179 #endif
3180 }
3181
3182 {
3183 //Maximum Bandwidth Used
3184 double TotalWRBandwidth = 0;
3185 double MaxPerPlaneVActiveWRBandwidth = 0;
3186 double WRBandwidth = 0;
3187 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3188 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3189 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3190 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3191 } else if (v->WritebackEnable[k] == true) {
3192 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3193 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3194 }
3195 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3196 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3197 }
3198
3199 v->TotalDataReadBandwidth = 0;
3200 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3201 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3202 }
3203 }
3204 // Stutter Efficiency
3205 CalculateStutterEfficiency(
3206 mode_lib,
3207 v->CompressedBufferSizeInkByte,
3208 v->UnboundedRequestEnabled,
3209 v->ConfigReturnBufferSizeInKByte,
3210 v->MetaFIFOSizeInKEntries,
3211 v->ZeroSizeBufferEntries,
3212 v->NumberOfActivePlanes,
3213 v->ROBBufferSizeInKByte,
3214 v->TotalDataReadBandwidth,
3215 v->DCFCLK,
3216 v->ReturnBW,
3217 v->COMPBUF_RESERVED_SPACE_64B,
3218 v->COMPBUF_RESERVED_SPACE_ZS,
3219 v->SRExitTime,
3220 v->SRExitZ8Time,
3221 v->SynchronizedVBlank,
3222 v->StutterEnterPlusExitWatermark,
3223 v->Z8StutterEnterPlusExitWatermark,
3224 v->ProgressiveToInterlaceUnitInOPP,
3225 v->Interlace,
3226 v->MinTTUVBlank,
3227 v->DPPPerPlane,
3228 v->DETBufferSizeY,
3229 v->BytePerPixelY,
3230 v->BytePerPixelDETY,
3231 v->SwathWidthY,
3232 v->SwathHeightY,
3233 v->SwathHeightC,
3234 v->DCCRateLuma,
3235 v->DCCRateChroma,
3236 v->DCCFractionOfZeroSizeRequestsLuma,
3237 v->DCCFractionOfZeroSizeRequestsChroma,
3238 v->HTotal,
3239 v->VTotal,
3240 v->PixelClock,
3241 v->VRatio,
3242 v->SourceScan,
3243 v->BlockHeight256BytesY,
3244 v->BlockWidth256BytesY,
3245 v->BlockHeight256BytesC,
3246 v->BlockWidth256BytesC,
3247 v->DCCYMaxUncompressedBlock,
3248 v->DCCCMaxUncompressedBlock,
3249 v->VActive,
3250 v->DCCEnable,
3251 v->WritebackEnable,
3252 v->ReadBandwidthPlaneLuma,
3253 v->ReadBandwidthPlaneChroma,
3254 v->meta_row_bw,
3255 v->dpte_row_bw,
3256 &v->StutterEfficiencyNotIncludingVBlank,
3257 &v->StutterEfficiency,
3258 &v->NumberOfStutterBurstsPerFrame,
3259 &v->Z8StutterEfficiencyNotIncludingVBlank,
3260 &v->Z8StutterEfficiency,
3261 &v->Z8NumberOfStutterBurstsPerFrame,
3262 &v->StutterPeriod);
3263 }
3264
3265 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3266 {
3267 struct vba_vars_st *v = &mode_lib->vba;
3268 // Display Pipe Configuration
3269 double BytePerPixDETY[DC__NUM_DPP__MAX];
3270 double BytePerPixDETC[DC__NUM_DPP__MAX];
3271 int BytePerPixY[DC__NUM_DPP__MAX];
3272 int BytePerPixC[DC__NUM_DPP__MAX];
3273 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3274 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3275 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3276 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3277 double dummy1[DC__NUM_DPP__MAX];
3278 double dummy2[DC__NUM_DPP__MAX];
3279 double dummy3[DC__NUM_DPP__MAX];
3280 double dummy4[DC__NUM_DPP__MAX];
3281 int dummy5[DC__NUM_DPP__MAX];
3282 int dummy6[DC__NUM_DPP__MAX];
3283 bool dummy7[DC__NUM_DPP__MAX];
3284 bool dummysinglestring;
3285
3286 unsigned int k;
3287
3288 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3289
3290 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3291 v->SourcePixelFormat[k],
3292 v->SurfaceTiling[k],
3293 &BytePerPixY[k],
3294 &BytePerPixC[k],
3295 &BytePerPixDETY[k],
3296 &BytePerPixDETC[k],
3297 &Read256BytesBlockHeightY[k],
3298 &Read256BytesBlockHeightC[k],
3299 &Read256BytesBlockWidthY[k],
3300 &Read256BytesBlockWidthC[k]);
3301 }
3302
3303 CalculateSwathAndDETConfiguration(
3304 false,
3305 v->NumberOfActivePlanes,
3306 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
3307 v->DETBufferSizeInKByte,
3308 dummy1,
3309 dummy2,
3310 v->SourceScan,
3311 v->SourcePixelFormat,
3312 v->SurfaceTiling,
3313 v->ViewportWidth,
3314 v->ViewportHeight,
3315 v->SurfaceWidthY,
3316 v->SurfaceWidthC,
3317 v->SurfaceHeightY,
3318 v->SurfaceHeightC,
3319 Read256BytesBlockHeightY,
3320 Read256BytesBlockHeightC,
3321 Read256BytesBlockWidthY,
3322 Read256BytesBlockWidthC,
3323 v->ODMCombineEnabled,
3324 v->BlendingAndTiming,
3325 BytePerPixY,
3326 BytePerPixC,
3327 BytePerPixDETY,
3328 BytePerPixDETC,
3329 v->HActive,
3330 v->HRatio,
3331 v->HRatioChroma,
3332 v->DPPPerPlane,
3333 dummy5,
3334 dummy6,
3335 dummy3,
3336 dummy4,
3337 v->SwathHeightY,
3338 v->SwathHeightC,
3339 v->DETBufferSizeY,
3340 v->DETBufferSizeC,
3341 dummy7,
3342 &dummysinglestring);
3343 }
3344
/*
 * CalculateTWait - pick the wait time that applies to a prefetch mode.
 *
 * PrefetchMode 0: the larger of (DRAMClockChangeLatency + UrgentLatency) and
 *                 max(SREnterPlusExitTime, UrgentLatency).
 * PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency).
 * any other mode: UrgentLatency.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3355
3356 double dml31_CalculateWriteBackDISPCLK(
3357 enum source_format_class WritebackPixelFormat,
3358 double PixelClock,
3359 double WritebackHRatio,
3360 double WritebackVRatio,
3361 unsigned int WritebackHTaps,
3362 unsigned int WritebackVTaps,
3363 long WritebackSourceWidth,
3364 long WritebackDestinationWidth,
3365 unsigned int HTotal,
3366 unsigned int WritebackLineBufferSize)
3367 {
3368 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3369
3370 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3371 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3372 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3373 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3374 }
3375
3376 static double CalculateWriteBackDelay(
3377 enum source_format_class WritebackPixelFormat,
3378 double WritebackHRatio,
3379 double WritebackVRatio,
3380 unsigned int WritebackVTaps,
3381 int WritebackDestinationWidth,
3382 int WritebackDestinationHeight,
3383 int WritebackSourceHeight,
3384 unsigned int HTotal)
3385 {
3386 double CalculateWriteBackDelay;
3387 double Line_length;
3388 double Output_lines_last_notclamped;
3389 double WritebackVInit;
3390
3391 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3392 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3393 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3394 if (Output_lines_last_notclamped < 0) {
3395 CalculateWriteBackDelay = 0;
3396 } else {
3397 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3398 }
3399 return CalculateWriteBackDelay;
3400 }
3401
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive VUPDATE/VREADY pixel
 * offsets and the dynamic metadata timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150/DPPCLK, repeater delay + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three pixel counts above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of VBlank * HTotal / PixelClock when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                       that many lines' worth of time; halved again when
 *                       InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *                       is false
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f; only VUpdateOffsetPix is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3445
3446 static void CalculateRowBandwidth(
3447 bool GPUVMEnable,
3448 enum source_format_class SourcePixelFormat,
3449 double VRatio,
3450 double VRatioChroma,
3451 bool DCCEnable,
3452 double LineTime,
3453 unsigned int MetaRowByteLuma,
3454 unsigned int MetaRowByteChroma,
3455 unsigned int meta_row_height_luma,
3456 unsigned int meta_row_height_chroma,
3457 unsigned int PixelPTEBytesPerRowLuma,
3458 unsigned int PixelPTEBytesPerRowChroma,
3459 unsigned int dpte_row_height_luma,
3460 unsigned int dpte_row_height_chroma,
3461 double *meta_row_bw,
3462 double *dpte_row_bw)
3463 {
3464 if (DCCEnable != true) {
3465 *meta_row_bw = 0;
3466 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3467 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3468 } else {
3469 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3470 }
3471
3472 if (GPUVMEnable != true) {
3473 *dpte_row_bw = 0;
3474 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3475 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3476 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3477 } else {
3478 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3479 }
3480 }
3481
3482 static void CalculateFlipSchedule(
3483 struct display_mode_lib *mode_lib,
3484 unsigned int k,
3485 double HostVMInefficiencyFactor,
3486 double UrgentExtraLatency,
3487 double UrgentLatency,
3488 double PDEAndMetaPTEBytesPerFrame,
3489 double MetaRowBytes,
3490 double DPTEBytesPerRow)
3491 {
3492 struct vba_vars_st *v = &mode_lib->vba;
3493 double min_row_time = 0.0;
3494 unsigned int HostVMDynamicLevelsTrips;
3495 double TimeForFetchingMetaPTEImmediateFlip;
3496 double TimeForFetchingRowInVBlankImmediateFlip;
3497 double ImmediateFlipBW = 1.0;
3498 double LineTime = v->HTotal[k] / v->PixelClock[k];
3499
3500 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3501 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3502 } else {
3503 HostVMDynamicLevelsTrips = 0;
3504 }
3505
3506 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3507 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3508 }
3509
3510 if (v->GPUVMEnable == true) {
3511 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3512 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3513 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3514 LineTime / 4.0);
3515 } else {
3516 TimeForFetchingMetaPTEImmediateFlip = 0;
3517 }
3518
3519 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3520 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3521 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3522 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3523 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3524 LineTime / 4);
3525 } else {
3526 TimeForFetchingRowInVBlankImmediateFlip = 0;
3527 }
3528
3529 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3530
3531 if (v->GPUVMEnable == true) {
3532 v->final_flip_bw[k] = dml_max(
3533 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3534 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3535 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3536 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3537 } else {
3538 v->final_flip_bw[k] = 0;
3539 }
3540
3541 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3542 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3543 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3544 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3545 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3546 } else {
3547 min_row_time = dml_min4(
3548 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3549 v->meta_row_height[k] * LineTime / v->VRatio[k],
3550 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3551 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3552 }
3553 } else {
3554 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3555 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3556 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3557 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3558 } else {
3559 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3560 }
3561 }
3562
3563 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3564 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3565 v->ImmediateFlipSupportedForPipe[k] = false;
3566 } else {
3567 v->ImmediateFlipSupportedForPipe[k] = true;
3568 }
3569
3570 #ifdef __DML_VBA_DEBUG__
3571 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3572 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3573 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3574 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3575 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3576 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3577 #endif
3578
3579 }
3580
3581 static double TruncToValidBPP(
3582 double LinkBitRate,
3583 int Lanes,
3584 int HTotal,
3585 int HActive,
3586 double PixelClock,
3587 double DesiredBPP,
3588 bool DSCEnable,
3589 enum output_encoder_class Output,
3590 enum output_format_class Format,
3591 unsigned int DSCInputBitPerComponent,
3592 int DSCSlices,
3593 int AudioRate,
3594 int AudioLayout,
3595 enum odm_combine_mode ODMCombine)
3596 {
3597 double MaxLinkBPP;
3598 int MinDSCBPP;
3599 double MaxDSCBPP;
3600 int NonDSCBPP0;
3601 int NonDSCBPP1;
3602 int NonDSCBPP2;
3603
3604 if (Format == dm_420) {
3605 NonDSCBPP0 = 12;
3606 NonDSCBPP1 = 15;
3607 NonDSCBPP2 = 18;
3608 MinDSCBPP = 6;
3609 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3610 } else if (Format == dm_444) {
3611 NonDSCBPP0 = 24;
3612 NonDSCBPP1 = 30;
3613 NonDSCBPP2 = 36;
3614 MinDSCBPP = 8;
3615 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3616 } else {
3617
3618 NonDSCBPP0 = 16;
3619 NonDSCBPP1 = 20;
3620 NonDSCBPP2 = 24;
3621
3622 if (Format == dm_n422) {
3623 MinDSCBPP = 7;
3624 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3625 } else {
3626 MinDSCBPP = 8;
3627 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3628 }
3629 }
3630
3631 if (DSCEnable && Output == dm_dp) {
3632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3633 } else {
3634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3635 }
3636
3637 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3638 MaxLinkBPP = 16;
3639 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3640 MaxLinkBPP = 32;
3641 }
3642
3643 if (DesiredBPP == 0) {
3644 if (DSCEnable) {
3645 if (MaxLinkBPP < MinDSCBPP) {
3646 return BPP_INVALID;
3647 } else if (MaxLinkBPP >= MaxDSCBPP) {
3648 return MaxDSCBPP;
3649 } else {
3650 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3651 }
3652 } else {
3653 if (MaxLinkBPP >= NonDSCBPP2) {
3654 return NonDSCBPP2;
3655 } else if (MaxLinkBPP >= NonDSCBPP1) {
3656 return NonDSCBPP1;
3657 } else if (MaxLinkBPP >= NonDSCBPP0) {
3658 return 16.0;
3659 } else {
3660 return BPP_INVALID;
3661 }
3662 }
3663 } else {
3664 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3665 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3666 return BPP_INVALID;
3667 } else {
3668 return DesiredBPP;
3669 }
3670 }
3671 }
3672
3673 static noinline void CalculatePrefetchSchedulePerPlane(
3674 struct display_mode_lib *mode_lib,
3675 double HostVMInefficiencyFactor,
3676 int i,
3677 unsigned j,
3678 unsigned k)
3679 {
3680 struct vba_vars_st *v = &mode_lib->vba;
3681 Pipe myPipe;
3682
3683 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3684 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3685 myPipe.PixelClock = v->PixelClock[k];
3686 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3687 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3688 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3689 myPipe.VRatio = mode_lib->vba.VRatio[k];
3690 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3691
3692 myPipe.SourceScan = v->SourceScan[k];
3693 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3694 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3695 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3696 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3697 myPipe.InterlaceEnable = v->Interlace[k];
3698 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3699 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3700 myPipe.HTotal = v->HTotal[k];
3701 myPipe.DCCEnable = v->DCCEnable[k];
3702 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3703 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3704 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3705 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3706 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3707 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3708 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3709 mode_lib,
3710 HostVMInefficiencyFactor,
3711 &myPipe,
3712 v->DSCDelayPerState[i][k],
3713 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3714 v->DPPCLKDelaySCL,
3715 v->DPPCLKDelaySCLLBOnly,
3716 v->DPPCLKDelayCNVCCursor,
3717 v->DISPCLKDelaySubtotal,
3718 v->SwathWidthYThisState[k] / v->HRatio[k],
3719 v->OutputFormat[k],
3720 v->MaxInterDCNTileRepeaters,
3721 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3722 v->MaximumVStartup[i][j][k],
3723 v->GPUVMMaxPageTableLevels,
3724 v->GPUVMEnable,
3725 v->HostVMEnable,
3726 v->HostVMMaxNonCachedPageTableLevels,
3727 v->HostVMMinPageSize,
3728 v->DynamicMetadataEnable[k],
3729 v->DynamicMetadataVMEnabled,
3730 v->DynamicMetadataLinesBeforeActiveRequired[k],
3731 v->DynamicMetadataTransmittedBytes[k],
3732 v->UrgLatency[i],
3733 v->ExtraLatency,
3734 v->TimeCalc,
3735 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3736 v->MetaRowBytes[i][j][k],
3737 v->DPTEBytesPerRow[i][j][k],
3738 v->PrefetchLinesY[i][j][k],
3739 v->SwathWidthYThisState[k],
3740 v->PrefillY[k],
3741 v->MaxNumSwY[k],
3742 v->PrefetchLinesC[i][j][k],
3743 v->SwathWidthCThisState[k],
3744 v->PrefillC[k],
3745 v->MaxNumSwC[k],
3746 v->swath_width_luma_ub_this_state[k],
3747 v->swath_width_chroma_ub_this_state[k],
3748 v->SwathHeightYThisState[k],
3749 v->SwathHeightCThisState[k],
3750 v->TWait,
3751 &v->DSTXAfterScaler[k],
3752 &v->DSTYAfterScaler[k],
3753 &v->LineTimesForPrefetch[k],
3754 &v->PrefetchBW[k],
3755 &v->LinesForMetaPTE[k],
3756 &v->LinesForMetaAndDPTERow[k],
3757 &v->VRatioPreY[i][j][k],
3758 &v->VRatioPreC[i][j][k],
3759 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3760 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3761 &v->NoTimeForDynamicMetadata[i][j][k],
3762 &v->Tno_bw[k],
3763 &v->prefetch_vmrow_bw[k],
3764 &v->dummy7[k],
3765 &v->dummy8[k],
3766 &v->dummy13[k],
3767 &v->VUpdateOffsetPix[k],
3768 &v->VUpdateWidthPix[k],
3769 &v->VReadyOffsetPix[k]);
3770 }
3771
3772 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
3773 {
3774 int i, total_pipes = 0;
3775 for (i = 0; i < NumberOfActivePlanes; i++)
3776 total_pipes += NoOfDPPThisState[i];
3777 DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
3778 if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
3779 DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
3780 for (i = 1; i < NumberOfActivePlanes; i++)
3781 DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
3782 }
3783
3784
3785 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3786 {
3787 struct vba_vars_st *v = &mode_lib->vba;
3788
3789 int i, j;
3790 unsigned int k, m;
3791 int ReorderingBytes;
3792 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3793 bool NoChroma = true;
3794 bool EnoughWritebackUnits = true;
3795 bool P2IWith420 = false;
3796 bool DSCOnlyIfNecessaryWithBPP = false;
3797 bool DSC422NativeNotSupported = false;
3798 double MaxTotalVActiveRDBandwidth;
3799 bool ViewportExceedsSurface = false;
3800 bool FMTBufferExceeded = false;
3801
3802 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3803
3804 CalculateMinAndMaxPrefetchMode(
3805 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3806 &MinPrefetchMode, &MaxPrefetchMode);
3807
3808 /*Scale Ratio, taps Support Check*/
3809
3810 v->ScaleRatioAndTapsSupport = true;
3811 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3812 if (v->ScalerEnabled[k] == false
3813 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3814 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3815 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3816 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3817 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3818 v->ScaleRatioAndTapsSupport = false;
3819 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3820 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3821 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3822 || v->VRatio[k] > v->vtaps[k]
3823 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3824 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3825 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3826 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3827 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3828 || v->HRatioChroma[k] > v->MaxHSCLRatio
3829 || v->VRatioChroma[k] > v->MaxVSCLRatio
3830 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3831 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3832 v->ScaleRatioAndTapsSupport = false;
3833 }
3834 }
3835 /*Source Format, Pixel Format and Scan Support Check*/
3836
3837 v->SourceFormatPixelAndScanSupport = true;
3838 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3839 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3840 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3841 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3842 v->SourceFormatPixelAndScanSupport = false;
3843 }
3844 }
3845 /*Bandwidth Support Check*/
3846
3847 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3848 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3849 v->SourcePixelFormat[k],
3850 v->SurfaceTiling[k],
3851 &v->BytePerPixelY[k],
3852 &v->BytePerPixelC[k],
3853 &v->BytePerPixelInDETY[k],
3854 &v->BytePerPixelInDETC[k],
3855 &v->Read256BlockHeightY[k],
3856 &v->Read256BlockHeightC[k],
3857 &v->Read256BlockWidthY[k],
3858 &v->Read256BlockWidthC[k]);
3859 }
3860 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3861 if (v->SourceScan[k] != dm_vert) {
3862 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3863 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3864 } else {
3865 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3866 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3867 }
3868 }
3869 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3870 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3871 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3872 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3873 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3874 }
3875 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3876 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3877 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3878 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3879 } else if (v->WritebackEnable[k] == true) {
3880 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3881 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3882 } else {
3883 v->WriteBandwidth[k] = 0.0;
3884 }
3885 }
3886
3887 /*Writeback Latency support check*/
3888
3889 v->WritebackLatencySupport = true;
3890 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3891 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3892 v->WritebackLatencySupport = false;
3893 }
3894 }
3895
3896 /*Writeback Mode Support Check*/
3897
3898 v->TotalNumberOfActiveWriteback = 0;
3899 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3900 if (v->WritebackEnable[k] == true) {
3901 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3902 }
3903 }
3904
3905 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3906 EnoughWritebackUnits = false;
3907 }
3908
3909 /*Writeback Scale Ratio and Taps Support Check*/
3910
3911 v->WritebackScaleRatioAndTapsSupport = true;
3912 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3913 if (v->WritebackEnable[k] == true) {
3914 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3915 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3916 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3917 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3918 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3919 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3920 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3921 v->WritebackScaleRatioAndTapsSupport = false;
3922 }
3923 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3924 v->WritebackScaleRatioAndTapsSupport = false;
3925 }
3926 }
3927 }
3928 /*Maximum DISPCLK/DPPCLK Support check*/
3929
3930 v->WritebackRequiredDISPCLK = 0.0;
3931 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3932 if (v->WritebackEnable[k] == true) {
3933 v->WritebackRequiredDISPCLK = dml_max(
3934 v->WritebackRequiredDISPCLK,
3935 dml31_CalculateWriteBackDISPCLK(
3936 v->WritebackPixelFormat[k],
3937 v->PixelClock[k],
3938 v->WritebackHRatio[k],
3939 v->WritebackVRatio[k],
3940 v->WritebackHTaps[k],
3941 v->WritebackVTaps[k],
3942 v->WritebackSourceWidth[k],
3943 v->WritebackDestinationWidth[k],
3944 v->HTotal[k],
3945 v->WritebackLineBufferSize));
3946 }
3947 }
3948 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3949 if (v->HRatio[k] > 1.0) {
3950 v->PSCL_FACTOR[k] = dml_min(
3951 v->MaxDCHUBToPSCLThroughput,
3952 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3953 } else {
3954 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3955 }
3956 if (v->BytePerPixelC[k] == 0.0) {
3957 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3958 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3959 * dml_max3(
3960 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3961 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3962 1.0);
3963 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3964 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3965 }
3966 } else {
3967 if (v->HRatioChroma[k] > 1.0) {
3968 v->PSCL_FACTOR_CHROMA[k] = dml_min(
3969 v->MaxDCHUBToPSCLThroughput,
3970 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3971 } else {
3972 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3973 }
3974 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3975 * dml_max5(
3976 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3977 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3978 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3979 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3980 1.0);
3981 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3982 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3983 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3984 }
3985 }
3986 }
3987 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3988 int MaximumSwathWidthSupportLuma;
3989 int MaximumSwathWidthSupportChroma;
3990
3991 if (v->SurfaceTiling[k] == dm_sw_linear) {
3992 MaximumSwathWidthSupportLuma = 8192.0;
3993 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3994 MaximumSwathWidthSupportLuma = 2880.0;
3995 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3996 MaximumSwathWidthSupportLuma = 3840.0;
3997 } else {
3998 MaximumSwathWidthSupportLuma = 5760.0;
3999 }
4000
4001 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4002 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4003 } else {
4004 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4005 }
4006 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4007 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4008 if (v->BytePerPixelC[k] == 0.0) {
4009 v->MaximumSwathWidthInLineBufferChroma = 0;
4010 } else {
4011 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4012 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4013 }
4014 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4015 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4016 }
4017
4018 CalculateSwathAndDETConfiguration(
4019 true,
4020 v->NumberOfActivePlanes,
4021 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4022 v->DETBufferSizeInKByte,
4023 v->MaximumSwathWidthLuma,
4024 v->MaximumSwathWidthChroma,
4025 v->SourceScan,
4026 v->SourcePixelFormat,
4027 v->SurfaceTiling,
4028 v->ViewportWidth,
4029 v->ViewportHeight,
4030 v->SurfaceWidthY,
4031 v->SurfaceWidthC,
4032 v->SurfaceHeightY,
4033 v->SurfaceHeightC,
4034 v->Read256BlockHeightY,
4035 v->Read256BlockHeightC,
4036 v->Read256BlockWidthY,
4037 v->Read256BlockWidthC,
4038 v->odm_combine_dummy,
4039 v->BlendingAndTiming,
4040 v->BytePerPixelY,
4041 v->BytePerPixelC,
4042 v->BytePerPixelInDETY,
4043 v->BytePerPixelInDETC,
4044 v->HActive,
4045 v->HRatio,
4046 v->HRatioChroma,
4047 v->NoOfDPPThisState,
4048 v->swath_width_luma_ub_this_state,
4049 v->swath_width_chroma_ub_this_state,
4050 v->SwathWidthYThisState,
4051 v->SwathWidthCThisState,
4052 v->SwathHeightYThisState,
4053 v->SwathHeightCThisState,
4054 v->DETBufferSizeYThisState,
4055 v->DETBufferSizeCThisState,
4056 v->SingleDPPViewportSizeSupportPerPlane,
4057 &v->ViewportSizeSupport[0][0]);
4058
4059 for (i = 0; i < v->soc.num_states; i++) {
4060 for (j = 0; j < 2; j++) {
4061 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4062 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4063 v->RequiredDISPCLK[i][j] = 0.0;
4064 v->DISPCLK_DPPCLK_Support[i][j] = true;
4065 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4066 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4067 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4068 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4069 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4070 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4071 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4072 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4073 }
4074 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4075 * (1 + v->DISPCLKRampingMargin / 100.0);
4076 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4077 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4078 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4079 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4080 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4081 }
4082 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4083 * (1 + v->DISPCLKRampingMargin / 100.0);
4084 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4085 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4086 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4087 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4088 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4089 }
4090
4091 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4092 || !(v->Output[k] == dm_dp ||
4093 v->Output[k] == dm_dp2p0 ||
4094 v->Output[k] == dm_edp)) {
4095 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4096 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4097
4098 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4099 FMTBufferExceeded = true;
4100 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4101 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4102 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4103 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4104 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4105 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4106 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4107 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4108 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4109 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4110 } else {
4111 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4112 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4113 }
4114 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4115 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4116 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4117 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4118 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4119 } else {
4120 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4121 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4122 }
4123 }
4124 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4125 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4126 if (v->Output[k] == dm_hdmi) {
4127 FMTBufferExceeded = true;
4128 } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4129 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4130 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4131
4132 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4133 FMTBufferExceeded = true;
4134 } else {
4135 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4136 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4137 }
4138 }
4139 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4140 v->MPCCombine[i][j][k] = false;
4141 v->NoOfDPP[i][j][k] = 4;
4142 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4143 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4144 v->MPCCombine[i][j][k] = false;
4145 v->NoOfDPP[i][j][k] = 2;
4146 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4147 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4148 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4149 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4150 v->MPCCombine[i][j][k] = false;
4151 v->NoOfDPP[i][j][k] = 1;
4152 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4153 } else {
4154 v->MPCCombine[i][j][k] = true;
4155 v->NoOfDPP[i][j][k] = 2;
4156 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4157 }
4158 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4159 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4160 > v->MaxDppclkRoundedDownToDFSGranularity)
4161 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4162 v->DISPCLK_DPPCLK_Support[i][j] = false;
4163 }
4164 if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
4165 v->MPCCombine[i][j][k] = true;
4166 v->NoOfDPP[i][j][k] = 2;
4167 }
4168 }
4169 v->TotalNumberOfActiveDPP[i][j] = 0;
4170 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4171 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4172 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4173 if (v->NoOfDPP[i][j][k] == 1)
4174 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4175 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4176 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4177 NoChroma = false;
4178 }
4179
4180 // UPTO
4181 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4182 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4183 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4184 double BWOfNonSplitPlaneOfMaximumBandwidth;
4185 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4186 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4187 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4188 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4189 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4190 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4191 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4192 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4193 }
4194 }
4195 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4196 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4197 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4198 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4199 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4200 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4201 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4202 }
4203 }
4204 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4205 v->RequiredDISPCLK[i][j] = 0.0;
4206 v->DISPCLK_DPPCLK_Support[i][j] = true;
4207 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4208 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4209 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4210 v->MPCCombine[i][j][k] = true;
4211 v->NoOfDPP[i][j][k] = 2;
4212 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4213 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4214 } else {
4215 v->MPCCombine[i][j][k] = false;
4216 v->NoOfDPP[i][j][k] = 1;
4217 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4218 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4219 }
4220 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4221 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4222 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4223 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4224 } else {
4225 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4226 }
4227 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4228 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4229 > v->MaxDppclkRoundedDownToDFSGranularity)
4230 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4231 v->DISPCLK_DPPCLK_Support[i][j] = false;
4232 }
4233 }
4234 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4235 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4236 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4237 }
4238 }
4239 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4240 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4241 v->DISPCLK_DPPCLK_Support[i][j] = false;
4242 }
4243 }
4244 }
4245
4246 /*Total Available Pipes Support Check*/
4247
4248 for (i = 0; i < v->soc.num_states; i++) {
4249 for (j = 0; j < 2; j++) {
4250 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4251 v->TotalAvailablePipesSupport[i][j] = true;
4252 } else {
4253 v->TotalAvailablePipesSupport[i][j] = false;
4254 }
4255 }
4256 }
4257 /*Display IO and DSC Support Check*/
4258
4259 v->NonsupportedDSCInputBPC = false;
4260 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4261 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4262 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4263 v->NonsupportedDSCInputBPC = true;
4264 }
4265 }
4266
4267 /*Number Of DSC Slices*/
4268 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4269 if (v->BlendingAndTiming[k] == k) {
4270 if (v->PixelClockBackEnd[k] > 3200) {
4271 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4272 } else if (v->PixelClockBackEnd[k] > 1360) {
4273 v->NumberOfDSCSlices[k] = 8;
4274 } else if (v->PixelClockBackEnd[k] > 680) {
4275 v->NumberOfDSCSlices[k] = 4;
4276 } else if (v->PixelClockBackEnd[k] > 340) {
4277 v->NumberOfDSCSlices[k] = 2;
4278 } else {
4279 v->NumberOfDSCSlices[k] = 1;
4280 }
4281 } else {
4282 v->NumberOfDSCSlices[k] = 0;
4283 }
4284 }
4285
4286 for (i = 0; i < v->soc.num_states; i++) {
4287 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4288 v->RequiresDSC[i][k] = false;
4289 v->RequiresFEC[i][k] = false;
4290 if (v->BlendingAndTiming[k] == k) {
4291 if (v->Output[k] == dm_hdmi) {
4292 v->RequiresDSC[i][k] = false;
4293 v->RequiresFEC[i][k] = false;
4294 v->OutputBppPerState[i][k] = TruncToValidBPP(
4295 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4296 3,
4297 v->HTotal[k],
4298 v->HActive[k],
4299 v->PixelClockBackEnd[k],
4300 v->ForcedOutputLinkBPP[k],
4301 false,
4302 v->Output[k],
4303 v->OutputFormat[k],
4304 v->DSCInputBitPerComponent[k],
4305 v->NumberOfDSCSlices[k],
4306 v->AudioSampleRate[k],
4307 v->AudioSampleLayout[k],
4308 v->ODMCombineEnablePerState[i][k]);
4309 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4310 if (v->DSCEnable[k] == true) {
4311 v->RequiresDSC[i][k] = true;
4312 v->LinkDSCEnable = true;
4313 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4314 v->RequiresFEC[i][k] = true;
4315 } else {
4316 v->RequiresFEC[i][k] = false;
4317 }
4318 } else {
4319 v->RequiresDSC[i][k] = false;
4320 v->LinkDSCEnable = false;
4321 if (v->Output[k] == dm_dp2p0) {
4322 v->RequiresFEC[i][k] = true;
4323 } else {
4324 v->RequiresFEC[i][k] = false;
4325 }
4326 }
4327 if (v->Output[k] == dm_dp2p0) {
4328 v->Outbpp = BPP_INVALID;
4329 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4330 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4331 v->Outbpp = TruncToValidBPP(
4332 (1.0 - v->Downspreading / 100.0) * 10000,
4333 v->OutputLinkDPLanes[k],
4334 v->HTotal[k],
4335 v->HActive[k],
4336 v->PixelClockBackEnd[k],
4337 v->ForcedOutputLinkBPP[k],
4338 v->LinkDSCEnable,
4339 v->Output[k],
4340 v->OutputFormat[k],
4341 v->DSCInputBitPerComponent[k],
4342 v->NumberOfDSCSlices[k],
4343 v->AudioSampleRate[k],
4344 v->AudioSampleLayout[k],
4345 v->ODMCombineEnablePerState[i][k]);
4346 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4347 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4348 v->RequiresDSC[i][k] = true;
4349 v->LinkDSCEnable = true;
4350 v->Outbpp = TruncToValidBPP(
4351 (1.0 - v->Downspreading / 100.0) * 10000,
4352 v->OutputLinkDPLanes[k],
4353 v->HTotal[k],
4354 v->HActive[k],
4355 v->PixelClockBackEnd[k],
4356 v->ForcedOutputLinkBPP[k],
4357 v->LinkDSCEnable,
4358 v->Output[k],
4359 v->OutputFormat[k],
4360 v->DSCInputBitPerComponent[k],
4361 v->NumberOfDSCSlices[k],
4362 v->AudioSampleRate[k],
4363 v->AudioSampleLayout[k],
4364 v->ODMCombineEnablePerState[i][k]);
4365 }
4366 v->OutputBppPerState[i][k] = v->Outbpp;
4367 // TODO: Need some other way to handle this nonsense
4368 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4369 }
4370 if (v->Outbpp == BPP_INVALID &&
4371 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4372 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4373 v->Outbpp = TruncToValidBPP(
4374 (1.0 - v->Downspreading / 100.0) * 13500,
4375 v->OutputLinkDPLanes[k],
4376 v->HTotal[k],
4377 v->HActive[k],
4378 v->PixelClockBackEnd[k],
4379 v->ForcedOutputLinkBPP[k],
4380 v->LinkDSCEnable,
4381 v->Output[k],
4382 v->OutputFormat[k],
4383 v->DSCInputBitPerComponent[k],
4384 v->NumberOfDSCSlices[k],
4385 v->AudioSampleRate[k],
4386 v->AudioSampleLayout[k],
4387 v->ODMCombineEnablePerState[i][k]);
4388 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4389 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4390 v->RequiresDSC[i][k] = true;
4391 v->LinkDSCEnable = true;
4392 v->Outbpp = TruncToValidBPP(
4393 (1.0 - v->Downspreading / 100.0) * 13500,
4394 v->OutputLinkDPLanes[k],
4395 v->HTotal[k],
4396 v->HActive[k],
4397 v->PixelClockBackEnd[k],
4398 v->ForcedOutputLinkBPP[k],
4399 v->LinkDSCEnable,
4400 v->Output[k],
4401 v->OutputFormat[k],
4402 v->DSCInputBitPerComponent[k],
4403 v->NumberOfDSCSlices[k],
4404 v->AudioSampleRate[k],
4405 v->AudioSampleLayout[k],
4406 v->ODMCombineEnablePerState[i][k]);
4407 }
4408 v->OutputBppPerState[i][k] = v->Outbpp;
4409 // TODO: Need some other way to handle this nonsense
4410 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4411 }
4412 if (v->Outbpp == BPP_INVALID &&
4413 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4414 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4415 v->Outbpp = TruncToValidBPP(
4416 (1.0 - v->Downspreading / 100.0) * 20000,
4417 v->OutputLinkDPLanes[k],
4418 v->HTotal[k],
4419 v->HActive[k],
4420 v->PixelClockBackEnd[k],
4421 v->ForcedOutputLinkBPP[k],
4422 v->LinkDSCEnable,
4423 v->Output[k],
4424 v->OutputFormat[k],
4425 v->DSCInputBitPerComponent[k],
4426 v->NumberOfDSCSlices[k],
4427 v->AudioSampleRate[k],
4428 v->AudioSampleLayout[k],
4429 v->ODMCombineEnablePerState[i][k]);
4430 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4431 v->ForcedOutputLinkBPP[k] == 0) {
4432 v->RequiresDSC[i][k] = true;
4433 v->LinkDSCEnable = true;
4434 v->Outbpp = TruncToValidBPP(
4435 (1.0 - v->Downspreading / 100.0) * 20000,
4436 v->OutputLinkDPLanes[k],
4437 v->HTotal[k],
4438 v->HActive[k],
4439 v->PixelClockBackEnd[k],
4440 v->ForcedOutputLinkBPP[k],
4441 v->LinkDSCEnable,
4442 v->Output[k],
4443 v->OutputFormat[k],
4444 v->DSCInputBitPerComponent[k],
4445 v->NumberOfDSCSlices[k],
4446 v->AudioSampleRate[k],
4447 v->AudioSampleLayout[k],
4448 v->ODMCombineEnablePerState[i][k]);
4449 }
4450 v->OutputBppPerState[i][k] = v->Outbpp;
4451 // TODO: Need some other way to handle this nonsense
4452 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4453 }
4454 } else {
4455 v->Outbpp = BPP_INVALID;
4456 if (v->PHYCLKPerState[i] >= 270.0) {
4457 v->Outbpp = TruncToValidBPP(
4458 (1.0 - v->Downspreading / 100.0) * 2700,
4459 v->OutputLinkDPLanes[k],
4460 v->HTotal[k],
4461 v->HActive[k],
4462 v->PixelClockBackEnd[k],
4463 v->ForcedOutputLinkBPP[k],
4464 v->LinkDSCEnable,
4465 v->Output[k],
4466 v->OutputFormat[k],
4467 v->DSCInputBitPerComponent[k],
4468 v->NumberOfDSCSlices[k],
4469 v->AudioSampleRate[k],
4470 v->AudioSampleLayout[k],
4471 v->ODMCombineEnablePerState[i][k]);
4472 v->OutputBppPerState[i][k] = v->Outbpp;
4473 // TODO: Need some other way to handle this nonsense
4474 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4475 }
4476 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4477 v->Outbpp = TruncToValidBPP(
4478 (1.0 - v->Downspreading / 100.0) * 5400,
4479 v->OutputLinkDPLanes[k],
4480 v->HTotal[k],
4481 v->HActive[k],
4482 v->PixelClockBackEnd[k],
4483 v->ForcedOutputLinkBPP[k],
4484 v->LinkDSCEnable,
4485 v->Output[k],
4486 v->OutputFormat[k],
4487 v->DSCInputBitPerComponent[k],
4488 v->NumberOfDSCSlices[k],
4489 v->AudioSampleRate[k],
4490 v->AudioSampleLayout[k],
4491 v->ODMCombineEnablePerState[i][k]);
4492 v->OutputBppPerState[i][k] = v->Outbpp;
4493 // TODO: Need some other way to handle this nonsense
4494 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4495 }
4496 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4497 v->Outbpp = TruncToValidBPP(
4498 (1.0 - v->Downspreading / 100.0) * 8100,
4499 v->OutputLinkDPLanes[k],
4500 v->HTotal[k],
4501 v->HActive[k],
4502 v->PixelClockBackEnd[k],
4503 v->ForcedOutputLinkBPP[k],
4504 v->LinkDSCEnable,
4505 v->Output[k],
4506 v->OutputFormat[k],
4507 v->DSCInputBitPerComponent[k],
4508 v->NumberOfDSCSlices[k],
4509 v->AudioSampleRate[k],
4510 v->AudioSampleLayout[k],
4511 v->ODMCombineEnablePerState[i][k]);
4512 v->OutputBppPerState[i][k] = v->Outbpp;
4513 // TODO: Need some other way to handle this nonsense
4514 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4515 }
4516 }
4517 }
4518 } else {
4519 v->OutputBppPerState[i][k] = 0;
4520 }
4521 }
4522 }
4523
4524 for (i = 0; i < v->soc.num_states; i++) {
4525 v->LinkCapacitySupport[i] = true;
4526 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4527 if (v->BlendingAndTiming[k] == k
4528 && (v->Output[k] == dm_dp ||
4529 v->Output[k] == dm_edp ||
4530 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4531 v->LinkCapacitySupport[i] = false;
4532 }
4533 }
4534 }
4535
4536 // UPTO 2172
4537 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4538 if (v->BlendingAndTiming[k] == k
4539 && (v->Output[k] == dm_dp ||
4540 v->Output[k] == dm_edp ||
4541 v->Output[k] == dm_hdmi)) {
4542 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4543 P2IWith420 = true;
4544 }
4545 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4546 && !v->DSC422NativeSupport) {
4547 DSC422NativeNotSupported = true;
4548 }
4549 }
4550 }
4551
4552 for (i = 0; i < v->soc.num_states; ++i) {
4553 v->ODMCombine4To1SupportCheckOK[i] = true;
4554 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4555 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4556 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4557 || v->Output[k] == dm_hdmi)) {
4558 v->ODMCombine4To1SupportCheckOK[i] = false;
4559 }
4560 }
4561 }
4562
4563 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4564
4565 for (i = 0; i < v->soc.num_states; i++) {
4566 v->NotEnoughDSCUnits[i] = false;
4567 v->TotalDSCUnitsRequired = 0.0;
4568 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4569 if (v->RequiresDSC[i][k] == true) {
4570 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4571 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4572 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4573 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4574 } else {
4575 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4576 }
4577 }
4578 }
4579 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4580 v->NotEnoughDSCUnits[i] = true;
4581 }
4582 }
4583 /*DSC Delay per state*/
4584
4585 for (i = 0; i < v->soc.num_states; i++) {
4586 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4587 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4588 v->BPP = 0.0;
4589 } else {
4590 v->BPP = v->OutputBppPerState[i][k];
4591 }
4592 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4593 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4594 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4595 v->DSCInputBitPerComponent[k],
4596 v->BPP,
4597 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4598 v->NumberOfDSCSlices[k],
4599 v->OutputFormat[k],
4600 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4601 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4602 v->DSCDelayPerState[i][k] = 2.0
4603 * (dscceComputeDelay(
4604 v->DSCInputBitPerComponent[k],
4605 v->BPP,
4606 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4607 v->NumberOfDSCSlices[k] / 2,
4608 v->OutputFormat[k],
4609 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4610 } else {
4611 v->DSCDelayPerState[i][k] = 4.0
4612 * (dscceComputeDelay(
4613 v->DSCInputBitPerComponent[k],
4614 v->BPP,
4615 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4616 v->NumberOfDSCSlices[k] / 4,
4617 v->OutputFormat[k],
4618 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4619 }
4620 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4621 } else {
4622 v->DSCDelayPerState[i][k] = 0.0;
4623 }
4624 }
4625 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4626 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4627 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4628 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4629 }
4630 }
4631 }
4632 }
4633
4634 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4635 //
4636 for (i = 0; i < v->soc.num_states; ++i) {
4637 for (j = 0; j <= 1; ++j) {
4638 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4639 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4640 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4641 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4642 }
4643
4644 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
4645 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
4646 CalculateSwathAndDETConfiguration(
4647 false,
4648 v->NumberOfActivePlanes,
4649 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4650 v->DETBufferSizeInKByte,
4651 v->MaximumSwathWidthLuma,
4652 v->MaximumSwathWidthChroma,
4653 v->SourceScan,
4654 v->SourcePixelFormat,
4655 v->SurfaceTiling,
4656 v->ViewportWidth,
4657 v->ViewportHeight,
4658 v->SurfaceWidthY,
4659 v->SurfaceWidthC,
4660 v->SurfaceHeightY,
4661 v->SurfaceHeightC,
4662 v->Read256BlockHeightY,
4663 v->Read256BlockHeightC,
4664 v->Read256BlockWidthY,
4665 v->Read256BlockWidthC,
4666 v->ODMCombineEnableThisState,
4667 v->BlendingAndTiming,
4668 v->BytePerPixelY,
4669 v->BytePerPixelC,
4670 v->BytePerPixelInDETY,
4671 v->BytePerPixelInDETC,
4672 v->HActive,
4673 v->HRatio,
4674 v->HRatioChroma,
4675 v->NoOfDPPThisState,
4676 v->swath_width_luma_ub_this_state,
4677 v->swath_width_chroma_ub_this_state,
4678 v->SwathWidthYThisState,
4679 v->SwathWidthCThisState,
4680 v->SwathHeightYThisState,
4681 v->SwathHeightCThisState,
4682 v->DETBufferSizeYThisState,
4683 v->DETBufferSizeCThisState,
4684 v->dummystring,
4685 &v->ViewportSizeSupport[i][j]);
4686
4687 CalculateDCFCLKDeepSleep(
4688 mode_lib,
4689 v->NumberOfActivePlanes,
4690 v->BytePerPixelY,
4691 v->BytePerPixelC,
4692 v->VRatio,
4693 v->VRatioChroma,
4694 v->SwathWidthYThisState,
4695 v->SwathWidthCThisState,
4696 v->NoOfDPPThisState,
4697 v->HRatio,
4698 v->HRatioChroma,
4699 v->PixelClock,
4700 v->PSCL_FACTOR,
4701 v->PSCL_FACTOR_CHROMA,
4702 v->RequiredDPPCLKThisState,
4703 v->ReadBandwidthLuma,
4704 v->ReadBandwidthChroma,
4705 v->ReturnBusWidth,
4706 &v->ProjectedDCFCLKDeepSleep[i][j]);
4707
4708 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4709 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4710 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4711 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4712 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4713 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4714 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4715 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4716 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4717 }
4718 }
4719 }
4720
4721 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4722 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4723 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4724 }
4725
4726 for (i = 0; i < v->soc.num_states; i++) {
4727 for (j = 0; j < 2; j++) {
4728 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4729
4730 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4731 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4732 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4733 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4734 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4735 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4736 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4737 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4738 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4739 }
4740
4741 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4742 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4743 if (v->DCCEnable[k] == true) {
4744 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4745 }
4746 }
4747
4748 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4749 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4750 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4751
4752 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4753 && v->SourceScan[k] != dm_vert) {
4754 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4755 / 2;
4756 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4757 } else {
4758 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4759 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4760 }
4761
4762 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4763 mode_lib,
4764 v->DCCEnable[k],
4765 v->Read256BlockHeightC[k],
4766 v->Read256BlockWidthC[k],
4767 v->SourcePixelFormat[k],
4768 v->SurfaceTiling[k],
4769 v->BytePerPixelC[k],
4770 v->SourceScan[k],
4771 v->SwathWidthCThisState[k],
4772 v->ViewportHeightChroma[k],
4773 v->GPUVMEnable,
4774 v->HostVMEnable,
4775 v->HostVMMaxNonCachedPageTableLevels,
4776 v->GPUVMMinPageSize,
4777 v->HostVMMinPageSize,
4778 v->PTEBufferSizeInRequestsForChroma,
4779 v->PitchC[k],
4780 0.0,
4781 &v->MacroTileWidthC[k],
4782 &v->MetaRowBytesC,
4783 &v->DPTEBytesPerRowC,
4784 &v->PTEBufferSizeNotExceededC[i][j][k],
4785 &v->dummyinteger7,
4786 &v->dpte_row_height_chroma[k],
4787 &v->dummyinteger28,
4788 &v->dummyinteger26,
4789 &v->dummyinteger23,
4790 &v->meta_row_height_chroma[k],
4791 &v->dummyinteger8,
4792 &v->dummyinteger9,
4793 &v->dummyinteger19,
4794 &v->dummyinteger20,
4795 &v->dummyinteger17,
4796 &v->dummyinteger10,
4797 &v->dummyinteger11);
4798
4799 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4800 mode_lib,
4801 v->VRatioChroma[k],
4802 v->VTAPsChroma[k],
4803 v->Interlace[k],
4804 v->ProgressiveToInterlaceUnitInOPP,
4805 v->SwathHeightCThisState[k],
4806 v->ViewportYStartC[k],
4807 &v->PrefillC[k],
4808 &v->MaxNumSwC[k]);
4809 } else {
4810 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4811 v->PTEBufferSizeInRequestsForChroma = 0;
4812 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4813 v->MetaRowBytesC = 0.0;
4814 v->DPTEBytesPerRowC = 0.0;
4815 v->PrefetchLinesC[i][j][k] = 0.0;
4816 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4817 }
4818 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4819 mode_lib,
4820 v->DCCEnable[k],
4821 v->Read256BlockHeightY[k],
4822 v->Read256BlockWidthY[k],
4823 v->SourcePixelFormat[k],
4824 v->SurfaceTiling[k],
4825 v->BytePerPixelY[k],
4826 v->SourceScan[k],
4827 v->SwathWidthYThisState[k],
4828 v->ViewportHeight[k],
4829 v->GPUVMEnable,
4830 v->HostVMEnable,
4831 v->HostVMMaxNonCachedPageTableLevels,
4832 v->GPUVMMinPageSize,
4833 v->HostVMMinPageSize,
4834 v->PTEBufferSizeInRequestsForLuma,
4835 v->PitchY[k],
4836 v->DCCMetaPitchY[k],
4837 &v->MacroTileWidthY[k],
4838 &v->MetaRowBytesY,
4839 &v->DPTEBytesPerRowY,
4840 &v->PTEBufferSizeNotExceededY[i][j][k],
4841 &v->dummyinteger7,
4842 &v->dpte_row_height[k],
4843 &v->dummyinteger29,
4844 &v->dummyinteger27,
4845 &v->dummyinteger24,
4846 &v->meta_row_height[k],
4847 &v->dummyinteger25,
4848 &v->dpte_group_bytes[k],
4849 &v->dummyinteger21,
4850 &v->dummyinteger22,
4851 &v->dummyinteger18,
4852 &v->dummyinteger5,
4853 &v->dummyinteger6);
4854 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4855 mode_lib,
4856 v->VRatio[k],
4857 v->vtaps[k],
4858 v->Interlace[k],
4859 v->ProgressiveToInterlaceUnitInOPP,
4860 v->SwathHeightYThisState[k],
4861 v->ViewportYStartY[k],
4862 &v->PrefillY[k],
4863 &v->MaxNumSwY[k]);
4864 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4865 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4866 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4867
4868 CalculateRowBandwidth(
4869 v->GPUVMEnable,
4870 v->SourcePixelFormat[k],
4871 v->VRatio[k],
4872 v->VRatioChroma[k],
4873 v->DCCEnable[k],
4874 v->HTotal[k] / v->PixelClock[k],
4875 v->MetaRowBytesY,
4876 v->MetaRowBytesC,
4877 v->meta_row_height[k],
4878 v->meta_row_height_chroma[k],
4879 v->DPTEBytesPerRowY,
4880 v->DPTEBytesPerRowC,
4881 v->dpte_row_height[k],
4882 v->dpte_row_height_chroma[k],
4883 &v->meta_row_bandwidth[i][j][k],
4884 &v->dpte_row_bandwidth[i][j][k]);
4885 }
4886 /*DCCMetaBufferSizeSupport(i, j) = True
4887 For k = 0 To NumberOfActivePlanes - 1
4888 If MetaRowBytes(i, j, k) > 24064 Then
4889 DCCMetaBufferSizeSupport(i, j) = False
4890 End If
4891 Next k*/
4892 v->DCCMetaBufferSizeSupport[i][j] = true;
4893 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4894 if (v->MetaRowBytes[i][j][k] > 24064)
4895 v->DCCMetaBufferSizeSupport[i][j] = false;
4896 }
4897 v->UrgLatency[i] = CalculateUrgentLatency(
4898 v->UrgentLatencyPixelDataOnly,
4899 v->UrgentLatencyPixelMixedWithVMData,
4900 v->UrgentLatencyVMDataOnly,
4901 v->DoUrgentLatencyAdjustment,
4902 v->UrgentLatencyAdjustmentFabricClockComponent,
4903 v->UrgentLatencyAdjustmentFabricClockReference,
4904 v->FabricClockPerState[i]);
4905
4906 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4907 CalculateUrgentBurstFactor(
4908 v->swath_width_luma_ub_this_state[k],
4909 v->swath_width_chroma_ub_this_state[k],
4910 v->SwathHeightYThisState[k],
4911 v->SwathHeightCThisState[k],
4912 v->HTotal[k] / v->PixelClock[k],
4913 v->UrgLatency[i],
4914 v->CursorBufferSize,
4915 v->CursorWidth[k][0],
4916 v->CursorBPP[k][0],
4917 v->VRatio[k],
4918 v->VRatioChroma[k],
4919 v->BytePerPixelInDETY[k],
4920 v->BytePerPixelInDETC[k],
4921 v->DETBufferSizeYThisState[k],
4922 v->DETBufferSizeCThisState[k],
4923 &v->UrgentBurstFactorCursor[k],
4924 &v->UrgentBurstFactorLuma[k],
4925 &v->UrgentBurstFactorChroma[k],
4926 &NotUrgentLatencyHiding[k]);
4927 }
4928
4929 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4930 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4931 if (NotUrgentLatencyHiding[k]) {
4932 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4933 }
4934 }
4935
4936 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4937 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4938 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4939 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4940 }
4941
4942 v->TotalVActivePixelBandwidth[i][j] = 0;
4943 v->TotalVActiveCursorBandwidth[i][j] = 0;
4944 v->TotalMetaRowBandwidth[i][j] = 0;
4945 v->TotalDPTERowBandwidth[i][j] = 0;
4946 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4947 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4948 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4949 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4950 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4951 }
4952 }
4953 }
4954
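4955 //Calculate Return BW: first the per-plane writeback delay and maximum VStartup, then (further below) DCFCLKState and ReturnBWPerState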
4956 for (i = 0; i < v->soc.num_states; ++i) {
4957 for (j = 0; j <= 1; ++j) {
4958 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4959 if (v->BlendingAndTiming[k] == k) {
4960 if (v->WritebackEnable[k] == true) {
4961 v->WritebackDelayTime[k] = v->WritebackLatency
4962 + CalculateWriteBackDelay(
4963 v->WritebackPixelFormat[k],
4964 v->WritebackHRatio[k],
4965 v->WritebackVRatio[k],
4966 v->WritebackVTaps[k],
4967 v->WritebackDestinationWidth[k],
4968 v->WritebackDestinationHeight[k],
4969 v->WritebackSourceHeight[k],
4970 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4971 } else {
4972 v->WritebackDelayTime[k] = 0.0;
4973 }
4974 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4975 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4976 v->WritebackDelayTime[k] = dml_max(
4977 v->WritebackDelayTime[k],
4978 v->WritebackLatency
4979 + CalculateWriteBackDelay(
4980 v->WritebackPixelFormat[m],
4981 v->WritebackHRatio[m],
4982 v->WritebackVRatio[m],
4983 v->WritebackVTaps[m],
4984 v->WritebackDestinationWidth[m],
4985 v->WritebackDestinationHeight[m],
4986 v->WritebackSourceHeight[m],
4987 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4988 }
4989 }
4990 }
4991 }
4992 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4993 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4994 if (v->BlendingAndTiming[k] == m) {
4995 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4996 }
4997 }
4998 }
4999 v->MaxMaxVStartup[i][j] = 0;
5000 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5001 v->MaximumVStartup[i][j][k] =
5002 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5003 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5004 v->VTotal[k] - v->VActive[k]
5005 - dml_max(
5006 1.0,
5007 dml_ceil(
5008 1.0 * v->WritebackDelayTime[k]
5009 / (v->HTotal[k]
5010 / v->PixelClock[k]),
5011 1.0));
5012 if (v->MaximumVStartup[i][j][k] > 1023)
5013 v->MaximumVStartup[i][j][k] = 1023;
5014 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5015 }
5016 }
5017 }
5018
5019 ReorderingBytes = v->NumberOfChannels
5020 * dml_max3(
5021 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5022 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5023 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5024
5025 for (i = 0; i < v->soc.num_states; ++i) {
5026 for (j = 0; j <= 1; ++j) {
5027 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5028 }
5029 }
5030
5031 if (v->UseMinimumRequiredDCFCLK == true)
5032 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5033
5034 for (i = 0; i < v->soc.num_states; ++i) {
5035 for (j = 0; j <= 1; ++j) {
5036 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5037 v->ReturnBusWidth * v->DCFCLKState[i][j],
5038 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5039 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5040 double PixelDataOnlyReturnBWPerState = dml_min(
5041 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5042 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5043 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5044 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5045 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5046
5047 if (v->HostVMEnable != true) {
5048 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5049 } else {
5050 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5051 }
5052 }
5053 }
5054
5055 //Re-ordering Buffer Support Check
5056 for (i = 0; i < v->soc.num_states; ++i) {
5057 for (j = 0; j <= 1; ++j) {
5058 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5059 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5060 v->ROBSupport[i][j] = true;
5061 } else {
5062 v->ROBSupport[i][j] = false;
5063 }
5064 }
5065 }
5066
5067 //Vertical Active BW support check
5068
5069 MaxTotalVActiveRDBandwidth = 0;
5070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5071 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5072 }
5073
5074 for (i = 0; i < v->soc.num_states; ++i) {
5075 for (j = 0; j <= 1; ++j) {
5076 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5077 dml_min(
5078 v->ReturnBusWidth * v->DCFCLKState[i][j],
5079 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5080 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5081 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5082 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5083
5084 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5085 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5086 } else {
5087 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5088 }
5089 }
5090 }
5091
5092 v->UrgentLatency = CalculateUrgentLatency(
5093 v->UrgentLatencyPixelDataOnly,
5094 v->UrgentLatencyPixelMixedWithVMData,
5095 v->UrgentLatencyVMDataOnly,
5096 v->DoUrgentLatencyAdjustment,
5097 v->UrgentLatencyAdjustmentFabricClockComponent,
5098 v->UrgentLatencyAdjustmentFabricClockReference,
5099 v->FabricClock);
5100 //Prefetch Check
5101 for (i = 0; i < v->soc.num_states; ++i) {
5102 for (j = 0; j <= 1; ++j) {
5103 double VMDataOnlyReturnBWPerState;
5104 double HostVMInefficiencyFactor = 1;
5105 int NextPrefetchModeState = MinPrefetchMode;
5106 bool UnboundedRequestEnabledThisState = false;
5107 int CompressedBufferSizeInkByteThisState = 0;
5108 double dummy;
5109
5110 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5111
5112 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5113 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5114 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5115 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5116 }
5117
5118 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5119 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5120 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5121 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5122 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5123 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5124 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5125 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5126 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5127 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5128 }
5129
5130 VMDataOnlyReturnBWPerState = dml_min(
5131 dml_min(
5132 v->ReturnBusWidth * v->DCFCLKState[i][j],
5133 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5134 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5135 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5136 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5137 if (v->GPUVMEnable && v->HostVMEnable)
5138 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5139
5140 v->ExtraLatency = CalculateExtraLatency(
5141 v->RoundTripPingLatencyCycles,
5142 ReorderingBytes,
5143 v->DCFCLKState[i][j],
5144 v->TotalNumberOfActiveDPP[i][j],
5145 v->PixelChunkSizeInKByte,
5146 v->TotalNumberOfDCCActiveDPP[i][j],
5147 v->MetaChunkSize,
5148 v->ReturnBWPerState[i][j],
5149 v->GPUVMEnable,
5150 v->HostVMEnable,
5151 v->NumberOfActivePlanes,
5152 v->NoOfDPPThisState,
5153 v->dpte_group_bytes,
5154 HostVMInefficiencyFactor,
5155 v->HostVMMinPageSize,
5156 v->HostVMMaxNonCachedPageTableLevels);
5157
5158 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5159 do {
5160 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5161 v->MaxVStartup = v->NextMaxVStartup;
5162
5163 v->TWait = CalculateTWait(
5164 v->PrefetchModePerState[i][j],
5165 v->DRAMClockChangeLatency,
5166 v->UrgLatency[i],
5167 v->SREnterPlusExitTime);
5168
5169 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5170 CalculatePrefetchSchedulePerPlane(mode_lib,
5171 HostVMInefficiencyFactor,
5172 i, j, k);
5173 }
5174
5175 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5176 CalculateUrgentBurstFactor(
5177 v->swath_width_luma_ub_this_state[k],
5178 v->swath_width_chroma_ub_this_state[k],
5179 v->SwathHeightYThisState[k],
5180 v->SwathHeightCThisState[k],
5181 v->HTotal[k] / v->PixelClock[k],
5182 v->UrgLatency[i],
5183 v->CursorBufferSize,
5184 v->CursorWidth[k][0],
5185 v->CursorBPP[k][0],
5186 v->VRatioPreY[i][j][k],
5187 v->VRatioPreC[i][j][k],
5188 v->BytePerPixelInDETY[k],
5189 v->BytePerPixelInDETC[k],
5190 v->DETBufferSizeYThisState[k],
5191 v->DETBufferSizeCThisState[k],
5192 &v->UrgentBurstFactorCursorPre[k],
5193 &v->UrgentBurstFactorLumaPre[k],
5194 &v->UrgentBurstFactorChromaPre[k],
5195 &v->NotUrgentLatencyHidingPre[k]);
5196 }
5197
5198 v->MaximumReadBandwidthWithPrefetch = 0.0;
5199 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5200 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5201 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5202
5203 v->MaximumReadBandwidthWithPrefetch =
5204 v->MaximumReadBandwidthWithPrefetch
5205 + dml_max3(
5206 v->VActivePixelBandwidth[i][j][k]
5207 + v->VActiveCursorBandwidth[i][j][k]
5208 + v->NoOfDPP[i][j][k]
5209 * (v->meta_row_bandwidth[i][j][k]
5210 + v->dpte_row_bandwidth[i][j][k]),
5211 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5212 v->NoOfDPP[i][j][k]
5213 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5214 * v->UrgentBurstFactorLumaPre[k]
5215 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5216 * v->UrgentBurstFactorChromaPre[k])
5217 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5218 }
5219
5220 v->NotEnoughUrgentLatencyHidingPre = false;
5221 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5222 if (v->NotUrgentLatencyHidingPre[k] == true) {
5223 v->NotEnoughUrgentLatencyHidingPre = true;
5224 }
5225 }
5226
5227 v->PrefetchSupported[i][j] = true;
5228 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5229 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5230 v->PrefetchSupported[i][j] = false;
5231 }
5232 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5233 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5234 || v->NoTimeForPrefetch[i][j][k] == true) {
5235 v->PrefetchSupported[i][j] = false;
5236 }
5237 }
5238
5239 v->DynamicMetadataSupported[i][j] = true;
5240 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5241 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5242 v->DynamicMetadataSupported[i][j] = false;
5243 }
5244 }
5245
5246 v->VRatioInPrefetchSupported[i][j] = true;
5247 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5248 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5249 v->VRatioInPrefetchSupported[i][j] = false;
5250 }
5251 }
5252 v->AnyLinesForVMOrRowTooLarge = false;
5253 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5254 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5255 v->AnyLinesForVMOrRowTooLarge = true;
5256 }
5257 }
5258
5259 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5260
5261 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5262 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5263 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5264 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5265 - dml_max(
5266 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5267 v->NoOfDPP[i][j][k]
5268 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5269 * v->UrgentBurstFactorLumaPre[k]
5270 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5271 * v->UrgentBurstFactorChromaPre[k])
5272 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5273 }
5274 v->TotImmediateFlipBytes = 0.0;
5275 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5276 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5277 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5278 + v->DPTEBytesPerRow[i][j][k]);
5279 }
5280
5281 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5282 CalculateFlipSchedule(
5283 mode_lib,
5284 k,
5285 HostVMInefficiencyFactor,
5286 v->ExtraLatency,
5287 v->UrgLatency[i],
5288 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5289 v->MetaRowBytes[i][j][k],
5290 v->DPTEBytesPerRow[i][j][k]);
5291 }
5292 v->total_dcn_read_bw_with_flip = 0.0;
5293 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5294 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5295 + dml_max3(
5296 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5297 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5298 + v->VActiveCursorBandwidth[i][j][k],
5299 v->NoOfDPP[i][j][k]
5300 * (v->final_flip_bw[k]
5301 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5302 * v->UrgentBurstFactorLumaPre[k]
5303 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5304 * v->UrgentBurstFactorChromaPre[k])
5305 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5306 }
5307 v->ImmediateFlipSupportedForState[i][j] = true;
5308 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5309 v->ImmediateFlipSupportedForState[i][j] = false;
5310 }
5311 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5312 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5313 v->ImmediateFlipSupportedForState[i][j] = false;
5314 }
5315 }
5316 } else {
5317 v->ImmediateFlipSupportedForState[i][j] = false;
5318 }
5319
5320 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5321 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5322 NextPrefetchModeState = NextPrefetchModeState + 1;
5323 } else {
5324 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5325 }
5326 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5327 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5328 && ((v->HostVMEnable == false &&
5329 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5330 || v->ImmediateFlipSupportedForState[i][j] == true))
5331 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5332
5333 CalculateUnboundedRequestAndCompressedBufferSize(
5334 v->DETBufferSizeInKByte[0],
5335 v->ConfigReturnBufferSizeInKByte,
5336 v->UseUnboundedRequesting,
5337 v->TotalNumberOfActiveDPP[i][j],
5338 NoChroma,
5339 v->MaxNumDPP,
5340 v->CompressedBufferSegmentSizeInkByte,
5341 v->Output,
5342 &UnboundedRequestEnabledThisState,
5343 &CompressedBufferSizeInkByteThisState);
5344
5345 CalculateWatermarksAndDRAMSpeedChangeSupport(
5346 mode_lib,
5347 v->PrefetchModePerState[i][j],
5348 v->DCFCLKState[i][j],
5349 v->ReturnBWPerState[i][j],
5350 v->UrgLatency[i],
5351 v->ExtraLatency,
5352 v->SOCCLKPerState[i],
5353 v->ProjectedDCFCLKDeepSleep[i][j],
5354 v->DETBufferSizeYThisState,
5355 v->DETBufferSizeCThisState,
5356 v->SwathHeightYThisState,
5357 v->SwathHeightCThisState,
5358 v->SwathWidthYThisState,
5359 v->SwathWidthCThisState,
5360 v->NoOfDPPThisState,
5361 v->BytePerPixelInDETY,
5362 v->BytePerPixelInDETC,
5363 UnboundedRequestEnabledThisState,
5364 CompressedBufferSizeInkByteThisState,
5365 &v->DRAMClockChangeSupport[i][j],
5366 &dummy,
5367 &dummy,
5368 &dummy,
5369 &dummy);
5370 }
5371 }
5372
5373 /*PTE Buffer Size Check*/
5374 for (i = 0; i < v->soc.num_states; i++) {
5375 for (j = 0; j < 2; j++) {
5376 v->PTEBufferSizeNotExceeded[i][j] = true;
5377 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5378 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5379 v->PTEBufferSizeNotExceeded[i][j] = false;
5380 }
5381 }
5382 }
5383 }
5384
5385 /*Cursor Support Check*/
5386 v->CursorSupport = true;
5387 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5388 if (v->CursorWidth[k][0] > 0.0) {
5389 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5390 v->CursorSupport = false;
5391 }
5392 }
5393 }
5394
5395 /*Valid Pitch Check*/
5396 v->PitchSupport = true;
5397 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5398 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5399 if (v->DCCEnable[k] == true) {
5400 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5401 } else {
5402 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5403 }
5404 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5405 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5406 && v->SourcePixelFormat[k] != dm_mono_8) {
5407 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5408 if (v->DCCEnable[k] == true) {
5409 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5410 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5411 64.0 * v->Read256BlockWidthC[k]);
5412 } else {
5413 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5414 }
5415 } else {
5416 v->AlignedCPitch[k] = v->PitchC[k];
5417 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5418 }
5419 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5420 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5421 v->PitchSupport = false;
5422 }
5423 }
5424
5425 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5426 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5427 ViewportExceedsSurface = true;
5428 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5429 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5430 && v->SourcePixelFormat[k] != dm_rgbe) {
5431 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5432 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5433 ViewportExceedsSurface = true;
5434 }
5435 }
5436 }
5437 }
5438
5439 /*Mode Support, Voltage State and SOC Configuration*/
5440 for (i = v->soc.num_states - 1; i >= 0; i--) {
5441 for (j = 0; j < 2; j++) {
5442 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5443 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5444 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5445 && v->DTBCLKRequiredMoreThanSupported[i] == false
5446 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5447 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5448 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5449 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5450 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5451 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5452 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5453 && ((v->HostVMEnable == false
5454 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5455 || v->ImmediateFlipSupportedForState[i][j] == true)
5456 && FMTBufferExceeded == false) {
5457 v->ModeSupport[i][j] = true;
5458 } else {
5459 v->ModeSupport[i][j] = false;
5460 #ifdef __DML_VBA_DEBUG__
5461 if (v->ScaleRatioAndTapsSupport == false)
5462 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed");
5463 if (v->SourceFormatPixelAndScanSupport == false)
5464 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed");
5465 if (v->ViewportSizeSupport[i][j] == false)
5466 dml_print("DML SUPPORT: ViewportSizeSupport failed");
5467 if (v->LinkCapacitySupport[i] == false)
5468 dml_print("DML SUPPORT: LinkCapacitySupport failed");
5469 if (v->ODMCombine4To1SupportCheckOK[i] == false)
5470 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5471 if (v->NotEnoughDSCUnits[i] == true)
5472 dml_print("DML SUPPORT: NotEnoughDSCUnits");
5473 if (v->DTBCLKRequiredMoreThanSupported[i] == true)
5474 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported");
5475 if (v->ROBSupport[i][j] == false)
5476 dml_print("DML SUPPORT: ROBSupport failed");
5477 if (v->DISPCLK_DPPCLK_Support[i][j] == false)
5478 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed");
5479 if (v->TotalAvailablePipesSupport[i][j] == false)
5480 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5481 if (EnoughWritebackUnits == false)
5482 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5483 if (v->WritebackLatencySupport == false)
5484 dml_print("DML SUPPORT: WritebackLatencySupport failed");
5485 if (v->WritebackScaleRatioAndTapsSupport == false)
5486 dml_print("DML SUPPORT: DSC422NativeNotSupported ");
5487 if (v->CursorSupport == false)
5488 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5489 if (v->PitchSupport == false)
5490 dml_print("DML SUPPORT: PitchSupport failed");
5491 if (ViewportExceedsSurface == true)
5492 dml_print("DML SUPPORT: ViewportExceedsSurface failed");
5493 if (v->PrefetchSupported[i][j] == false)
5494 dml_print("DML SUPPORT: PrefetchSupported failed");
5495 if (v->DynamicMetadataSupported[i][j] == false)
5496 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5497 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
5498 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
5499 if (v->VRatioInPrefetchSupported[i][j] == false)
5500 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
5501 if (v->PTEBufferSizeNotExceeded[i][j] == false)
5502 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
5503 if (v->NonsupportedDSCInputBPC == true)
5504 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
5505 if (!((v->HostVMEnable == false
5506 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5507 || v->ImmediateFlipSupportedForState[i][j] == true))
5508 dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
5509 if (FMTBufferExceeded == true)
5510 dml_print("DML SUPPORT: FMTBufferExceeded failed");
5511 #endif
5512 }
5513 }
5514 }
5515
5516 {
5517 unsigned int MaximumMPCCombine = 0;
5518 for (i = v->soc.num_states; i >= 0; i--) {
5519 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5520 v->VoltageLevel = i;
5521 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5522 if (v->ModeSupport[i][0] == true) {
5523 MaximumMPCCombine = 0;
5524 } else {
5525 MaximumMPCCombine = 1;
5526 }
5527 }
5528 }
5529 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5530 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5531 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5532 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5533 }
5534 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5535 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5536 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5537 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5538 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5539 v->maxMpcComb = MaximumMPCCombine;
5540 }
5541 }
5542
5543 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5544 struct display_mode_lib *mode_lib,
5545 unsigned int PrefetchMode,
5546 double DCFCLK,
5547 double ReturnBW,
5548 double UrgentLatency,
5549 double ExtraLatency,
5550 double SOCCLK,
5551 double DCFCLKDeepSleep,
5552 unsigned int DETBufferSizeY[],
5553 unsigned int DETBufferSizeC[],
5554 unsigned int SwathHeightY[],
5555 unsigned int SwathHeightC[],
5556 double SwathWidthY[],
5557 double SwathWidthC[],
5558 unsigned int DPPPerPlane[],
5559 double BytePerPixelDETY[],
5560 double BytePerPixelDETC[],
5561 bool UnboundedRequestEnabled,
5562 int unsigned CompressedBufferSizeInkByte,
5563 enum clock_change_support *DRAMClockChangeSupport,
5564 double *StutterExitWatermark,
5565 double *StutterEnterPlusExitWatermark,
5566 double *Z8StutterExitWatermark,
5567 double *Z8StutterEnterPlusExitWatermark)
5568 {
5569 struct vba_vars_st *v = &mode_lib->vba;
5570 double EffectiveLBLatencyHidingY;
5571 double EffectiveLBLatencyHidingC;
5572 double LinesInDETY[DC__NUM_DPP__MAX];
5573 double LinesInDETC;
5574 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5575 unsigned int LinesInDETCRoundedDownToSwath;
5576 double FullDETBufferingTimeY;
5577 double FullDETBufferingTimeC;
5578 double ActiveDRAMClockChangeLatencyMarginY;
5579 double ActiveDRAMClockChangeLatencyMarginC;
5580 double WritebackDRAMClockChangeLatencyMargin;
5581 double PlaneWithMinActiveDRAMClockChangeMargin;
5582 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5583 double WritebackDRAMClockChangeLatencyHiding;
5584 double TotalPixelBW = 0.0;
5585 int k, j;
5586
5587 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5588
5589 #ifdef __DML_VBA_DEBUG__
5590 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5591 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5592 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5593 #endif
5594
5595 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5596
5597 #ifdef __DML_VBA_DEBUG__
5598 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5599 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5600 #endif
5601
5602 v->TotalActiveWriteback = 0;
5603 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5604 if (v->WritebackEnable[k] == true) {
5605 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5606 }
5607 }
5608
5609 if (v->TotalActiveWriteback <= 1) {
5610 v->WritebackUrgentWatermark = v->WritebackLatency;
5611 } else {
5612 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5613 }
5614
5615 if (v->TotalActiveWriteback <= 1) {
5616 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5617 } else {
5618 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5619 }
5620
5621 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5622 TotalPixelBW = TotalPixelBW
5623 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5624 / (v->HTotal[k] / v->PixelClock[k]);
5625 }
5626
5627 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5628 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5629
5630 v->LBLatencyHidingSourceLinesY = dml_min(
5631 (double) v->MaxLineBufferLines,
5632 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5633
5634 v->LBLatencyHidingSourceLinesC = dml_min(
5635 (double) v->MaxLineBufferLines,
5636 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5637
5638 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5639
5640 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5641
5642 if (UnboundedRequestEnabled) {
5643 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5644 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5645 }
5646
5647 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5648 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5649 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5650 if (BytePerPixelDETC[k] > 0) {
5651 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5652 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5653 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5654 } else {
5655 LinesInDETC = 0;
5656 FullDETBufferingTimeC = 999999;
5657 }
5658
5659 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5660 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5661
5662 if (v->NumberOfActivePlanes > 1) {
5663 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5664 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5665 }
5666
5667 if (BytePerPixelDETC[k] > 0) {
5668 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5669 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5670
5671 if (v->NumberOfActivePlanes > 1) {
5672 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5673 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5674 }
5675 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5676 } else {
5677 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5678 }
5679
5680 if (v->WritebackEnable[k] == true) {
5681 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5682 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5683 if (v->WritebackPixelFormat[k] == dm_444_64) {
5684 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5685 }
5686 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5687 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5688 }
5689 }
5690
5691 v->MinActiveDRAMClockChangeMargin = 999999;
5692 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5693 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5694 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5695 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5696 if (v->BlendingAndTiming[k] == k) {
5697 PlaneWithMinActiveDRAMClockChangeMargin = k;
5698 } else {
5699 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5700 if (v->BlendingAndTiming[k] == j) {
5701 PlaneWithMinActiveDRAMClockChangeMargin = j;
5702 }
5703 }
5704 }
5705 }
5706 }
5707
5708 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5709
5710 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5711 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5712 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5713 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5714 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5715 }
5716 }
5717
5718 v->TotalNumberOfActiveOTG = 0;
5719
5720 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5721 if (v->BlendingAndTiming[k] == k) {
5722 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5723 }
5724 }
5725
5726 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5727 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5728 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5729 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5730 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5731 } else {
5732 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5733 }
5734
5735 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5736 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5737 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5738 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5739
5740 #ifdef __DML_VBA_DEBUG__
5741 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5742 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5743 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5744 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5745 #endif
5746 }
5747
5748 static void CalculateDCFCLKDeepSleep(
5749 struct display_mode_lib *mode_lib,
5750 unsigned int NumberOfActivePlanes,
5751 int BytePerPixelY[],
5752 int BytePerPixelC[],
5753 double VRatio[],
5754 double VRatioChroma[],
5755 double SwathWidthY[],
5756 double SwathWidthC[],
5757 unsigned int DPPPerPlane[],
5758 double HRatio[],
5759 double HRatioChroma[],
5760 double PixelClock[],
5761 double PSCL_THROUGHPUT[],
5762 double PSCL_THROUGHPUT_CHROMA[],
5763 double DPPCLK[],
5764 double ReadBandwidthLuma[],
5765 double ReadBandwidthChroma[],
5766 int ReturnBusWidth,
5767 double *DCFCLKDeepSleep)
5768 {
5769 struct vba_vars_st *v = &mode_lib->vba;
5770 double DisplayPipeLineDeliveryTimeLuma;
5771 double DisplayPipeLineDeliveryTimeChroma;
5772 double ReadBandwidth = 0.0;
5773 int k;
5774
5775 for (k = 0; k < NumberOfActivePlanes; ++k) {
5776
5777 if (VRatio[k] <= 1) {
5778 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5779 } else {
5780 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5781 }
5782 if (BytePerPixelC[k] == 0) {
5783 DisplayPipeLineDeliveryTimeChroma = 0;
5784 } else {
5785 if (VRatioChroma[k] <= 1) {
5786 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5787 } else {
5788 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5789 }
5790 }
5791
5792 if (BytePerPixelC[k] > 0) {
5793 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5794 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5795 } else {
5796 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5797 }
5798 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5799
5800 }
5801
5802 for (k = 0; k < NumberOfActivePlanes; ++k) {
5803 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5804 }
5805
5806 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5807
5808 for (k = 0; k < NumberOfActivePlanes; ++k) {
5809 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5810 }
5811 }
5812
/*
 * Compute the urgent burst factors for cursor, luma and chroma.
 *
 * For each of the three buffers the amount of buffering is converted to a
 * time (lines * LineTime / VRatio). If that time does not exceed
 * UrgentLatency, *NotEnoughUrgentLatencyHiding is set and the factor is 0;
 * otherwise the factor is time / (time - UrgentLatency). When VRatio is not
 * greater than 0 the factor is 1.
 *
 * Outputs that are conditionally written:
 *   - *UrgentBurstFactorCursor is only written when CursorWidth > 0;
 *   - *UrgentBurstFactorChroma is only written when BytePerPixelInDETC > 0.
 *
 * NOTE(review): VRatioC is never read; the chroma path tests and divides by
 * VRatio. Confirm against the HW formula whether this is intended.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LumaLines;
	double ChromaLines;

	*NotEnoughUrgentLatencyHiding = false;

	/* Cursor */
	if (CursorWidth > 0) {
		/* Largest power of two number of cursor lines that fits in the cursor buffer. */
		unsigned int CursorLines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (!(VRatio > 0)) {
			*UrgentBurstFactorCursor = 1;
		} else {
			double CursorTime = CursorLines * LineTime / VRatio;
			double CursorSlack = CursorTime - UrgentLatency;

			if (CursorSlack <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorTime / CursorSlack;
			}
		}
	}

	/* Luma */
	LumaLines = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (!(VRatio > 0)) {
		*UrgentBurstFactorLuma = 1;
	} else {
		/* Only whole swaths count towards the buffered time. */
		double LumaTime = dml_floor(LumaLines, SwathHeightY) * LineTime / VRatio;
		double LumaSlack = LumaTime - UrgentLatency;

		if (LumaSlack <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = LumaTime / LumaSlack;
		}
	}

	/* Chroma: nothing to do (and nothing written) without a chroma plane. */
	if (!(BytePerPixelInDETC > 0))
		return;

	ChromaLines = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
	if (!(VRatio > 0)) {
		*UrgentBurstFactorChroma = 1;
	} else {
		double ChromaTime = dml_floor(ChromaLines, SwathHeightC) * LineTime / VRatio;
		double ChromaSlack = ChromaTime - UrgentLatency;

		if (ChromaSlack <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorChroma = 0;
		} else {
			*UrgentBurstFactorChroma = ChromaTime / ChromaSlack;
		}
	}
}
5886
5887 static void CalculatePixelDeliveryTimes(
5888 unsigned int NumberOfActivePlanes,
5889 double VRatio[],
5890 double VRatioChroma[],
5891 double VRatioPrefetchY[],
5892 double VRatioPrefetchC[],
5893 unsigned int swath_width_luma_ub[],
5894 unsigned int swath_width_chroma_ub[],
5895 unsigned int DPPPerPlane[],
5896 double HRatio[],
5897 double HRatioChroma[],
5898 double PixelClock[],
5899 double PSCL_THROUGHPUT[],
5900 double PSCL_THROUGHPUT_CHROMA[],
5901 double DPPCLK[],
5902 int BytePerPixelC[],
5903 enum scan_direction_class SourceScan[],
5904 unsigned int NumberOfCursors[],
5905 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5906 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5907 unsigned int BlockWidth256BytesY[],
5908 unsigned int BlockHeight256BytesY[],
5909 unsigned int BlockWidth256BytesC[],
5910 unsigned int BlockHeight256BytesC[],
5911 double DisplayPipeLineDeliveryTimeLuma[],
5912 double DisplayPipeLineDeliveryTimeChroma[],
5913 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5914 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5915 double DisplayPipeRequestDeliveryTimeLuma[],
5916 double DisplayPipeRequestDeliveryTimeChroma[],
5917 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5918 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5919 double CursorRequestDeliveryTime[],
5920 double CursorRequestDeliveryTimePrefetch[])
5921 {
5922 double req_per_swath_ub;
5923 int k;
5924
5925 for (k = 0; k < NumberOfActivePlanes; ++k) {
5926 if (VRatio[k] <= 1) {
5927 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5928 } else {
5929 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5930 }
5931
5932 if (BytePerPixelC[k] == 0) {
5933 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5934 } else {
5935 if (VRatioChroma[k] <= 1) {
5936 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5937 } else {
5938 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5939 }
5940 }
5941
5942 if (VRatioPrefetchY[k] <= 1) {
5943 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5944 } else {
5945 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5946 }
5947
5948 if (BytePerPixelC[k] == 0) {
5949 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5950 } else {
5951 if (VRatioPrefetchC[k] <= 1) {
5952 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5953 } else {
5954 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5955 }
5956 }
5957 }
5958
5959 for (k = 0; k < NumberOfActivePlanes; ++k) {
5960 if (SourceScan[k] != dm_vert) {
5961 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5962 } else {
5963 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5964 }
5965 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5966 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5967 if (BytePerPixelC[k] == 0) {
5968 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5969 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5970 } else {
5971 if (SourceScan[k] != dm_vert) {
5972 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5973 } else {
5974 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5975 }
5976 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5977 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5978 }
5979 #ifdef __DML_VBA_DEBUG__
5980 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5981 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5982 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5983 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5984 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5985 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5986 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5987 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5988 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5989 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5990 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5991 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
5992 #endif
5993 }
5994
5995 for (k = 0; k < NumberOfActivePlanes; ++k) {
5996 int cursor_req_per_width;
5997 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5998 if (NumberOfCursors[k] > 0) {
5999 if (VRatio[k] <= 1) {
6000 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6001 } else {
6002 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6003 }
6004 if (VRatioPrefetchY[k] <= 1) {
6005 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6006 } else {
6007 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6008 }
6009 } else {
6010 CursorRequestDeliveryTime[k] = 0;
6011 CursorRequestDeliveryTimePrefetch[k] = 0;
6012 }
6013 #ifdef __DML_VBA_DEBUG__
6014 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6015 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6016 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6017 #endif
6018 }
6019 }
6020
6021 static void CalculateMetaAndPTETimes(
6022 int NumberOfActivePlanes,
6023 bool GPUVMEnable,
6024 int MetaChunkSize,
6025 int MinMetaChunkSizeBytes,
6026 int HTotal[],
6027 double VRatio[],
6028 double VRatioChroma[],
6029 double DestinationLinesToRequestRowInVBlank[],
6030 double DestinationLinesToRequestRowInImmediateFlip[],
6031 bool DCCEnable[],
6032 double PixelClock[],
6033 int BytePerPixelY[],
6034 int BytePerPixelC[],
6035 enum scan_direction_class SourceScan[],
6036 int dpte_row_height[],
6037 int dpte_row_height_chroma[],
6038 int meta_row_width[],
6039 int meta_row_width_chroma[],
6040 int meta_row_height[],
6041 int meta_row_height_chroma[],
6042 int meta_req_width[],
6043 int meta_req_width_chroma[],
6044 int meta_req_height[],
6045 int meta_req_height_chroma[],
6046 int dpte_group_bytes[],
6047 int PTERequestSizeY[],
6048 int PTERequestSizeC[],
6049 int PixelPTEReqWidthY[],
6050 int PixelPTEReqHeightY[],
6051 int PixelPTEReqWidthC[],
6052 int PixelPTEReqHeightC[],
6053 int dpte_row_width_luma_ub[],
6054 int dpte_row_width_chroma_ub[],
6055 double DST_Y_PER_PTE_ROW_NOM_L[],
6056 double DST_Y_PER_PTE_ROW_NOM_C[],
6057 double DST_Y_PER_META_ROW_NOM_L[],
6058 double DST_Y_PER_META_ROW_NOM_C[],
6059 double TimePerMetaChunkNominal[],
6060 double TimePerChromaMetaChunkNominal[],
6061 double TimePerMetaChunkVBlank[],
6062 double TimePerChromaMetaChunkVBlank[],
6063 double TimePerMetaChunkFlip[],
6064 double TimePerChromaMetaChunkFlip[],
6065 double time_per_pte_group_nom_luma[],
6066 double time_per_pte_group_vblank_luma[],
6067 double time_per_pte_group_flip_luma[],
6068 double time_per_pte_group_nom_chroma[],
6069 double time_per_pte_group_vblank_chroma[],
6070 double time_per_pte_group_flip_chroma[])
6071 {
6072 unsigned int meta_chunk_width;
6073 unsigned int min_meta_chunk_width;
6074 unsigned int meta_chunk_per_row_int;
6075 unsigned int meta_row_remainder;
6076 unsigned int meta_chunk_threshold;
6077 unsigned int meta_chunks_per_row_ub;
6078 unsigned int meta_chunk_width_chroma;
6079 unsigned int min_meta_chunk_width_chroma;
6080 unsigned int meta_chunk_per_row_int_chroma;
6081 unsigned int meta_row_remainder_chroma;
6082 unsigned int meta_chunk_threshold_chroma;
6083 unsigned int meta_chunks_per_row_ub_chroma;
6084 unsigned int dpte_group_width_luma;
6085 unsigned int dpte_groups_per_row_luma_ub;
6086 unsigned int dpte_group_width_chroma;
6087 unsigned int dpte_groups_per_row_chroma_ub;
6088 int k;
6089
6090 for (k = 0; k < NumberOfActivePlanes; ++k) {
6091 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6092 if (BytePerPixelC[k] == 0) {
6093 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6094 } else {
6095 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6096 }
6097 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6098 if (BytePerPixelC[k] == 0) {
6099 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6100 } else {
6101 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6102 }
6103 }
6104
6105 for (k = 0; k < NumberOfActivePlanes; ++k) {
6106 if (DCCEnable[k] == true) {
6107 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6108 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6109 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6110 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6111 if (SourceScan[k] != dm_vert) {
6112 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6113 } else {
6114 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6115 }
6116 if (meta_row_remainder <= meta_chunk_threshold) {
6117 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6118 } else {
6119 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6120 }
6121 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6122 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6123 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6124 if (BytePerPixelC[k] == 0) {
6125 TimePerChromaMetaChunkNominal[k] = 0;
6126 TimePerChromaMetaChunkVBlank[k] = 0;
6127 TimePerChromaMetaChunkFlip[k] = 0;
6128 } else {
6129 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6130 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6131 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6132 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6133 if (SourceScan[k] != dm_vert) {
6134 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6135 } else {
6136 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6137 }
6138 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6139 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6140 } else {
6141 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6142 }
6143 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6144 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6145 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6146 }
6147 } else {
6148 TimePerMetaChunkNominal[k] = 0;
6149 TimePerMetaChunkVBlank[k] = 0;
6150 TimePerMetaChunkFlip[k] = 0;
6151 TimePerChromaMetaChunkNominal[k] = 0;
6152 TimePerChromaMetaChunkVBlank[k] = 0;
6153 TimePerChromaMetaChunkFlip[k] = 0;
6154 }
6155 }
6156
6157 for (k = 0; k < NumberOfActivePlanes; ++k) {
6158 if (GPUVMEnable == true) {
6159 if (SourceScan[k] != dm_vert) {
6160 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6161 } else {
6162 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6163 }
6164 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6165 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6166 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6167 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6168 if (BytePerPixelC[k] == 0) {
6169 time_per_pte_group_nom_chroma[k] = 0;
6170 time_per_pte_group_vblank_chroma[k] = 0;
6171 time_per_pte_group_flip_chroma[k] = 0;
6172 } else {
6173 if (SourceScan[k] != dm_vert) {
6174 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6175 } else {
6176 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6177 }
6178 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6179 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6180 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6181 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6182 }
6183 } else {
6184 time_per_pte_group_nom_luma[k] = 0;
6185 time_per_pte_group_vblank_luma[k] = 0;
6186 time_per_pte_group_flip_luma[k] = 0;
6187 time_per_pte_group_nom_chroma[k] = 0;
6188 time_per_pte_group_vblank_chroma[k] = 0;
6189 time_per_pte_group_flip_chroma[k] = 0;
6190 }
6191 }
6192 }
6193
/*
 * Compute per-plane VM group and VM request times for vblank and immediate
 * flip.
 *
 * For a plane where GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels > 1, the time for the VM lines
 * (DestinationLinesToRequestVM* * HTotal / PixelClock) is divided by the
 * number of groups, respectively 64-byte requests, of the lower VM stage.
 * What is counted depends on the configuration:
 *   - DCC disabled:                       dpde0 bytes only;
 *   - DCC enabled, one page table level:  meta PTE bytes only;
 *   - DCC enabled, more levels:           both, plus a constant (2 with
 *                                         chroma, 1 without) for the groups.
 * Chroma contributions are included only when BytePerPixelC[k] > 0.
 * With more than two page table levels all four results are halved.
 *
 * For all other planes the four outputs are set to 0.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool HasChroma = BytePerPixelC[k] > 0;
		int GroupCount;
		int RequestCount;
		double VBlankTime;
		double FlipTime;

		/* Nothing to translate for this plane: report zero times. */
		if (!(GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			GroupCount = HasChroma ?
					dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestCount = HasChroma ?
					dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 :
					dpde0_bytes_per_frame_ub_l[k] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			GroupCount = HasChroma ?
					dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestCount = HasChroma ?
					meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64 :
					meta_pte_bytes_per_frame_ub_l[k] / 64;
		} else {
			/* Both page directory entries and meta PTEs. */
			GroupCount = HasChroma ?
					2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestCount = HasChroma ?
					dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64 :
					dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
		}

		/* Time covered by the VM request lines, shared by group and request results. */
		VBlankTime = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		FlipTime = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = VBlankTime / GroupCount;
		TimePerVMGroupFlip[k] = FlipTime / GroupCount;
		TimePerVMRequestVBlank[k] = VBlankTime / RequestCount;
		TimePerVMRequestFlip[k] = FlipTime / RequestCount;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
			TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
			TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
			TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
		}
	}
}
6293
6294 static void CalculateStutterEfficiency(
6295 struct display_mode_lib *mode_lib,
6296 int CompressedBufferSizeInkByte,
6297 bool UnboundedRequestEnabled,
6298 int ConfigReturnBufferSizeInKByte,
6299 int MetaFIFOSizeInKEntries,
6300 int ZeroSizeBufferEntries,
6301 int NumberOfActivePlanes,
6302 int ROBBufferSizeInKByte,
6303 double TotalDataReadBandwidth,
6304 double DCFCLK,
6305 double ReturnBW,
6306 double COMPBUF_RESERVED_SPACE_64B,
6307 double COMPBUF_RESERVED_SPACE_ZS,
6308 double SRExitTime,
6309 double SRExitZ8Time,
6310 bool SynchronizedVBlank,
6311 double Z8StutterEnterPlusExitWatermark,
6312 double StutterEnterPlusExitWatermark,
6313 bool ProgressiveToInterlaceUnitInOPP,
6314 bool Interlace[],
6315 double MinTTUVBlank[],
6316 int DPPPerPlane[],
6317 unsigned int DETBufferSizeY[],
6318 int BytePerPixelY[],
6319 double BytePerPixelDETY[],
6320 double SwathWidthY[],
6321 int SwathHeightY[],
6322 int SwathHeightC[],
6323 double NetDCCRateLuma[],
6324 double NetDCCRateChroma[],
6325 double DCCFractionOfZeroSizeRequestsLuma[],
6326 double DCCFractionOfZeroSizeRequestsChroma[],
6327 int HTotal[],
6328 int VTotal[],
6329 double PixelClock[],
6330 double VRatio[],
6331 enum scan_direction_class SourceScan[],
6332 int BlockHeight256BytesY[],
6333 int BlockWidth256BytesY[],
6334 int BlockHeight256BytesC[],
6335 int BlockWidth256BytesC[],
6336 int DCCYMaxUncompressedBlock[],
6337 int DCCCMaxUncompressedBlock[],
6338 int VActive[],
6339 bool DCCEnable[],
6340 bool WritebackEnable[],
6341 double ReadBandwidthPlaneLuma[],
6342 double ReadBandwidthPlaneChroma[],
6343 double meta_row_bw[],
6344 double dpte_row_bw[],
6345 double *StutterEfficiencyNotIncludingVBlank,
6346 double *StutterEfficiency,
6347 int *NumberOfStutterBurstsPerFrame,
6348 double *Z8StutterEfficiencyNotIncludingVBlank,
6349 double *Z8StutterEfficiency,
6350 int *Z8NumberOfStutterBurstsPerFrame,
6351 double *StutterPeriod)
6352 {
6353 struct vba_vars_st *v = &mode_lib->vba;
6354
6355 double DETBufferingTimeY;
6356 double SwathWidthYCriticalPlane = 0;
6357 double VActiveTimeCriticalPlane = 0;
6358 double FrameTimeCriticalPlane = 0;
6359 int BytePerPixelYCriticalPlane = 0;
6360 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6361 double MinTTUVBlankCriticalPlane = 0;
6362 double TotalCompressedReadBandwidth;
6363 double TotalRowReadBandwidth;
6364 double AverageDCCCompressionRate;
6365 double EffectiveCompressedBufferSize;
6366 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6367 double StutterBurstTime;
6368 int TotalActiveWriteback;
6369 double LinesInDETY;
6370 double LinesInDETYRoundedDownToSwath;
6371 double MaximumEffectiveCompressionLuma;
6372 double MaximumEffectiveCompressionChroma;
6373 double TotalZeroSizeRequestReadBandwidth;
6374 double TotalZeroSizeCompressedReadBandwidth;
6375 double AverageDCCZeroSizeFraction;
6376 double AverageZeroSizeCompressionRate;
6377 int TotalNumberOfActiveOTG = 0;
6378 double LastStutterPeriod = 0.0;
6379 double LastZ8StutterPeriod = 0.0;
6380 int k;
6381
6382 TotalZeroSizeRequestReadBandwidth = 0;
6383 TotalZeroSizeCompressedReadBandwidth = 0;
6384 TotalRowReadBandwidth = 0;
6385 TotalCompressedReadBandwidth = 0;
6386
6387 for (k = 0; k < NumberOfActivePlanes; ++k) {
6388 if (DCCEnable[k] == true) {
6389 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6390 || DCCYMaxUncompressedBlock[k] < 256) {
6391 MaximumEffectiveCompressionLuma = 2;
6392 } else {
6393 MaximumEffectiveCompressionLuma = 4;
6394 }
6395 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6396 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6397 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6398 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6399 if (ReadBandwidthPlaneChroma[k] > 0) {
6400 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6401 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6402 MaximumEffectiveCompressionChroma = 2;
6403 } else {
6404 MaximumEffectiveCompressionChroma = 4;
6405 }
6406 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6407 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6408 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6409 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6410 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6411 }
6412 } else {
6413 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6414 }
6415 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6416 }
6417
6418 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6419 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6420
6421 #ifdef __DML_VBA_DEBUG__
6422 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6423 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6424 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6425 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6426 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6427 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6428 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6429 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6430 #endif
6431
6432 if (AverageDCCZeroSizeFraction == 1) {
6433 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6434 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6435 } else if (AverageDCCZeroSizeFraction > 0) {
6436 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6437 EffectiveCompressedBufferSize = dml_min(
6438 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6439 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6440 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6441 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6442 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6443 dml_print(
6444 "DML::%s: min 2 = %f\n",
6445 __func__,
6446 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6447 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6448 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6449 } else {
6450 EffectiveCompressedBufferSize = dml_min(
6451 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6452 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6453 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6454 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6455 }
6456
6457 #ifdef __DML_VBA_DEBUG__
6458 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6459 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6460 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6461 #endif
6462
6463 *StutterPeriod = 0;
6464 for (k = 0; k < NumberOfActivePlanes; ++k) {
6465 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6466 / BytePerPixelDETY[k] / SwathWidthY[k];
6467 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6468 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6469 #ifdef __DML_VBA_DEBUG__
6470 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6471 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6472 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6473 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6474 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6475 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6476 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6477 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6478 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6479 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6480 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6481 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6482 #endif
6483
6484 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6485 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6486
6487 *StutterPeriod = DETBufferingTimeY;
6488 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6489 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6490 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6491 SwathWidthYCriticalPlane = SwathWidthY[k];
6492 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6493 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6494
6495 #ifdef __DML_VBA_DEBUG__
6496 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6497 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6498 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6499 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6500 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6501 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6502 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6503 #endif
6504 }
6505 }
6506
6507 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6508 #ifdef __DML_VBA_DEBUG__
6509 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6510 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6511 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6512 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6513 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6514 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6515 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6516 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6517 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6518 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6519 #endif
6520
6521 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6522 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6523 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6524 #ifdef __DML_VBA_DEBUG__
6525 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6526 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6527 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6528 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6529 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6530 #endif
6531 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6532
6533 dml_print(
6534 "DML::%s: Time to finish residue swath=%f\n",
6535 __func__,
6536 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6537
6538 TotalActiveWriteback = 0;
6539 for (k = 0; k < NumberOfActivePlanes; ++k) {
6540 if (WritebackEnable[k]) {
6541 TotalActiveWriteback = TotalActiveWriteback + 1;
6542 }
6543 }
6544
6545 if (TotalActiveWriteback == 0) {
6546 #ifdef __DML_VBA_DEBUG__
6547 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6548 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6549 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6550 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6551 #endif
6552 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6553 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6554 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6555 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6556 } else {
6557 *StutterEfficiencyNotIncludingVBlank = 0.;
6558 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6559 *NumberOfStutterBurstsPerFrame = 0;
6560 *Z8NumberOfStutterBurstsPerFrame = 0;
6561 }
6562 #ifdef __DML_VBA_DEBUG__
6563 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6564 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6565 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6566 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6567 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6568 #endif
6569
6570 for (k = 0; k < NumberOfActivePlanes; ++k) {
6571 if (v->BlendingAndTiming[k] == k) {
6572 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6573 }
6574 }
6575
6576 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6577 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6578
6579 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6580 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6581 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6582 } else {
6583 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6584 }
6585 } else {
6586 *StutterEfficiency = 0;
6587 }
6588
6589 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6590 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6591 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6592 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6593 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6594 } else {
6595 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6596 }
6597 } else {
6598 *Z8StutterEfficiency = 0.;
6599 }
6600
6601 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6602 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6603 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6604 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6605 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6606 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6607 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6608 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6609 }
6610
6611 static void CalculateSwathAndDETConfiguration(
6612 bool ForceSingleDPP,
6613 int NumberOfActivePlanes,
6614 bool DETSharedByAllDPP,
6615 unsigned int DETBufferSizeInKByteA[],
6616 double MaximumSwathWidthLuma[],
6617 double MaximumSwathWidthChroma[],
6618 enum scan_direction_class SourceScan[],
6619 enum source_format_class SourcePixelFormat[],
6620 enum dm_swizzle_mode SurfaceTiling[],
6621 int ViewportWidth[],
6622 int ViewportHeight[],
6623 int SurfaceWidthY[],
6624 int SurfaceWidthC[],
6625 int SurfaceHeightY[],
6626 int SurfaceHeightC[],
6627 int Read256BytesBlockHeightY[],
6628 int Read256BytesBlockHeightC[],
6629 int Read256BytesBlockWidthY[],
6630 int Read256BytesBlockWidthC[],
6631 enum odm_combine_mode ODMCombineEnabled[],
6632 int BlendingAndTiming[],
6633 int BytePerPixY[],
6634 int BytePerPixC[],
6635 double BytePerPixDETY[],
6636 double BytePerPixDETC[],
6637 int HActive[],
6638 double HRatio[],
6639 double HRatioChroma[],
6640 int DPPPerPlane[],
6641 int swath_width_luma_ub[],
6642 int swath_width_chroma_ub[],
6643 double SwathWidth[],
6644 double SwathWidthChroma[],
6645 int SwathHeightY[],
6646 int SwathHeightC[],
6647 unsigned int DETBufferSizeY[],
6648 unsigned int DETBufferSizeC[],
6649 bool ViewportSizeSupportPerPlane[],
6650 bool *ViewportSizeSupport)
6651 {
6652 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6653 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6654 int MinimumSwathHeightY;
6655 int MinimumSwathHeightC;
6656 int RoundedUpMaxSwathSizeBytesY;
6657 int RoundedUpMaxSwathSizeBytesC;
6658 int RoundedUpMinSwathSizeBytesY;
6659 int RoundedUpMinSwathSizeBytesC;
6660 int RoundedUpSwathSizeBytesY;
6661 int RoundedUpSwathSizeBytesC;
6662 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6663 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6664 int k;
6665
6666 CalculateSwathWidth(
6667 ForceSingleDPP,
6668 NumberOfActivePlanes,
6669 SourcePixelFormat,
6670 SourceScan,
6671 ViewportWidth,
6672 ViewportHeight,
6673 SurfaceWidthY,
6674 SurfaceWidthC,
6675 SurfaceHeightY,
6676 SurfaceHeightC,
6677 ODMCombineEnabled,
6678 BytePerPixY,
6679 BytePerPixC,
6680 Read256BytesBlockHeightY,
6681 Read256BytesBlockHeightC,
6682 Read256BytesBlockWidthY,
6683 Read256BytesBlockWidthC,
6684 BlendingAndTiming,
6685 HActive,
6686 HRatio,
6687 DPPPerPlane,
6688 SwathWidthSingleDPP,
6689 SwathWidthSingleDPPChroma,
6690 SwathWidth,
6691 SwathWidthChroma,
6692 MaximumSwathHeightY,
6693 MaximumSwathHeightC,
6694 swath_width_luma_ub,
6695 swath_width_chroma_ub);
6696
6697 *ViewportSizeSupport = true;
6698 for (k = 0; k < NumberOfActivePlanes; ++k) {
6699 unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
6700
6701 if (DETSharedByAllDPP && DPPPerPlane[k])
6702 DETBufferSizeInKByte /= DPPPerPlane[k];
6703 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6704 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6705 if (SurfaceTiling[k] == dm_sw_linear
6706 || (SourcePixelFormat[k] == dm_444_64
6707 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6708 && SourceScan[k] != dm_vert)) {
6709 MinimumSwathHeightY = MaximumSwathHeightY[k];
6710 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6711 MinimumSwathHeightY = MaximumSwathHeightY[k];
6712 } else {
6713 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6714 }
6715 MinimumSwathHeightC = MaximumSwathHeightC[k];
6716 } else {
6717 if (SurfaceTiling[k] == dm_sw_linear) {
6718 MinimumSwathHeightY = MaximumSwathHeightY[k];
6719 MinimumSwathHeightC = MaximumSwathHeightC[k];
6720 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6721 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6722 MinimumSwathHeightC = MaximumSwathHeightC[k];
6723 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6724 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6725 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6726 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6727 MinimumSwathHeightY = MaximumSwathHeightY[k];
6728 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6729 } else {
6730 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6731 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6732 }
6733 }
6734
6735 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6736 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6737 if (SourcePixelFormat[k] == dm_420_10) {
6738 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6739 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6740 }
6741 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6742 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6743 if (SourcePixelFormat[k] == dm_420_10) {
6744 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6745 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6746 }
6747
6748 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6749 SwathHeightY[k] = MaximumSwathHeightY[k];
6750 SwathHeightC[k] = MaximumSwathHeightC[k];
6751 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6752 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6753 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6754 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6755 SwathHeightY[k] = MinimumSwathHeightY;
6756 SwathHeightC[k] = MaximumSwathHeightC[k];
6757 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6758 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6759 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6760 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6761 SwathHeightY[k] = MaximumSwathHeightY[k];
6762 SwathHeightC[k] = MinimumSwathHeightC;
6763 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6764 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6765 } else {
6766 SwathHeightY[k] = MinimumSwathHeightY;
6767 SwathHeightC[k] = MinimumSwathHeightC;
6768 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6769 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6770 }
6771 {
6772 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6773 if (SwathHeightC[k] == 0) {
6774 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6775 DETBufferSizeC[k] = 0;
6776 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6777 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6778 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6779 } else {
6780 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6781 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6782 }
6783
6784 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6785 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6786 *ViewportSizeSupport = false;
6787 ViewportSizeSupportPerPlane[k] = false;
6788 } else {
6789 ViewportSizeSupportPerPlane[k] = true;
6790 }
6791 }
6792 }
6793 }
6794
6795 static void CalculateSwathWidth(
6796 bool ForceSingleDPP,
6797 int NumberOfActivePlanes,
6798 enum source_format_class SourcePixelFormat[],
6799 enum scan_direction_class SourceScan[],
6800 int ViewportWidth[],
6801 int ViewportHeight[],
6802 int SurfaceWidthY[],
6803 int SurfaceWidthC[],
6804 int SurfaceHeightY[],
6805 int SurfaceHeightC[],
6806 enum odm_combine_mode ODMCombineEnabled[],
6807 int BytePerPixY[],
6808 int BytePerPixC[],
6809 int Read256BytesBlockHeightY[],
6810 int Read256BytesBlockHeightC[],
6811 int Read256BytesBlockWidthY[],
6812 int Read256BytesBlockWidthC[],
6813 int BlendingAndTiming[],
6814 int HActive[],
6815 double HRatio[],
6816 int DPPPerPlane[],
6817 double SwathWidthSingleDPPY[],
6818 double SwathWidthSingleDPPC[],
6819 double SwathWidthY[],
6820 double SwathWidthC[],
6821 int MaximumSwathHeightY[],
6822 int MaximumSwathHeightC[],
6823 int swath_width_luma_ub[],
6824 int swath_width_chroma_ub[])
6825 {
6826 enum odm_combine_mode MainPlaneODMCombine;
6827 int j, k;
6828
6829 #ifdef __DML_VBA_DEBUG__
6830 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6831 #endif
6832
6833 for (k = 0; k < NumberOfActivePlanes; ++k) {
6834 if (SourceScan[k] != dm_vert) {
6835 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6836 } else {
6837 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6838 }
6839
6840 #ifdef __DML_VBA_DEBUG__
6841 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6842 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6843 #endif
6844
6845 MainPlaneODMCombine = ODMCombineEnabled[k];
6846 for (j = 0; j < NumberOfActivePlanes; ++j) {
6847 if (BlendingAndTiming[k] == j) {
6848 MainPlaneODMCombine = ODMCombineEnabled[j];
6849 }
6850 }
6851
6852 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6853 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6854 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6855 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6856 } else if (DPPPerPlane[k] == 2) {
6857 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6858 } else {
6859 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6860 }
6861
6862 #ifdef __DML_VBA_DEBUG__
6863 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6864 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6865 #endif
6866
6867 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6868 SwathWidthC[k] = SwathWidthY[k] / 2;
6869 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6870 } else {
6871 SwathWidthC[k] = SwathWidthY[k];
6872 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6873 }
6874
6875 if (ForceSingleDPP == true) {
6876 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6877 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6878 }
6879 {
6880 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6881 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6882
6883 #ifdef __DML_VBA_DEBUG__
6884 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6885 #endif
6886
6887 if (SourceScan[k] != dm_vert) {
6888 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6889 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6890 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6891 if (BytePerPixC[k] > 0) {
6892 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6893
6894 swath_width_chroma_ub[k] = dml_min(
6895 surface_width_ub_c,
6896 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6897 } else {
6898 swath_width_chroma_ub[k] = 0;
6899 }
6900 } else {
6901 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6902 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6903 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6904 if (BytePerPixC[k] > 0) {
6905 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6906
6907 swath_width_chroma_ub[k] = dml_min(
6908 surface_height_ub_c,
6909 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6910 } else {
6911 swath_width_chroma_ub[k] = 0;
6912 }
6913 }
6914 }
6915 }
6916 }
6917
6918 static double CalculateExtraLatency(
6919 int RoundTripPingLatencyCycles,
6920 int ReorderingBytes,
6921 double DCFCLK,
6922 int TotalNumberOfActiveDPP,
6923 int PixelChunkSizeInKByte,
6924 int TotalNumberOfDCCActiveDPP,
6925 int MetaChunkSize,
6926 double ReturnBW,
6927 bool GPUVMEnable,
6928 bool HostVMEnable,
6929 int NumberOfActivePlanes,
6930 int NumberOfDPP[],
6931 int dpte_group_bytes[],
6932 double HostVMInefficiencyFactor,
6933 double HostVMMinPageSize,
6934 int HostVMMaxNonCachedPageTableLevels)
6935 {
6936 double ExtraLatencyBytes;
6937 double ExtraLatency;
6938
6939 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6940 ReorderingBytes,
6941 TotalNumberOfActiveDPP,
6942 PixelChunkSizeInKByte,
6943 TotalNumberOfDCCActiveDPP,
6944 MetaChunkSize,
6945 GPUVMEnable,
6946 HostVMEnable,
6947 NumberOfActivePlanes,
6948 NumberOfDPP,
6949 dpte_group_bytes,
6950 HostVMInefficiencyFactor,
6951 HostVMMinPageSize,
6952 HostVMMaxNonCachedPageTableLevels);
6953
6954 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6955
6956 #ifdef __DML_VBA_DEBUG__
6957 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6958 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6959 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6960 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6961 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6962 #endif
6963
6964 return ExtraLatency;
6965 }
6966
/*
 * Return the number of extra bytes that contribute to latency:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each active plane:
 *     NumberOfDPP[k] * dpte_group_bytes[k]
 *       * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor
 *
 * HostVMDynamicLevels is non-zero only when both GPUVM and HostVM are enabled;
 * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (clamped at 0)
 * depending on HostVMMinPageSize.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int HostVMDynamicLevels = 0;
	int RequestsPerGroup;
	double ExtraBytes;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		/* Larger minimum page sizes drop one or two table levels. */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, HostVMMaxNonCachedPageTableLevels - 2);
	}

	/* Chunk sizes are in KB, hence the 1024 factor. */
	ExtraBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return ExtraBytes;

	RequestsPerGroup = 1 + 8 * HostVMDynamicLevels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		ExtraBytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * RequestsPerGroup * HostVMInefficiencyFactor;

	return ExtraBytes;
}
7006
/*
 * Return the largest of the three urgent latencies. When
 * DoUrgentLatencyAdjustment is set, add
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * to that maximum.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCaseLatency = dml_max3(
			UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCaseLatency;

	/* The adjustment grows as FabricClock drops below the reference clock. */
	return WorstCaseLatency
			+ UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7024
/*
 * UseMinimumDCFCLK() - estimate the DCFCLK needed for each state and store it
 * in v->DCFCLKState[i][j].
 *
 * Iterates over every state i in [0, v->soc.num_states) and both values of the
 * second index j (0 and 1). For each pair it derives
 *   - DCFCLKRequiredForAverageBandwidth, and
 *   - DCFCLKRequiredForPeakBandwidth,
 * and finally writes
 *   v->DCFCLKState[i][j] = min(v->DCFCLKPerState[i], 1.05 * max(average, peak)).
 *
 * @mode_lib:        library context; only mode_lib->vba is accessed here.
 * @MaxPrefetchMode: passed unchanged to CalculateTWait().
 * @ReorderingBytes: passed unchanged to CalculateExtraLatencyBytes().
 *
 * Nothing is returned. v->DCFCLKState[i][j] is the only member of v that this
 * function assigns directly.
 *
 * NOTE(review): the meaning of the j index is not visible in this function --
 * confirm against the code that fills the [i][j] tables.
 */
7025 static noinline_for_stack void UseMinimumDCFCLK(
7026 struct display_mode_lib *mode_lib,
7027 int MaxPrefetchMode,
7028 int ReorderingBytes)
7029 {
7030 struct vba_vars_st *v = &mode_lib->vba;
     /* dummy1..dummy3 only receive outputs of CalculateVupdateAndDynamicMetadataParameters() that are never read here. */
7031 int dummy1, i, j, k;
7032 double NormalEfficiency, dummy2, dummy3;
     /* Each [i][j] element is written and read only within its own (i, j) iteration below. */
7033 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7034
     /* The percentage is turned into a fraction; it is used as a divisor throughout. */
7035 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7036 for (i = 0; i < v->soc.num_states; ++i) {
7037 for (j = 0; j <= 1; ++j) {
7038 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7039 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7040 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7041 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7042 double MinimumTWait;
7043 double NonDPTEBandwidth;
7044 double DPTEBandwidth;
7045 double DCFCLKRequiredForAverageBandwidth;
7046 double ExtraLatencyBytes;
7047 double ExtraLatencyCycles;
7048 double DCFCLKRequiredForPeakBandwidth;
7049 int NoOfDPPState[DC__NUM_DPP__MAX];
7050 double MinimumTvmPlus2Tr0;
7051
     /*
      * Sum, over all active planes, NoOfDPP * DPTEBytesPerRow divided by
      * 15.75 line times (HTotal / PixelClock).
      * NOTE(review): the origin of the 15.75 factor is not visible here.
      */
7052 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7053 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7054 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7055 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7056 }
7057
     /* Copy this (i, j) slice of NoOfDPP into a flat per-plane array for CalculateExtraLatencyBytes() and the per-plane math below. */
7058 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7059 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7060 }
7061
7062 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7063 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
     /*
      * Use the sum computed above when HostVM is enabled or immediate flip
      * is required; otherwise use v->TotalDPTERowBandwidth. Only element 0
      * of ImmediateFlipRequirement is examined.
      */
7064 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7065 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
     /*
      * Average requirement: the largest of the projected deep-sleep DCFCLK
      * and two bandwidth-derived clocks (one scaled by the "max average
      * percent" limit, one scaled by NormalEfficiency).
      */
7066 DCFCLKRequiredForAverageBandwidth = dml_max3(
7067 v->ProjectedDCFCLKDeepSleep[i][j],
7068 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7069 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7070 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7071
7072 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7073 ReorderingBytes,
7074 v->TotalNumberOfActiveDPP[i][j],
7075 v->PixelChunkSizeInKByte,
7076 v->TotalNumberOfDCCActiveDPP[i][j],
7077 v->MetaChunkSize,
7078 v->GPUVMEnable,
7079 v->HostVMEnable,
7080 v->NumberOfActivePlanes,
7081 NoOfDPPState,
7082 v->dpte_group_bytes,
7083 1,
7084 v->HostVMMinPageSize,
7085 v->HostVMMaxNonCachedPageTableLevels);
     /* Express the extra latency in DCFCLK cycles: fixed cycle counts plus the byte count divided by the bus width and NormalEfficiency. */
7086 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
     /* Per-plane pass: fills the four per-plane arrays declared at the top of this scope. */
7087 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7088 double DCFCLKCyclesRequiredInPrefetch;
7089 double ExpectedPrefetchBWAcceleration;
7090 double PrefetchTime;
7091
7092 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7093 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
     /* The PDEAndMetaPTEBytesPerFrame term is counted only when GPUVMMaxPageTableLevels > 2. */
7094 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7095 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7096 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7097 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7098 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7099 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7100 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
     /* Non-zero only when GPUVM, this plane's dynamic metadata and DynamicMetadataVMEnabled are all set. */
7101 DynamicMetadataVMExtraLatency[k] =
7102 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7103 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
     /*
      * (MaximumVStartup - 1) line times, minus MinimumTWait, a page-table
      * dependent multiple of UrgLatency, and the dynamic metadata VM latency.
      */
7104 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7105 - v->UrgLatency[i]
7106 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7107 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7108 - DynamicMetadataVMExtraLatency[k];
7109
     /* With no positive prefetch time, fall back to the state's full DCFCLK for this plane. */
7110 if (PrefetchTime > 0) {
7111 double ExpectedVRatioPrefetch;
7112 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7113 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7114 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7115 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
     /* Same condition as the DPTEBandwidth selection above: add the DPTE share for this plane's DPPs. */
7116 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7117 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7118 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7119 }
7120 } else {
7121 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7122 }
     /* Planes with dynamic metadata get an additional constraint derived from the pipe timing outputs. */
7123 if (v->DynamicMetadataEnable[k] == true) {
7124 double TSetupPipe;
7125 double TdmbfPipe;
7126 double TdmsksPipe;
7127 double TdmecPipe;
7128 double AllowedTimeForUrgentExtraLatency;
7129
7130 CalculateVupdateAndDynamicMetadataParameters(
7131 v->MaxInterDCNTileRepeaters,
7132 v->RequiredDPPCLK[i][j][k],
7133 v->RequiredDISPCLK[i][j],
7134 v->ProjectedDCFCLKDeepSleep[i][j],
7135 v->PixelClock[k],
7136 v->HTotal[k],
7137 v->VTotal[k] - v->VActive[k],
7138 v->DynamicMetadataTransmittedBytes[k],
7139 v->DynamicMetadataLinesBeforeActiveRequired[k],
7140 v->Interlace[k],
7141 v->ProgressiveToInterlaceUnitInOPP,
7142 &TSetupPipe,
7143 &TdmbfPipe,
7144 &TdmecPipe,
7145 &TdmsksPipe,
7146 &dummy1,
7147 &dummy2,
7148 &dummy3);
     /* All four pipe times are subtracted, so only their sum matters here. */
7149 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7150 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
     /* A non-positive allowance replaces the value computed above with the state's full DCFCLK. */
7151 if (AllowedTimeForUrgentExtraLatency > 0) {
7152 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7153 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7154 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7155 } else {
7156 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7157 }
7158 }
7159 }
     /* The peak requirement starts as the sum of the per-plane requirements. */
7160 DCFCLKRequiredForPeakBandwidth = 0;
7161 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7162 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7163 }
     /* Zero when GPUVM is disabled; otherwise UrgLatency times a page-table-level dependent count. */
7164 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7165 * (v->GPUVMEnable == true ?
7166 (v->HostVMEnable == true ?
7167 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7168 0);
     /*
      * Second per-plane pass: either reset the peak requirement to the
      * state's full DCFCLK (time budget too small) or raise it via dml_max3.
      * The running value carries over from plane to plane, so a reset on
      * one plane is still subject to dml_max3 on later planes.
      */
7169 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7170 double MaximumTvmPlus2Tr0PlusTsw;
7171 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7172 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7173 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7174 } else {
7175 DCFCLKRequiredForPeakBandwidth = dml_max3(
7176 DCFCLKRequiredForPeakBandwidth,
7177 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7178 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7179 }
7180 }
     /* Apply a 5% margin to the larger requirement and cap the result at the state's DCFCLK. */
7181 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7182 }
7183 }
7184 }
7185
7186 static void CalculateUnboundedRequestAndCompressedBufferSize(
7187 unsigned int DETBufferSizeInKByte,
7188 int ConfigReturnBufferSizeInKByte,
7189 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7190 int TotalActiveDPP,
7191 bool NoChromaPlanes,
7192 int MaxNumDPP,
7193 int CompressedBufferSegmentSizeInkByteFinal,
7194 enum output_encoder_class *Output,
7195 bool *UnboundedRequestEnabled,
7196 int *CompressedBufferSizeInkByte)
7197 {
7198 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7199
7200 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7201 *CompressedBufferSizeInkByte = (
7202 *UnboundedRequestEnabled == true ?
7203 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7204 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7205 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7206
7207 #ifdef __DML_VBA_DEBUG__
7208 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7209 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7210 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7211 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7212 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7213 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7214 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7215 #endif
7216 }
7217
7218 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7219 {
7220 bool ret_val = false;
7221
7222 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7223 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7224 ret_val = false;
7225 }
7226 return (ret_val);
7227 }
7228
7229