1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5
6 #include "dml2_internal_shared_types.h"
7 #include "dml2_core_dcn4_calcs.h"
8 #include "dml2_debug.h"
9 #include "lib_float_math.h"
10 #include "dml_top_types.h"
11
12 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
13 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4
14 #define DML_MAX_COMPRESSION_RATIO 4
15 //#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
16 //#define DML_GLOBAL_PREFETCH_CHECK
17 #define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
18 #define DML_MAX_VSTARTUP_START 1023
19
dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)20 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
21 {
22 switch (bw_type) {
23 case (dml2_core_internal_bw_sdp):
24 return("dml2_core_internal_bw_sdp");
25 case (dml2_core_internal_bw_dram):
26 return("dml2_core_internal_bw_dram");
27 case (dml2_core_internal_bw_max):
28 return("dml2_core_internal_bw_max");
29 default:
30 return("dml2_core_internal_bw_unknown");
31 }
32 }
33
dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)34 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
35 {
36 switch (dml2_core_internal_soc_state_type) {
37 case (dml2_core_internal_soc_state_sys_idle):
38 return("dml2_core_internal_soc_state_sys_idle");
39 case (dml2_core_internal_soc_state_sys_active):
40 return("dml2_core_internal_soc_state_sys_active");
41 case (dml2_core_internal_soc_state_svp_prefetch):
42 return("dml2_core_internal_soc_state_svp_prefetch");
43 case dml2_core_internal_soc_state_max:
44 default:
45 return("dml2_core_internal_soc_state_unknown");
46 }
47 }
48
// Divide dividend by divisor and return the floating-point quotient.
// *remainder is not an arithmetic remainder: it is set to 1 when the quotient
// minus its int-truncated value is greater than zero, and to 0 otherwise.
static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
{
	const double quotient = dividend / divisor;
	const double fraction = quotient - (int)quotient;

	*remainder = (fraction > 0);
	return quotient;
}
54
dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info * support,bool fail_only)55 static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
56 {
57 DML_LOG_VERBOSE("DML: ===================================== \n");
58 DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n");
59 if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
60 DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
61 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
62 DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
63 if (!fail_only || support->ViewportSizeSupport == 0)
64 DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
65 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
66 DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
67 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
68 DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
69 if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
70 DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
71 if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
72 DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
73 if (!fail_only || support->ExceededMultistreamSlots == 1)
74 DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
75 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
76 DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
77 if (!fail_only || support->NotEnoughLanesForMSO == 1)
78 DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
79 if (!fail_only || support->P2IWith420 == 1)
80 DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420);
81 if (!fail_only || support->DSC422NativeNotSupported == 1)
82 DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
83 if (!fail_only || support->DSCSlicesODMModeSupported == 0)
84 DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
85 if (!fail_only || support->NotEnoughDSCUnits == 1)
86 DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
87 if (!fail_only || support->NotEnoughDSCSlices == 1)
88 DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
89 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
90 DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
91 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
92 DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
93 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
94 DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
95 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
96 DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
97 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
98 DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
99 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
100 DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
101 if (!fail_only || support->ROBSupport == 0)
102 DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport);
103 if (!fail_only || support->OutstandingRequestsSupport == 0)
104 DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
105 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
106 DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
107 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
108 DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
109 if (!fail_only || support->TotalAvailablePipesSupport == 0)
110 DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
111 if (!fail_only || support->NumberOfOTGSupport == 0)
112 DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
113 if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
114 DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
115 if (!fail_only || support->NumberOfDP2p0Support == 0)
116 DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
117 if (!fail_only || support->EnoughWritebackUnits == 0)
118 DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
119 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
120 DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
121 if (!fail_only || support->WritebackLatencySupport == 0)
122 DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
123 if (!fail_only || support->CursorSupport == 0)
124 DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport);
125 if (!fail_only || support->PitchSupport == 0)
126 DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport);
127 if (!fail_only || support->ViewportExceedsSurface == 1)
128 DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
129 if (!fail_only || support->PrefetchSupported == 0)
130 DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
131 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
132 DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
133 if (!fail_only || support->AvgBandwidthSupport == 0)
134 DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
135 if (!fail_only || support->DynamicMetadataSupported == 0)
136 DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
137 if (!fail_only || support->VRatioInPrefetchSupported == 0)
138 DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
139 if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
140 DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
141 if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
142 DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
143 if (!fail_only || support->ExceededMALLSize == 1)
144 DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
145 if (!fail_only || support->g6_temp_read_support == 0)
146 DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
147 if (!fail_only || support->ImmediateFlipSupport == 0)
148 DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
149 if (!fail_only || support->LinkCapacitySupport == 0)
150 DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
151
152 if (!fail_only || support->ModeSupport == 0)
153 DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport);
154 DML_LOG_VERBOSE("DML: ===================================== \n");
155 }
156
get_stream_output_bpp(double * out_bpp,const struct dml2_display_cfg * display_cfg)157 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
158 {
159 for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
160 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
161 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
162 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
163 case dml2_444:
164 out_bpp[k] = bpc * 3;
165 break;
166 case dml2_s422:
167 out_bpp[k] = bpc * 2;
168 break;
169 case dml2_n422:
170 out_bpp[k] = bpc * 2;
171 break;
172 case dml2_420:
173 default:
174 out_bpp[k] = bpc * 1.5;
175 break;
176 }
177 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
178 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
179 } else {
180 out_bpp[k] = 0;
181 }
182 DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
183 DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
184 DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
185 }
186 }
187
// Round num to a multiple of `multiple`: upwards when `up` is true, downwards
// otherwise. num is returned unchanged when it is already a multiple, or when
// `multiple` is zero.
static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
{
	unsigned int excess;

	// Zero would make the modulo below undefined; hand the input back as is.
	if (!multiple)
		return num;

	excess = num % multiple;
	if (!excess)
		return num;

	return up ? (num + multiple - excess) : (num - excess);
}
204
dml_get_num_active_pipes(int unsigned num_planes,const struct core_display_cfg_support_info * cfg_support_info)205 static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
206 {
207 unsigned int num_active_pipes = 0;
208
209 for (unsigned int k = 0; k < num_planes; k++) {
210 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
211 }
212
213 DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
214 return num_active_pipes;
215 }
216
dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info * cfg_support_info,unsigned int * pipe_plane)217 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
218 {
219 unsigned int pipe_idx = 0;
220
221 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) {
222 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
223 }
224
225 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
226 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
227 pipe_plane[pipe_idx] = plane_idx;
228 pipe_idx++;
229 }
230 }
231 }
232
dml_is_phantom_pipe(const struct dml2_plane_parameters * plane_cfg)233 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
234 {
235 bool is_phantom = false;
236
237 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
238 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) {
239 is_phantom = true;
240 }
241
242 return is_phantom;
243 }
244
dml_get_is_phantom_pipe(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)245 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
246 {
247 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
248
249 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
250 DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
251 return is_phantom;
252 }
253
// Generates a static accessor dml_get_<variable>(mode_lib, pipe_idx).
// The accessor translates pipe_idx to a plane index through
// mode_lib->mp.pipe_plane and returns interval_var[plane] cast to `type`.
#define dml_get_per_pipe_var_func(variable, type, interval_var) \
static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
{ \
	const unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
	return (type) interval_var[plane_idx]; \
}
260
// Per-pipe accessors generated by dml_get_per_pipe_var_func. Each one is named
// dml_get_<first argument>, takes (mode_lib, pipe_idx) and returns the entry of
// the listed mode_lib->mp array for the plane that pipe_idx maps to.

// Group sizes, swath heights and linear dpte row heights.
dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);

// VStartup/VUpdate/VReady values, p-state keepout, DET buffer sizes and MALL surface size.
dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
277
// Generates a static accessor dml_get_plane_<variable>(mode_lib, plane_idx)
// that returns interval_var[plane_idx] cast to `type`. Unlike the per-pipe
// variant, the index is used directly with no pipe -> plane translation.
#define dml_get_per_plane_var_func(variable, type, interval_var) \
static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
{ \
	type value = (type) interval_var[plane_idx]; \
	return value; \
}
282
// Per-plane accessors generated by dml_get_per_plane_var_func. Each one is named
// dml_get_plane_<first argument>, takes (mode_lib, plane_idx) and returns the
// plane_idx entry of the listed mode_lib->ms array.
// The *_plane0 accessors read the *_l arrays and the *_plane1 accessors read the *_c arrays.
dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
294
// Generates a static accessor
// dml_get_plane_array_<variable>(mode_lib, plane_idx, array_idx) that returns
// interval_var[plane_idx][array_idx] cast to `type`.
#define dml_get_per_plane_array_var_func(variable, type, interval_var) \
static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
{ \
	type value = (type) interval_var[plane_idx][array_idx]; \
	return value; \
}
299
// Two-index accessors generated by dml_get_per_plane_array_var_func:
// dml_get_plane_array_mcache_offsets_plane0/1(mode_lib, plane_idx, array_idx)
// return entries of mode_lib->ms.mcache_offsets_l and mcache_offsets_c respectively.
dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
302
// Generates a static accessor dml_get_<var>(mode_lib) that returns
// internal_var converted to `type`.
// internal_var may be a full expression (one user passes
// `mode_lib->mp.GlobalDPPCLK * 1000.0`), so it is parenthesised before the
// cast; without the parentheses the cast would bind to the first operand only
// and the result would merely be converted implicitly on return.
#define dml_get_var_func(var, type, internal_var) \
static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
{ \
	return (type) (internal_var); \
}
307
// Whole-configuration accessors generated by dml_get_var_func. Each one is named
// dml_get_<first argument>, takes only mode_lib and returns the listed expression
// converted to the listed type.

// Watermarks and trip latencies.
// NOTE: wm_memory_trip reads mode_lib->mp.UrgentLatency, the same field that
// dml_get_urgent_latency further down returns.
dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);

// Remaining watermarks, urgent-bandwidth fractions and stutter statistics.
dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
dml_get_var_func(stutter_efficiency_no_vblank_z8, double, mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank);
dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
// The only accessor whose third argument is an expression: GlobalDPPCLK scaled by 1000.0.
dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);

// Average bandwidth required, read from mode_lib->ms.support and indexed by [soc state][bandwidth type].
dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);

dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

// Average bandwidth available, read from mode_lib->mp.
dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);

dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

// Urgent bandwidth available; the *_vm_only variants are indexed by soc state alone.
dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);

dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);

// Latency figures: UrgentLatency comes from mode_lib->mp, the rest from mode_lib->ms.support.
dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);

// Urgent bandwidth required.
dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

// Non-urgent bandwidth required.
// NOTE: the sys_active names omit the "_bw_" infix that the svp_prefetch names carry.
dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

// Urgent bandwidth required, *_flip arrays.
dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

// Non-urgent bandwidth required, *_flip arrays.
dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);

// Compressed buffer size.
dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);

// Miscellaneous scalar values from mode_lib->mp.
dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
392
// Derive the DET budget from the return-buffer configuration.
//
// *MaxTotalDETInKByte:
//   - MRQ present: math_ceil2() of 4/5 of (config return buffer + ROB), with
//     a second argument of 64 (presumably the rounding granularity - confirm
//     against math_ceil2)
//   - otherwise:   config return buffer minus one return-buffer segment
// *nomDETInKByte: math_floor2() of the per-DPP share of that total, using the
//   segment size as second argument; replaced by nomDETInKByteOverrideValue
//   when nomDETInKByteOverrideEnable is non-zero.
// *MinCompressedBufferSizeInKByte: config return buffer minus the total DET.
static void CalculateMaxDETAndMinCompressedBufferSize(
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInKByte,
	unsigned int ROBBufferSizeInKByte,
	unsigned int MaxNumDPP,
	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
	bool is_mrq_present,

	// Output
	unsigned int *MaxTotalDETInKByte,
	unsigned int *nomDETInKByte,
	unsigned int *MinCompressedBufferSizeInKByte)
{
	double det_share_per_dpp;

	if (!is_mrq_present) {
		*MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
	} else {
		const double pooled_kbytes = (double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte);

		*MaxTotalDETInKByte = (unsigned int) math_ceil2(pooled_kbytes * 4 / 5, 64);
	}

	det_share_per_dpp = (double)*MaxTotalDETInKByte / (double)MaxNumDPP;
	*nomDETInKByte = (unsigned int)(math_floor2(det_share_per_dpp, ConfigReturnBufferSegmentSizeInKByte));
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

	DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);

	// The override is applied after the computed value has been logged, so
	// both values show up in the log when it is active.
	if (nomDETInKByteOverrideEnable) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
		DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
	}
}
428
PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg * display_cfg,bool ptoi_supported,double * PixelClockBackEnd)429 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
430 {
431 //unsigned int num_active_planes = display_cfg->num_planes;
432
433 //Progressive To Interlace Unit Effect
434 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) {
435 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
436 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) {
437 // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
438 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz;
439 }
440 }
441 }
442
dml_is_420(enum dml2_source_format_class source_format)443 static bool dml_is_420(enum dml2_source_format_class source_format)
444 {
445 bool val = false;
446
447 switch (source_format) {
448 case dml2_444_8:
449 val = 0;
450 break;
451 case dml2_444_16:
452 val = 0;
453 break;
454 case dml2_444_32:
455 val = 0;
456 break;
457 case dml2_444_64:
458 val = 0;
459 break;
460 case dml2_420_8:
461 val = 1;
462 break;
463 case dml2_420_10:
464 val = 1;
465 break;
466 case dml2_420_12:
467 val = 1;
468 break;
469 case dml2_422_planar_8:
470 val = 0;
471 break;
472 case dml2_422_planar_10:
473 val = 0;
474 break;
475 case dml2_422_planar_12:
476 val = 0;
477 break;
478 case dml2_422_packed_8:
479 val = 0;
480 break;
481 case dml2_422_packed_10:
482 val = 0;
483 break;
484 case dml2_422_packed_12:
485 val = 0;
486 break;
487 case dml2_rgbe_alpha:
488 val = 0;
489 break;
490 case dml2_rgbe:
491 val = 0;
492 break;
493 case dml2_mono_8:
494 val = 0;
495 break;
496 case dml2_mono_16:
497 val = 0;
498 break;
499 default:
500 DML_ASSERT(0);
501 break;
502 }
503 return val;
504 }
505
dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)506 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
507 {
508 if (sw_mode == dml2_sw_linear)
509 return 256;
510 else if (sw_mode == dml2_sw_256b_2d)
511 return 256;
512 else if (sw_mode == dml2_sw_4kb_2d)
513 return 4096;
514 else if (sw_mode == dml2_sw_64kb_2d)
515 return 65536;
516 else if (sw_mode == dml2_sw_256kb_2d)
517 return 262144;
518 else if (sw_mode == dml2_gfx11_sw_linear)
519 return 256;
520 else if (sw_mode == dml2_gfx11_sw_64kb_d)
521 return 65536;
522 else if (sw_mode == dml2_gfx11_sw_64kb_d_t)
523 return 65536;
524 else if (sw_mode == dml2_gfx11_sw_64kb_d_x)
525 return 65536;
526 else if (sw_mode == dml2_gfx11_sw_64kb_r_x)
527 return 65536;
528 else if (sw_mode == dml2_gfx11_sw_256kb_d_x)
529 return 262144;
530 else if (sw_mode == dml2_gfx11_sw_256kb_r_x)
531 return 262144;
532 else {
533 DML_ASSERT(0);
534 return 256;
535 }
536 }
537
dml_is_vertical_rotation(enum dml2_rotation_angle Scan)538 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
539 {
540 bool is_vert = false;
541 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) {
542 is_vert = true;
543 } else {
544 is_vert = false;
545 }
546 return is_vert;
547 }
548
dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)549 static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
550 {
551 int unsigned version = 0;
552
553 if (sw_mode == dml2_sw_linear ||
554 sw_mode == dml2_sw_256b_2d ||
555 sw_mode == dml2_sw_4kb_2d ||
556 sw_mode == dml2_sw_64kb_2d ||
557 sw_mode == dml2_sw_256kb_2d) {
558 version = 12;
559 } else if (sw_mode == dml2_gfx11_sw_linear ||
560 sw_mode == dml2_gfx11_sw_64kb_d ||
561 sw_mode == dml2_gfx11_sw_64kb_d_t ||
562 sw_mode == dml2_gfx11_sw_64kb_d_x ||
563 sw_mode == dml2_gfx11_sw_64kb_r_x ||
564 sw_mode == dml2_gfx11_sw_256kb_d_x ||
565 sw_mode == dml2_gfx11_sw_256kb_r_x) {
566 version = 11;
567 } else {
568 DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
569 DML_ASSERT(0);
570 }
571
572 return version;
573 }
574
CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,unsigned int pitch_y,unsigned int pitch_c,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC,bool * surf_linear128_l,bool * surf_linear128_c)575 static void CalculateBytePerPixelAndBlockSizes(
576 enum dml2_source_format_class SourcePixelFormat,
577 enum dml2_swizzle_mode SurfaceTiling,
578 unsigned int pitch_y,
579 unsigned int pitch_c,
580
581 // Output
582 unsigned int *BytePerPixelY,
583 unsigned int *BytePerPixelC,
584 double *BytePerPixelDETY,
585 double *BytePerPixelDETC,
586 unsigned int *BlockHeight256BytesY,
587 unsigned int *BlockHeight256BytesC,
588 unsigned int *BlockWidth256BytesY,
589 unsigned int *BlockWidth256BytesC,
590 unsigned int *MacroTileHeightY,
591 unsigned int *MacroTileHeightC,
592 unsigned int *MacroTileWidthY,
593 unsigned int *MacroTileWidthC,
594 bool *surf_linear128_l,
595 bool *surf_linear128_c)
596 {
597 *BytePerPixelDETY = 0;
598 *BytePerPixelDETC = 0;
599 *BytePerPixelY = 1;
600 *BytePerPixelC = 1;
601
602 if (SourcePixelFormat == dml2_444_64) {
603 *BytePerPixelDETY = 8;
604 *BytePerPixelDETC = 0;
605 *BytePerPixelY = 8;
606 *BytePerPixelC = 0;
607 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
608 *BytePerPixelDETY = 4;
609 *BytePerPixelDETC = 0;
610 *BytePerPixelY = 4;
611 *BytePerPixelC = 0;
612 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
613 *BytePerPixelDETY = 2;
614 *BytePerPixelDETC = 0;
615 *BytePerPixelY = 2;
616 *BytePerPixelC = 0;
617 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
618 *BytePerPixelDETY = 1;
619 *BytePerPixelDETC = 0;
620 *BytePerPixelY = 1;
621 *BytePerPixelC = 0;
622 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
623 *BytePerPixelDETY = 4;
624 *BytePerPixelDETC = 1;
625 *BytePerPixelY = 4;
626 *BytePerPixelC = 1;
627 } else if (SourcePixelFormat == dml2_420_8) {
628 *BytePerPixelDETY = 1;
629 *BytePerPixelDETC = 2;
630 *BytePerPixelY = 1;
631 *BytePerPixelC = 2;
632 } else if (SourcePixelFormat == dml2_420_12) {
633 *BytePerPixelDETY = 2;
634 *BytePerPixelDETC = 4;
635 *BytePerPixelY = 2;
636 *BytePerPixelC = 4;
637 } else if (SourcePixelFormat == dml2_420_10) {
638 *BytePerPixelDETY = (double)(4.0 / 3);
639 *BytePerPixelDETC = (double)(8.0 / 3);
640 *BytePerPixelY = 2;
641 *BytePerPixelC = 4;
642 } else {
643 DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
644 DML_ASSERT(0);
645 }
646
647 DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
648 DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
649 DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
650 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
651 DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
652 DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y);
653 DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c);
654 DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
655 DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
656
657 if (dml_get_gfx_version(SurfaceTiling) == 11) {
658 *surf_linear128_l = 0;
659 *surf_linear128_c = 0;
660 } else {
661 if (SurfaceTiling == dml2_sw_linear) {
662 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
663
664 if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
665 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
666 }
667 }
668
669 if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
670 if (SurfaceTiling == dml2_sw_linear) {
671 *BlockHeight256BytesY = 1;
672 } else if (SourcePixelFormat == dml2_444_64) {
673 *BlockHeight256BytesY = 4;
674 } else if (SourcePixelFormat == dml2_444_8) {
675 *BlockHeight256BytesY = 16;
676 } else {
677 *BlockHeight256BytesY = 8;
678 }
679 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
680 *BlockHeight256BytesC = 0;
681 *BlockWidth256BytesC = 0;
682 } else { // dual plane
683 if (SurfaceTiling == dml2_sw_linear) {
684 *BlockHeight256BytesY = 1;
685 *BlockHeight256BytesC = 1;
686 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
687 *BlockHeight256BytesY = 8;
688 *BlockHeight256BytesC = 16;
689 } else if (SourcePixelFormat == dml2_420_8) {
690 *BlockHeight256BytesY = 16;
691 *BlockHeight256BytesC = 8;
692 } else {
693 *BlockHeight256BytesY = 8;
694 *BlockHeight256BytesC = 8;
695 }
696 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
697 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
698 }
699 DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
700 DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
701 DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
702 DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
703
704 if (dml_get_gfx_version(SurfaceTiling) == 11) {
705 if (SurfaceTiling == dml2_gfx11_sw_linear) {
706 *MacroTileHeightY = *BlockHeight256BytesY;
707 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
708 *MacroTileHeightC = *BlockHeight256BytesC;
709 if (*MacroTileHeightC == 0) {
710 *MacroTileWidthC = 0;
711 } else {
712 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
713 }
714 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
715 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
716 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
717 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
718 if (*MacroTileHeightC == 0) {
719 *MacroTileWidthC = 0;
720 } else {
721 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
722 }
723 } else {
724 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
725 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
726 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
727 if (*MacroTileHeightC == 0) {
728 *MacroTileWidthC = 0;
729 } else {
730 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
731 }
732 }
733 } else {
734 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
735 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
736
737 if (SurfaceTiling == dml2_sw_linear) {
738 macro_tile_scale = 1;
739 } else if (SurfaceTiling == dml2_sw_4kb_2d) {
740 macro_tile_scale = 4;
741 } else if (SurfaceTiling == dml2_sw_64kb_2d) {
742 macro_tile_scale = 16;
743 } else if (SurfaceTiling == dml2_sw_256kb_2d) {
744 macro_tile_scale = 32;
745 } else {
746 DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
747 DML_ASSERT(0);
748 }
749
750 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
751 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
752 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
753 if (*MacroTileHeightC == 0) {
754 *MacroTileWidthC = 0;
755 } else {
756 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
757 }
758 }
759
760 DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
761 DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
762 DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
763 DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
764 }
765
CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum dml2_source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)766 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
767 double HRatio,
768 double HRatioChroma,
769 double VRatio,
770 double VRatioChroma,
771 double MaxDCHUBToPSCLThroughput,
772 double MaxPSCLToLBThroughput,
773 double PixelClock,
774 enum dml2_source_format_class SourcePixelFormat,
775 unsigned int HTaps,
776 unsigned int HTapsChroma,
777 unsigned int VTaps,
778 unsigned int VTapsChroma,
779
780 // Output
781 double *PSCL_THROUGHPUT,
782 double *PSCL_THROUGHPUT_CHROMA,
783 double *DPPCLKUsingSingleDPP)
784 {
785 double DPPCLKUsingSingleDPPLuma;
786 double DPPCLKUsingSingleDPPChroma;
787
788 if (HRatio > 1) {
789 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
790 } else {
791 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
792 }
793
794 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
795
796 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
797 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
798
799 if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
800 *PSCL_THROUGHPUT_CHROMA = 0;
801 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
802 } else {
803 if (HRatioChroma > 1) {
804 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
805 } else {
806 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
807 }
808 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma),
809 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
810 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
811 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
812 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
813 }
814 }
815
CalculateSwathWidth(const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum dml2_odm_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],bool surf_linear128_l[],bool surf_linear128_c[],unsigned int DPPPerSurface[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],unsigned int SwathWidthSingleDPPY[],unsigned int SwathWidthSingleDPPC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])816 static void CalculateSwathWidth(
817 const struct dml2_display_cfg *display_cfg,
818 bool ForceSingleDPP,
819 unsigned int NumberOfActiveSurfaces,
820 enum dml2_odm_mode ODMMode[],
821 unsigned int BytePerPixY[],
822 unsigned int BytePerPixC[],
823 unsigned int Read256BytesBlockHeightY[],
824 unsigned int Read256BytesBlockHeightC[],
825 unsigned int Read256BytesBlockWidthY[],
826 unsigned int Read256BytesBlockWidthC[],
827 bool surf_linear128_l[],
828 bool surf_linear128_c[],
829 unsigned int DPPPerSurface[],
830
831 // Output
832 unsigned int req_per_swath_ub_l[],
833 unsigned int req_per_swath_ub_c[],
834 unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
835 unsigned int SwathWidthSingleDPPC[],
836 unsigned int SwathWidthY[], // per-pipe
837 unsigned int SwathWidthC[], // per-pipe
838 unsigned int MaximumSwathHeightY[],
839 unsigned int MaximumSwathHeightC[],
840 unsigned int swath_width_luma_ub[], // per-pipe
841 unsigned int swath_width_chroma_ub[]) // per-pipe
842 {
843 enum dml2_odm_mode MainSurfaceODMMode;
844 double odm_hactive_factor = 1.0;
845 unsigned int req_width_horz_y;
846 unsigned int req_width_horz_c;
847 unsigned int surface_width_ub_l;
848 unsigned int surface_height_ub_l;
849 unsigned int surface_width_ub_c;
850 unsigned int surface_height_ub_c;
851
852 DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
853 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
854
855 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
856 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
857 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
858 } else {
859 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
860 }
861
862 DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
863 DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
864 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
865
866 MainSurfaceODMMode = ODMMode[k];
867
868 if (ForceSingleDPP) {
869 SwathWidthY[k] = SwathWidthSingleDPPY[k];
870 } else {
871 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
872 odm_hactive_factor = 4.0;
873 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
874 odm_hactive_factor = 3.0;
875 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
876 odm_hactive_factor = 2.0;
877
878 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
879 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
880 } else if (DPPPerSurface[k] == 2) {
881 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
882 } else {
883 SwathWidthY[k] = SwathWidthSingleDPPY[k];
884 }
885 }
886
887 DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
888 DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
889 DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
890 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
891 DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
892
893 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
894 SwathWidthC[k] = SwathWidthY[k] / 2;
895 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
896 } else {
897 SwathWidthC[k] = SwathWidthY[k];
898 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
899 }
900
901 if (ForceSingleDPP == true) {
902 SwathWidthY[k] = SwathWidthSingleDPPY[k];
903 SwathWidthC[k] = SwathWidthSingleDPPC[k];
904 }
905
906 req_width_horz_y = Read256BytesBlockWidthY[k];
907 req_width_horz_c = Read256BytesBlockWidthC[k];
908
909 if (surf_linear128_l[k])
910 req_width_horz_y = req_width_horz_y / 2;
911
912 if (surf_linear128_c[k])
913 req_width_horz_c = req_width_horz_c / 2;
914
915 surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
916 surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
917 surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
918 surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
919
920 DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
921 DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
922 DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
923 DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
924 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
925 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
926 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
927 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
928 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
929 DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
930 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
931 DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
932 DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
933 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
934
935 req_per_swath_ub_l[k] = 0;
936 req_per_swath_ub_c[k] = 0;
937 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
938 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
939 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
940 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
941 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
942 } else {
943 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
944 }
945 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
946
947 if (BytePerPixC[k] > 0) {
948 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
949 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
950 } else {
951 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
952 }
953 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
954 } else {
955 swath_width_chroma_ub[k] = 0;
956 }
957 } else {
958 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
959 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
960
961 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
962 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
963 } else {
964 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
965 }
966 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
967 if (BytePerPixC[k] > 0) {
968 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
969 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
970 } else {
971 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
972 }
973 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
974 } else {
975 swath_width_chroma_ub[k] = 0;
976 }
977 }
978
979 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
980 DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
981 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
982 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
983 DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
984 DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
985 }
986 }
987
/*
 * Decide whether unbounded requesting is enabled.
 *
 * It is only possible with exactly one active DPP and NoChromaOrLinear set.
 * When unb_req_force_en is set, unb_req_force_val can additionally turn it
 * off, but can never enable it when it is not possible.
 *
 * Returns the resulting enable state.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool allowed = (TotalNumberOfActiveDPP == 1) && NoChromaOrLinear;
	const bool enabled = unb_req_force_en ? (unb_req_force_val && allowed) : allowed;

	DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, allowed);
	DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, enabled);

	return enabled;
}
1005
CalculateDETBufferSize(struct dml2_core_shared_CalculateDETBufferSize_locals * l,const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int ConfigReturnBufferSegmentSizeInkByte,unsigned int CompressedBufferSegmentSizeInkByte,double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int full_swath_bytes_l[],unsigned int full_swath_bytes_c[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)1006 static void CalculateDETBufferSize(
1007 struct dml2_core_shared_CalculateDETBufferSize_locals *l,
1008 const struct dml2_display_cfg *display_cfg,
1009 bool ForceSingleDPP,
1010 unsigned int NumberOfActiveSurfaces,
1011 bool UnboundedRequestEnabled,
1012 unsigned int nomDETInKByte,
1013 unsigned int MaxTotalDETInKByte,
1014 unsigned int ConfigReturnBufferSizeInKByte,
1015 unsigned int MinCompressedBufferSizeInKByte,
1016 unsigned int ConfigReturnBufferSegmentSizeInkByte,
1017 unsigned int CompressedBufferSegmentSizeInkByte,
1018 double ReadBandwidthLuma[],
1019 double ReadBandwidthChroma[],
1020 unsigned int full_swath_bytes_l[],
1021 unsigned int full_swath_bytes_c[],
1022 unsigned int DPPPerSurface[],
1023 // Output
1024 unsigned int DETBufferSizeInKByte[],
1025 unsigned int *CompressedBufferSizeInkByte)
1026 {
1027 memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
1028
1029 bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
1030 bool NextPotentialSurfaceToAssignDETPieceFound;
1031 bool MinimizeReallocationSuccess = false;
1032
1033 DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
1034 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
1035 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
1036 DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
1037 DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
1038 DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
1039 DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
1040 DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
1041
1042 // Note: Will use default det size if that fits 2 swaths
1043 if (UnboundedRequestEnabled) {
1044 if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
1045 DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
1046 } else {
1047 DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
1048 }
1049 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
1050 } else {
1051 l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
1052 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1053 DETBufferSizeInKByte[k] = 0;
1054 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
1055 l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
1056 } else {
1057 l->max_minDET = nomDETInKByte;
1058 }
1059 l->minDET = 128;
1060 l->minDET_pipe = 0;
1061
1062 // add DET resource until can hold 2 full swaths
1063 while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
1064 if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
1065 l->minDET_pipe = l->minDET;
1066 l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
1067 }
1068
1069 DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
1070 DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
1071 DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
1072 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
1073 DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
1074
1075 if (l->minDET_pipe == 0) {
1076 l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
1077 DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
1078 }
1079
1080 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1081 DETBufferSizeInKByte[k] = 0;
1082 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
1083 DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1084 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1085 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
1086 DETBufferSizeInKByte[k] = l->minDET_pipe;
1087 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
1088 }
1089
1090 DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
1091 DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
1092 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1093 DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
1094 }
1095
1096 if (display_cfg->minimize_det_reallocation) {
1097 MinimizeReallocationSuccess = true;
1098 // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
1099 // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
1100 // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
1101 // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
1102
1103 // Calculate total pixel rate
1104 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1105 l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
1106 }
1107
1108 // Calculate per stream DET budget
1109 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1110 l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
1111 l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
1112 }
1113
1114 // Calculate the per stream total bandwidth
1115 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1116 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1117 l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1118
1119 // Check the minimum can be satisfied by budget
1120 if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1121 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1122 } else {
1123 MinimizeReallocationSuccess = false;
1124 break;
1125 }
1126 }
1127 }
1128
1129 if (MinimizeReallocationSuccess) {
1130 // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
1131 // budget proportionally across its planes
1132 l->ResidualDETAfterRounding = MaxTotalDETInKByte;
1133
1134 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1135 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1136 l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
1137 * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
1138
1139 if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
1140 l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
1141 if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
1142 l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
1143
1144 /* split the additional budgeted DET among the pipes per plane */
1145 DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
1146 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
1147 }
1148
1149 // Round down to segment size
1150 DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
1151
1152 l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1153 }
1154 }
1155 }
1156 }
1157
1158 if (!MinimizeReallocationSuccess) {
1159 l->TotalBandwidth = 0;
1160 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1161 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1162 l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1163 }
1164 }
1165 DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1166 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1167 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1168 }
1169 DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1170 DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
1171 l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
1172 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1173
1174 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1175 DETPieceAssignedToThisSurfaceAlready[k] = true;
1176 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
1177 DETPieceAssignedToThisSurfaceAlready[k] = true;
1178 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1179 } else {
1180 DETPieceAssignedToThisSurfaceAlready[k] = false;
1181 }
1182 DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
1183 DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
1184 }
1185
1186 for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
1187 NextPotentialSurfaceToAssignDETPieceFound = false;
1188 l->NextSurfaceToAssignDETPiece = 0;
1189
1190 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1191 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
1192 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
1193 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1194 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1195 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
1196 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
1197 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
1198 l->NextSurfaceToAssignDETPiece = k;
1199 NextPotentialSurfaceToAssignDETPieceFound = true;
1200 }
1201 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1202 DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1203 }
1204
1205 if (NextPotentialSurfaceToAssignDETPieceFound) {
1206 l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
1207 math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
1208 ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
1209 * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
1210 math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
1211
1212 DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
1213 DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
1214 DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1215 DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1216 DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
1217 DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
1218 DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1219
1220 DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
1221 DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1222
1223 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
1224 DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
1225 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1226 }
1227 }
1228 }
1229 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1230 }
1231 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
1232
1233 DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1234 DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
1235 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1236 DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1237 }
1238 }
1239
CalculateRequiredDispclk(enum dml2_odm_mode ODMMode,double PixelClock,bool isTMDS420)1240 static double CalculateRequiredDispclk(
1241 enum dml2_odm_mode ODMMode,
1242 double PixelClock,
1243 bool isTMDS420)
1244 {
1245 double DispClk;
1246
1247 if (ODMMode == dml2_odm_mode_combine_4to1) {
1248 DispClk = PixelClock / 4.0;
1249 } else if (ODMMode == dml2_odm_mode_combine_3to1) {
1250 DispClk = PixelClock / 3.0;
1251 } else if (ODMMode == dml2_odm_mode_combine_2to1) {
1252 DispClk = PixelClock / 2.0;
1253 } else {
1254 DispClk = PixelClock;
1255 }
1256
1257 if (isTMDS420) {
1258 double TMDS420MinPixClock = PixelClock / 2.0;
1259 DispClk = math_max2(DispClk, TMDS420MinPixClock);
1260 }
1261
1262 return DispClk;
1263 }
1264
TruncToValidBPP(struct dml2_core_shared_TruncToValidBPP_locals * l,double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum dml2_output_encoder_class Output,enum dml2_output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,unsigned int * RequiredSlots)1265 static double TruncToValidBPP(
1266 struct dml2_core_shared_TruncToValidBPP_locals *l,
1267 double LinkBitRate,
1268 unsigned int Lanes,
1269 unsigned int HTotal,
1270 unsigned int HActive,
1271 double PixelClock,
1272 double DesiredBPP,
1273 bool DSCEnable,
1274 enum dml2_output_encoder_class Output,
1275 enum dml2_output_format_class Format,
1276 unsigned int DSCInputBitPerComponent,
1277 unsigned int DSCSlices,
1278 unsigned int AudioRate,
1279 unsigned int AudioLayout,
1280 enum dml2_odm_mode ODMModeNoDSC,
1281 enum dml2_odm_mode ODMModeDSC,
1282
1283 // Output
1284 unsigned int *RequiredSlots)
1285 {
1286 double MaxLinkBPP;
1287 unsigned int MinDSCBPP;
1288 double MaxDSCBPP;
1289 unsigned int NonDSCBPP0;
1290 unsigned int NonDSCBPP1;
1291 unsigned int NonDSCBPP2;
1292 enum dml2_odm_mode ODMMode;
1293
1294 if (Format == dml2_420) {
1295 NonDSCBPP0 = 12;
1296 NonDSCBPP1 = 15;
1297 NonDSCBPP2 = 18;
1298 MinDSCBPP = 6;
1299 MaxDSCBPP = 16;
1300 } else if (Format == dml2_444) {
1301 NonDSCBPP0 = 24;
1302 NonDSCBPP1 = 30;
1303 NonDSCBPP2 = 36;
1304 MinDSCBPP = 8;
1305 MaxDSCBPP = 16;
1306 } else {
1307
1308 if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
1309 NonDSCBPP0 = 24;
1310 NonDSCBPP1 = 24;
1311 NonDSCBPP2 = 24;
1312 } else {
1313 NonDSCBPP0 = 16;
1314 NonDSCBPP1 = 20;
1315 NonDSCBPP2 = 24;
1316 }
1317 if (Format == dml2_n422 || Output == dml2_hdmifrl) {
1318 MinDSCBPP = 7;
1319 MaxDSCBPP = 16;
1320 } else {
1321 MinDSCBPP = 8;
1322 MaxDSCBPP = 16;
1323 }
1324 }
1325
1326 if (Output == dml2_dp2p0) {
1327 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
1328 } else if (DSCEnable && Output == dml2_dp) {
1329 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
1330 } else {
1331 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
1332 }
1333
1334 ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
1335
1336 if (ODMMode == dml2_odm_mode_split_1to2) {
1337 MaxLinkBPP = 2 * MaxLinkBPP;
1338 }
1339
1340 if (DesiredBPP == 0) {
1341 if (DSCEnable) {
1342 if (MaxLinkBPP < MinDSCBPP) {
1343 return __DML2_CALCS_DPP_INVALID__;
1344 } else if (MaxLinkBPP >= MaxDSCBPP) {
1345 return MaxDSCBPP;
1346 } else {
1347 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
1348 }
1349 } else {
1350 if (MaxLinkBPP >= NonDSCBPP2) {
1351 return NonDSCBPP2;
1352 } else if (MaxLinkBPP >= NonDSCBPP1) {
1353 return NonDSCBPP1;
1354 } else if (MaxLinkBPP >= NonDSCBPP0) {
1355 return NonDSCBPP0;
1356 } else {
1357 return __DML2_CALCS_DPP_INVALID__;
1358 }
1359 }
1360 } else {
1361 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
1362 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
1363 return __DML2_CALCS_DPP_INVALID__;
1364 } else {
1365 return DesiredBPP;
1366 }
1367 }
1368 }
1369
1370 // updated for dcn4
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1371 static unsigned int dscceComputeDelay(
1372 unsigned int bpc,
1373 double BPP,
1374 unsigned int sliceWidth,
1375 unsigned int numSlices,
1376 enum dml2_output_format_class pixelFormat,
1377 enum dml2_output_encoder_class Output)
1378 {
1379 // valid bpc = source bits per component in the set of {8, 10, 12}
1380 // valid bpp = increments of 1/16 of a bit
1381 // min = 6/7/8 in N420/N422/444, respectively
1382 // max = such that compression is 1:1
1383 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
1384 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
1385 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
1386
1387 // fixed value
1388 unsigned int rcModelSize = 8192;
1389
1390 // N422/N420 operate at 2 pixels per clock
1391 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
1392
1393 if (pixelFormat == dml2_420)
1394 pixelsPerClock = 2;
1395 // #all other modes operate at 1 pixel per clock
1396 else if (pixelFormat == dml2_444)
1397 pixelsPerClock = 1;
1398 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1399 pixelsPerClock = 2;
1400 else
1401 pixelsPerClock = 1;
1402
1403 //initial transmit delay as per PPS
1404 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
1405
1406 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
1407 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
1408
1409 padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
1410
1411 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
1412 if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
1413 initial_xmit_delay++;
1414 }
1415 }
1416
1417 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
1418 if (bpc == 8)
1419 ssm_group_priming_delay = 83;
1420 else if (bpc == 10)
1421 ssm_group_priming_delay = 91;
1422 else if (bpc == 12)
1423 ssm_group_priming_delay = 115;
1424 else if (bpc == 14)
1425 ssm_group_priming_delay = 123;
1426 else
1427 ssm_group_priming_delay = 128;
1428
1429 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
1430 slice_width_groups = (slice_width_modified + 2) / 3;
1431
1432 //determine number of padded pixels in the last group of a slice line, computed as
1433 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
1434
1435 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
1436 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
1437
1438 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay
1439 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
1440 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
1441
1442 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
1443 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
1444
1445 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
1446 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
1447
1448 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
1449 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
1450 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
1451
1452 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
1453 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
1454 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
1455
1456 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
1457 ssm_pipeline_delay = 2;
1458
1459 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
1460 obsm_pipeline_delay = 1;
1461
1462 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
1463 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1464 cycles_per_group = 6;
1465 else
1466 cycles_per_group = 3;
1467 //delay of the bit stream contruction layer in pixels is the sum of:
1468 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice
1469 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
1470 //3. additional group of delay if initial transmit delay is reached exactly in a group
1471 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
1472 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
1473 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
1474
1475 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
1476 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
1477
1478 DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc);
1479 DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP);
1480 DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
1481 DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices);
1482 DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
1483 DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output);
1484 DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels);
1485 return pixels;
1486 }
1487
1488 //updated in dcn4
dscComputeDelay(enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1489 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
1490 {
1491 unsigned int Delay = 0;
1492 unsigned int dispclk_per_dscclk = 3;
1493
1494 // sfr
1495 Delay = Delay + 2;
1496
1497 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1498 dispclk_per_dscclk = 3 * 2;
1499 }
1500
1501 if (pixelFormat == dml2_420) {
1502 //dscc top delay for pixel compression layer
1503 Delay = Delay + 16 * dispclk_per_dscclk;
1504
1505 // dscc - input deserializer
1506 Delay = Delay + 5;
1507
1508 // dscc - input cdc fifo
1509 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1510
1511 // dscc - output cdc fifo
1512 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1513
1514 // dscc - cdc uncertainty
1515 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1516 } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1517 //dscc top delay for pixel compression layer
1518 Delay = Delay + 16 * dispclk_per_dscclk;
1519 // dsccif
1520 Delay = Delay + 1;
1521 // dscc - input deserializer
1522 Delay = Delay + 5;
1523 // dscc - input cdc fifo
1524 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1525
1526
1527 // dscc - output cdc fifo
1528 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1529 // dscc - cdc uncertainty
1530 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1531 } else if (pixelFormat == dml2_s422) {
1532 //dscc top delay for pixel compression layer
1533 Delay = Delay + 17 * dispclk_per_dscclk;
1534
1535 // dscc - input deserializer
1536 Delay = Delay + 3;
1537 // dscc - input cdc fifo
1538 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1539 // dscc - output cdc fifo
1540 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1541 // dscc - cdc uncertainty
1542 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1543 } else {
1544 //dscc top delay for pixel compression layer
1545 Delay = Delay + 16 * dispclk_per_dscclk;
1546 // dscc - input deserializer
1547 Delay = Delay + 3;
1548 // dscc - input cdc fifo
1549 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1550 // dscc - output cdc fifo
1551 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1552
1553 // dscc - cdc uncertainty
1554 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1555 }
1556
1557 // sft
1558 Delay = Delay + 1;
1559 DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
1560 DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay);
1561
1562 return Delay;
1563 }
1564
// Returns the number of dynamic host VM page table levels.
//
// The result is 0 unless both GPUVMEnable and HostVMEnable are set. When
// they are, it is derived from HostVMMaxNonCachedPageTableLevels by the
// minimum host VM page size:
//   HostVMMinPageSize <  2048              -> all levels
//   2048 <= HostVMMinPageSize < 1048576    -> one level fewer
//   HostVMMinPageSize >= 1048576           -> two levels fewer
// with the reduced values clamped so they never go below 0.
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	double levels_dropped;

	if (!GPUVMEnable || !HostVMEnable)
		return 0;

	if (HostVMMinPageSize < 2048)
		return HostVMMaxNonCachedPageTableLevels;

	levels_dropped = (HostVMMinPageSize < 1048576) ? 1.0 : 2.0;

	// subtract in floating point so the clamp at 0 sees a negative value
	// instead of an unsigned wrap-around
	return (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - levels_dropped);
}
1585
CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params * p)1586 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
1587 {
1588 unsigned int extra_dpde_bytes;
1589 unsigned int extra_mpde_bytes;
1590 unsigned int MacroTileSizeBytes;
1591 unsigned int vp_height_dpte_ub;
1592
1593 unsigned int meta_surface_bytes;
1594 unsigned int vm_bytes;
1595 unsigned int vp_height_meta_ub;
1596 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
1597
1598 *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
1599 *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
1600 if (p->SurfaceTiling == dml2_sw_linear) {
1601 *p->meta_row_height = 32;
1602 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1603 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
1604 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1605 *p->meta_row_height = *p->MetaRequestHeight;
1606 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1607 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1608 } else {
1609 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
1610 }
1611 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
1612 } else {
1613 *p->meta_row_height = *p->MetaRequestWidth;
1614 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1615 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
1616 } else {
1617 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
1618 }
1619 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
1620 }
1621
1622 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1623 vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
1624 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1625 vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1626 } else {
1627 vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1628 }
1629
1630 meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
1631 DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
1632 DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
1633 if (p->GPUVMEnable == true) {
1634 double meta_vmpg_bytes = 4.0 * 1024.0;
1635 *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
1636 extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
1637 } else {
1638 *p->meta_pte_bytes_per_frame_ub = 0;
1639 extra_mpde_bytes = 0;
1640 }
1641
1642 if (!p->DCCEnable || !p->mrq_present) {
1643 *p->meta_pte_bytes_per_frame_ub = 0;
1644 extra_mpde_bytes = 0;
1645 *p->meta_row_bytes = 0;
1646 }
1647
1648 if (!p->GPUVMEnable) {
1649 *p->PixelPTEBytesPerRow = 0;
1650 *p->PixelPTEBytesPerRowStorage = 0;
1651 *p->dpte_row_width_ub = 0;
1652 *p->dpte_row_height = 0;
1653 *p->dpte_row_height_linear = 0;
1654 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1655 *p->dpte_row_width_ub_one_row_per_frame = 0;
1656 *p->dpte_row_height_one_row_per_frame = 0;
1657 *p->vmpg_width = 0;
1658 *p->vmpg_height = 0;
1659 *p->PixelPTEReqWidth = 0;
1660 *p->PixelPTEReqHeight = 0;
1661 *p->PTERequestSize = 0;
1662 *p->dpde0_bytes_per_frame_ub = 0;
1663 return 0;
1664 }
1665
1666 MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
1667
1668 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1669 vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
1670 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1671 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
1672 } else {
1673 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
1674 }
1675
1676 if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
1677 *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
1678 extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
1679 } else {
1680 *p->dpde0_bytes_per_frame_ub = 0;
1681 extra_dpde_bytes = 0;
1682 }
1683
1684 vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
1685
1686 DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
1687 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1688 DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
1689 DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
1690 DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
1691 DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
1692 DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
1693 DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
1694 DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
1695 DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
1696 DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
1697 DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
1698 DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
1699 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
1700 DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
1701 DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
1702 DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
1703
1704 if (p->SurfaceTiling == dml2_sw_linear) {
1705 *p->PixelPTEReqHeight = 1;
1706 *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1707 PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1708 *p->PTERequestSize = 64;
1709
1710 *p->vmpg_height = 1;
1711 *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
1712 } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
1713 *p->PixelPTEReqHeight = p->MacroTileHeight;
1714 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1715 *p->PTERequestSize = 64;
1716
1717 *p->vmpg_height = p->MacroTileHeight;
1718 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1719
1720 } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
1721 // one 64KB tile, is 16x16x256B req
1722 *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
1723 *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
1724 *p->PTERequestSize = 128;
1725
1726 *p->vmpg_height = *p->PixelPTEReqHeight;
1727 *p->vmpg_width = *p->PixelPTEReqWidth;
1728 } else {
1729 // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
1730 *p->PixelPTEReqHeight = p->MacroTileHeight;
1731 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1732 *p->PTERequestSize = 64;
1733
1734 *p->vmpg_height = p->MacroTileHeight;
1735 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1736
1737 if (p->GPUVMEnable == true) {
1738 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
1739 __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
1740 DML_ASSERT(0);
1741 }
1742 }
1743
1744 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1745 DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
1746 DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
1747 DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
1748 DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
1749 DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch);
1750 DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
1751 DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
1752
1753 *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
1754 *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
1755 *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1756 *p->dpte_row_height_linear = 0;
1757
1758 if (p->SurfaceTiling == dml2_sw_linear) {
1759 *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
1760 *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
1761 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1762
1763 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
1764 *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
1765 if (*p->dpte_row_height_linear > 128)
1766 *p->dpte_row_height_linear = 128;
1767
1768 #ifdef __DML_VBA_DEBUG__
1769 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
1770 #endif
1771
1772 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1773 *p->dpte_row_height = *p->PixelPTEReqHeight;
1774
1775 if (p->GPUVMMinPageSizeKBytes > 64) {
1776 *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
1777 } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1778 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
1779 } else {
1780 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
1781 }
1782 #ifdef __DML_VBA_DEBUG__
1783 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
1784 #endif
1785
1786 *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
1787 } else {
1788 *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
1789
1790 if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1791 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
1792 } else {
1793 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
1794 }
1795
1796 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
1797 #ifdef __DML_VBA_DEBUG__
1798 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
1799 #endif
1800 }
1801
1802 if (p->GPUVMEnable != true) {
1803 *p->PixelPTEBytesPerRow = 0;
1804 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1805 }
1806
1807 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
1808
1809 #ifdef __DML_VBA_DEBUG__
1810 DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1811 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1812 DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
1813 DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
1814 DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
1815 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
1816 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
1817 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
1818 DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
1819 DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
1820 DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
1821 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
1822 #endif
1823
1824 return vm_bytes;
1825 } // CalculateVMAndRowBytes
1826
CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dml2_rotation_angle RotationAngle,bool mirrored,bool ViewportStationary,unsigned int SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,unsigned int * VInitPreFill,unsigned int * MaxNumSwath)1827 static unsigned int CalculatePrefetchSourceLines(
1828 double VRatio,
1829 unsigned int VTaps,
1830 bool Interlace,
1831 bool ProgressiveToInterlaceUnitInOPP,
1832 unsigned int SwathHeight,
1833 enum dml2_rotation_angle RotationAngle,
1834 bool mirrored,
1835 bool ViewportStationary,
1836 unsigned int SwathWidth,
1837 unsigned int ViewportHeight,
1838 unsigned int ViewportXStart,
1839 unsigned int ViewportYStart,
1840
1841 // Output
1842 unsigned int *VInitPreFill,
1843 unsigned int *MaxNumSwath)
1844 {
1845
1846 unsigned int vp_start_rot = 0;
1847 unsigned int sw0_tmp = 0;
1848 unsigned int MaxPartialSwath = 0;
1849 double numLines = 0;
1850
1851 #ifdef __DML_VBA_DEBUG__
1852 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
1853 DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps);
1854 DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
1855 DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
1856 DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
1857 DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
1858 #endif
1859 if (ProgressiveToInterlaceUnitInOPP)
1860 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
1861 else
1862 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
1863
1864 if (ViewportStationary) {
1865 if (RotationAngle == dml2_rotation_180) {
1866 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
1867 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) {
1868 vp_start_rot = ViewportXStart;
1869 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) {
1870 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
1871 } else {
1872 vp_start_rot = ViewportYStart;
1873 }
1874 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
1875 if (sw0_tmp < *VInitPreFill) {
1876 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1);
1877 } else {
1878 *MaxNumSwath = 1;
1879 }
1880 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight));
1881 } else {
1882 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
1883 if (*VInitPreFill > 1) {
1884 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
1885 } else {
1886 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
1887 }
1888 }
1889 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
1890
1891 #ifdef __DML_VBA_DEBUG__
1892 DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
1893 DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
1894 DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
1895 DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
1896 DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
1897 #endif
1898 return (unsigned int)(numLines);
1899
1900 }
1901
CalculateRowBandwidth(bool GPUVMEnable,bool use_one_row_for_frame,enum dml2_source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,bool mrq_present,unsigned int meta_row_bytes_per_row_ub_l,unsigned int meta_row_bytes_per_row_ub_c,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,double * dpte_row_bw,double * meta_row_bw)1902 static void CalculateRowBandwidth(
1903 bool GPUVMEnable,
1904 bool use_one_row_for_frame,
1905 enum dml2_source_format_class SourcePixelFormat,
1906 double VRatio,
1907 double VRatioChroma,
1908 bool DCCEnable,
1909 double LineTime,
1910 unsigned int PixelPTEBytesPerRowLuma,
1911 unsigned int PixelPTEBytesPerRowChroma,
1912 unsigned int dpte_row_height_luma,
1913 unsigned int dpte_row_height_chroma,
1914
1915 bool mrq_present,
1916 unsigned int meta_row_bytes_per_row_ub_l,
1917 unsigned int meta_row_bytes_per_row_ub_c,
1918 unsigned int meta_row_height_luma,
1919 unsigned int meta_row_height_chroma,
1920
1921 // Output
1922 double *dpte_row_bw,
1923 double *meta_row_bw)
1924 {
1925 if (!DCCEnable || !mrq_present) {
1926 *meta_row_bw = 0;
1927 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1928 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
1929 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
1930 } else {
1931 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
1932 }
1933
1934 if (GPUVMEnable != true) {
1935 *dpte_row_bw = 0;
1936 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1937 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
1938 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
1939 } else {
1940 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
1941 }
1942 }
1943
CalculateMALLUseForStaticScreen(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool is_using_mall_for_ss[])1944 static void CalculateMALLUseForStaticScreen(
1945 const struct dml2_display_cfg *display_cfg,
1946 unsigned int NumberOfActiveSurfaces,
1947 unsigned int MALLAllocatedForDCN,
1948 unsigned int SurfaceSizeInMALL[],
1949 bool one_row_per_frame_fits_in_buffer[],
1950
1951 // Output
1952 bool is_using_mall_for_ss[])
1953 {
1954
1955 unsigned int SurfaceToAddToMALL;
1956 bool CanAddAnotherSurfaceToMALL;
1957 unsigned int TotalSurfaceSizeInMALL;
1958
1959 TotalSurfaceSizeInMALL = 0;
1960 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1961 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
1962 if (is_using_mall_for_ss[k])
1963 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1964 #ifdef __DML_VBA_DEBUG__
1965 DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
1966 DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
1967 #endif
1968 }
1969
1970 SurfaceToAddToMALL = 0;
1971 CanAddAnotherSurfaceToMALL = true;
1972 while (CanAddAnotherSurfaceToMALL) {
1973 CanAddAnotherSurfaceToMALL = false;
1974 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1975 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 &&
1976 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] &&
1977 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
1978 CanAddAnotherSurfaceToMALL = true;
1979 SurfaceToAddToMALL = k;
1980 DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
1981 }
1982 }
1983 if (CanAddAnotherSurfaceToMALL) {
1984 is_using_mall_for_ss[SurfaceToAddToMALL] = true;
1985 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
1986
1987 #ifdef __DML_VBA_DEBUG__
1988 DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
1989 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
1990 #endif
1991 }
1992 }
1993 }
1994
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum dml2_source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dml2_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dml2_rotation_angle RotationAngle,enum dml2_core_internal_request_type * RequestLuma,enum dml2_core_internal_request_type * RequestChroma,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1995 static void CalculateDCCConfiguration(
1996 bool DCCEnabled,
1997 bool DCCProgrammingAssumesScanDirectionUnknown,
1998 enum dml2_source_format_class SourcePixelFormat,
1999 unsigned int SurfaceWidthLuma,
2000 unsigned int SurfaceWidthChroma,
2001 unsigned int SurfaceHeightLuma,
2002 unsigned int SurfaceHeightChroma,
2003 unsigned int nomDETInKByte,
2004 unsigned int RequestHeight256ByteLuma,
2005 unsigned int RequestHeight256ByteChroma,
2006 enum dml2_swizzle_mode TilingFormat,
2007 unsigned int BytePerPixelY,
2008 unsigned int BytePerPixelC,
2009 double BytePerPixelDETY,
2010 double BytePerPixelDETC,
2011 enum dml2_rotation_angle RotationAngle,
2012
2013 // Output
2014 enum dml2_core_internal_request_type *RequestLuma,
2015 enum dml2_core_internal_request_type *RequestChroma,
2016 unsigned int *MaxUncompressedBlockLuma,
2017 unsigned int *MaxUncompressedBlockChroma,
2018 unsigned int *MaxCompressedBlockLuma,
2019 unsigned int *MaxCompressedBlockChroma,
2020 unsigned int *IndependentBlockLuma,
2021 unsigned int *IndependentBlockChroma)
2022 {
2023 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
2024
2025 unsigned int segment_order_horz_contiguous_luma;
2026 unsigned int segment_order_horz_contiguous_chroma;
2027 unsigned int segment_order_vert_contiguous_luma;
2028 unsigned int segment_order_vert_contiguous_chroma;
2029
2030 unsigned int req128_horz_wc_l;
2031 unsigned int req128_horz_wc_c;
2032 unsigned int req128_vert_wc_l;
2033 unsigned int req128_vert_wc_c;
2034
2035 unsigned int yuv420;
2036 unsigned int horz_div_l;
2037 unsigned int horz_div_c;
2038 unsigned int vert_div_l;
2039 unsigned int vert_div_c;
2040
2041 unsigned int swath_buf_size;
2042 double detile_buf_vp_horz_limit;
2043 double detile_buf_vp_vert_limit;
2044
2045 unsigned int MAS_vp_horz_limit;
2046 unsigned int MAS_vp_vert_limit;
2047 unsigned int max_vp_horz_width;
2048 unsigned int max_vp_vert_height;
2049 unsigned int eff_surf_width_l;
2050 unsigned int eff_surf_width_c;
2051 unsigned int eff_surf_height_l;
2052 unsigned int eff_surf_height_c;
2053
2054 unsigned int full_swath_bytes_horz_wc_l;
2055 unsigned int full_swath_bytes_horz_wc_c;
2056 unsigned int full_swath_bytes_vert_wc_l;
2057 unsigned int full_swath_bytes_vert_wc_c;
2058
2059 if (dml_is_420(SourcePixelFormat))
2060 yuv420 = 1;
2061 else
2062 yuv420 = 0;
2063 horz_div_l = 1;
2064 horz_div_c = 1;
2065 vert_div_l = 1;
2066 vert_div_c = 1;
2067
2068 if (BytePerPixelY == 1)
2069 vert_div_l = 0;
2070 if (BytePerPixelC == 1)
2071 vert_div_c = 0;
2072
2073 if (BytePerPixelC == 0) {
2074 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2075 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2076 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2077 } else {
2078 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2079 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2080 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2081 }
2082
2083 if (SourcePixelFormat == dml2_420_10) {
2084 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2085 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2086 }
2087
2088 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
2089 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
2090
2091 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
2092 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2093 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2094 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2095 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2096 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2097 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2098 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2099
2100 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2101 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2102 if (BytePerPixelC > 0) {
2103 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2104 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2105 } else {
2106 full_swath_bytes_horz_wc_c = 0;
2107 full_swath_bytes_vert_wc_c = 0;
2108 }
2109
2110 if (SourcePixelFormat == dml2_420_10) {
2111 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2112 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2113 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2114 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2115 }
2116
2117 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2118 req128_horz_wc_l = 0;
2119 req128_horz_wc_c = 0;
2120 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2121 req128_horz_wc_l = 0;
2122 req128_horz_wc_c = 1;
2123 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2124 req128_horz_wc_l = 1;
2125 req128_horz_wc_c = 0;
2126 } else {
2127 req128_horz_wc_l = 1;
2128 req128_horz_wc_c = 1;
2129 }
2130
2131 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2132 req128_vert_wc_l = 0;
2133 req128_vert_wc_c = 0;
2134 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2135 req128_vert_wc_l = 0;
2136 req128_vert_wc_c = 1;
2137 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2138 req128_vert_wc_l = 1;
2139 req128_vert_wc_c = 0;
2140 } else {
2141 req128_vert_wc_l = 1;
2142 req128_vert_wc_c = 1;
2143 }
2144
2145 if (BytePerPixelY == 2) {
2146 segment_order_horz_contiguous_luma = 0;
2147 segment_order_vert_contiguous_luma = 1;
2148 } else {
2149 segment_order_horz_contiguous_luma = 1;
2150 segment_order_vert_contiguous_luma = 0;
2151 }
2152
2153 if (BytePerPixelC == 2) {
2154 segment_order_horz_contiguous_chroma = 0;
2155 segment_order_vert_contiguous_chroma = 1;
2156 } else {
2157 segment_order_horz_contiguous_chroma = 1;
2158 segment_order_vert_contiguous_chroma = 0;
2159 }
2160 #ifdef __DML_VBA_DEBUG__
2161 DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2162 DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2163 DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2164 DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2165 DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2166 DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2167 DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2168 DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2169 DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2170 #endif
2171 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2172 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2173 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2174 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2175 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2176 } else {
2177 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2178 }
2179 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2180 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2181 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2182 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2183 } else {
2184 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2185 }
2186 } else if (!dml_is_vertical_rotation(RotationAngle)) {
2187 if (req128_horz_wc_l == 0) {
2188 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2189 } else if (segment_order_horz_contiguous_luma == 0) {
2190 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2191 } else {
2192 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2193 }
2194 if (req128_horz_wc_c == 0) {
2195 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2196 } else if (segment_order_horz_contiguous_chroma == 0) {
2197 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2198 } else {
2199 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2200 }
2201 } else {
2202 if (req128_vert_wc_l == 0) {
2203 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2204 } else if (segment_order_vert_contiguous_luma == 0) {
2205 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2206 } else {
2207 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2208 }
2209 if (req128_vert_wc_c == 0) {
2210 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2211 } else if (segment_order_vert_contiguous_chroma == 0) {
2212 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2213 } else {
2214 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2215 }
2216 }
2217
2218 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
2219 *MaxUncompressedBlockLuma = 256;
2220 *MaxCompressedBlockLuma = 256;
2221 *IndependentBlockLuma = 0;
2222 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
2223 *MaxUncompressedBlockLuma = 256;
2224 *MaxCompressedBlockLuma = 128;
2225 *IndependentBlockLuma = 128;
2226 } else {
2227 *MaxUncompressedBlockLuma = 256;
2228 *MaxCompressedBlockLuma = 64;
2229 *IndependentBlockLuma = 64;
2230 }
2231
2232 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
2233 *MaxUncompressedBlockChroma = 256;
2234 *MaxCompressedBlockChroma = 256;
2235 *IndependentBlockChroma = 0;
2236 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
2237 *MaxUncompressedBlockChroma = 256;
2238 *MaxCompressedBlockChroma = 128;
2239 *IndependentBlockChroma = 128;
2240 } else {
2241 *MaxUncompressedBlockChroma = 256;
2242 *MaxCompressedBlockChroma = 64;
2243 *IndependentBlockChroma = 64;
2244 }
2245
2246 if (DCCEnabled != true || BytePerPixelC == 0) {
2247 *MaxUncompressedBlockChroma = 0;
2248 *MaxCompressedBlockChroma = 0;
2249 *IndependentBlockChroma = 0;
2250 }
2251
2252 if (DCCEnabled != true) {
2253 *MaxUncompressedBlockLuma = 0;
2254 *MaxCompressedBlockLuma = 0;
2255 *IndependentBlockLuma = 0;
2256 }
2257
2258 #ifdef __DML_VBA_DEBUG__
2259 DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2260 DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2261 DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2262 DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2263 DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2264 DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2265 #endif
2266
2267 }
2268
calculate_mcache_row_bytes(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_row_bytes_params * p)2269 static void calculate_mcache_row_bytes(
2270 struct dml2_core_internal_scratch *scratch,
2271 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
2272 {
2273 unsigned int vmpg_bytes = 0;
2274 unsigned int blk_bytes = 0;
2275 float meta_per_mvmpg_per_channel = 0;
2276 unsigned int est_blk_per_vmpg = 2;
2277 unsigned int mvmpg_per_row_ub = 0;
2278 unsigned int full_vp_width_mvmpg_aligned = 0;
2279 unsigned int full_vp_height_mvmpg_aligned = 0;
2280 unsigned int meta_per_mvmpg_per_channel_ub = 0;
2281 unsigned int mvmpg_per_mcache;
2282
2283 #ifdef __DML_VBA_DEBUG__
2284 DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans);
2285 DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
2286 DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
2287 DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
2288 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2289 DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
2290 DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
2291 DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
2292 DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
2293 DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
2294 DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
2295 DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
2296 DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width);
2297 DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height);
2298 DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
2299 DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
2300 DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
2301 #endif
2302 DML_ASSERT(p->mcache_line_size_bytes != 0);
2303 DML_ASSERT(p->mcache_size_bytes != 0);
2304
2305 *p->mvmpg_width = 0;
2306 *p->mvmpg_height = 0;
2307
2308 if (p->full_vp_height == 0 && p->full_vp_width == 0) {
2309 *p->num_mcaches = 0;
2310 *p->mcache_row_bytes = 0;
2311 *p->mcache_row_bytes_per_channel = 0;
2312 } else {
2313 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
2314
2315 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size
2316 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
2317
2318 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
2319 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
2320 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
2321 *p->mvmpg_width = p->blk_width;
2322 *p->mvmpg_height = p->blk_height;
2323 if (p->gpuvm_enable) {
2324 if (vmpg_bytes >= blk_bytes) {
2325 *p->mvmpg_width = p->vmpg_width;
2326 *p->mvmpg_height = p->vmpg_height;
2327 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
2328 DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
2329 DML_ASSERT(0);
2330 }
2331 }
2332
2333 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
2334 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
2335 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
2336
2337 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
2338
2339 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
2340 if (!p->surf_vert) { //horizontal access
2341 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
2342 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
2343 else
2344 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
2345 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
2346 } else { //vertical access
2347 if (p->vp_stationary == 1)
2348 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
2349 else
2350 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
2351 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
2352 }
2353
2354 if (p->gpuvm_enable) {
2355 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
2356
2357 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic
2358 if (p->surf_vert && vmpg_bytes > blk_bytes) {
2359 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
2360 }
2361
2362 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
2363 } else {
2364 meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
2365
2366 if (!p->surf_vert)
2367 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
2368 else
2369 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
2370 }
2371
2372 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
2373
2374 //but for 4KB vmpg with 64KB tile blk
2375 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
2376 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
2377
2378 // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
2379 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
2380 if (p->gpuvm_enable || p->surf_vert) {
2381 *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
2382 *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
2383 } else { // horizontal and gpuvm disable
2384 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
2385 if (p->mcache_line_size_bytes != 0)
2386 *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
2387 }
2388
2389 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
2390 if (p->mcache_size_bytes != 0)
2391 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);
2392
2393 mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
2394 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
2395
2396 #ifdef __DML_VBA_DEBUG__
2397 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2398 DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
2399 DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
2400 DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
2401 DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
2402 DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
2403 DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
2404 DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
2405 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
2406 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
2407 #endif
2408 }
2409
2410 #ifdef __DML_VBA_DEBUG__
2411 DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
2412 DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
2413 DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
2414 #endif
2415 DML_ASSERT(*p->num_mcaches > 0);
2416 }
2417
calculate_mcache_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_setting_params * p)2418 static void calculate_mcache_setting(
2419 struct dml2_core_internal_scratch *scratch,
2420 struct dml2_core_calcs_calculate_mcache_setting_params *p)
2421 {
2422 unsigned int n;
2423
2424 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
2425 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
2426
2427 *p->num_mcaches_l = 0;
2428 *p->mcache_row_bytes_l = 0;
2429 *p->mcache_row_bytes_per_channel_l = 0;
2430 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
2431 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
2432
2433 *p->num_mcaches_c = 0;
2434 *p->mcache_row_bytes_c = 0;
2435 *p->mcache_row_bytes_per_channel_c = 0;
2436 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
2437 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
2438
2439 *p->mall_comb_mcache_l = 0;
2440 *p->mall_comb_mcache_c = 0;
2441 *p->lc_comb_mcache = 0;
2442
2443 if (!p->dcc_enable)
2444 return;
2445
2446 l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
2447
2448 l->l_p.num_chans = p->num_chans;
2449 l->l_p.mem_word_bytes = p->mem_word_bytes;
2450 l->l_p.mcache_size_bytes = p->mcache_size_bytes;
2451 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2452 l->l_p.gpuvm_enable = p->gpuvm_enable;
2453 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2454 l->l_p.surf_vert = p->surf_vert;
2455 l->l_p.vp_stationary = p->vp_stationary;
2456 l->l_p.tiling_mode = p->tiling_mode;
2457 l->l_p.vp_start_x = p->vp_start_x_l;
2458 l->l_p.vp_start_y = p->vp_start_y_l;
2459 l->l_p.full_vp_width = p->full_vp_width_l;
2460 l->l_p.full_vp_height = p->full_vp_height_l;
2461 l->l_p.blk_width = p->blk_width_l;
2462 l->l_p.blk_height = p->blk_height_l;
2463 l->l_p.vmpg_width = p->vmpg_width_l;
2464 l->l_p.vmpg_height = p->vmpg_height_l;
2465 l->l_p.full_swath_bytes = p->full_swath_bytes_l;
2466 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
2467
2468 // output
2469 l->l_p.num_mcaches = p->num_mcaches_l;
2470 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
2471 l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l;
2472 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
2473 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
2474 l->l_p.mvmpg_width = &l->mvmpg_width_l;
2475 l->l_p.mvmpg_height = &l->mvmpg_height_l;
2476 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
2477 l->l_p.meta_row_width_ub = &l->meta_row_width_l;
2478 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
2479
2480 calculate_mcache_row_bytes(scratch, &l->l_p);
2481 DML_ASSERT(*p->num_mcaches_l > 0);
2482
2483 if (l->is_dual_plane) {
2484 l->c_p.num_chans = p->num_chans;
2485 l->c_p.mem_word_bytes = p->mem_word_bytes;
2486 l->c_p.mcache_size_bytes = p->mcache_size_bytes;
2487 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2488 l->c_p.gpuvm_enable = p->gpuvm_enable;
2489 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2490 l->c_p.surf_vert = p->surf_vert;
2491 l->c_p.vp_stationary = p->vp_stationary;
2492 l->c_p.tiling_mode = p->tiling_mode;
2493 l->c_p.vp_start_x = p->vp_start_x_c;
2494 l->c_p.vp_start_y = p->vp_start_y_c;
2495 l->c_p.full_vp_width = p->full_vp_width_c;
2496 l->c_p.full_vp_height = p->full_vp_height_c;
2497 l->c_p.blk_width = p->blk_width_c;
2498 l->c_p.blk_height = p->blk_height_c;
2499 l->c_p.vmpg_width = p->vmpg_width_c;
2500 l->c_p.vmpg_height = p->vmpg_height_c;
2501 l->c_p.full_swath_bytes = p->full_swath_bytes_c;
2502 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
2503
2504 // output
2505 l->c_p.num_mcaches = p->num_mcaches_c;
2506 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
2507 l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c;
2508 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
2509 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
2510 l->c_p.mvmpg_width = &l->mvmpg_width_c;
2511 l->c_p.mvmpg_height = &l->mvmpg_height_c;
2512 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
2513 l->c_p.meta_row_width_ub = &l->meta_row_width_c;
2514 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
2515
2516 calculate_mcache_row_bytes(scratch, &l->c_p);
2517 DML_ASSERT(*p->num_mcaches_c > 0);
2518 }
2519
2520 // Sharing for iMALL access
2521 l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes;
2522 l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes;
2523 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
2524 l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
2525
2526 if (p->imall_enable) {
2527 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
2528
2529 if (l->is_dual_plane)
2530 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
2531 }
2532
2533 if (!p->surf_vert) // horizonatal access
2534 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
2535 else // vertical access
2536 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
2537
2538 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
2539 if (*p->num_mcaches_l) {
2540 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
2541 }
2542 if (l->is_dual_plane) {
2543 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
2544
2545 /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
2546 if (l->mcache_remainder_l && l->mcache_remainder_c) {
2547 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
2548 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
2549 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
2550 }
2551 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
2552 }
2553 }
2554
2555 #ifdef __DML_VBA_DEBUG__
2556 DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
2557 DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
2558 DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
2559 DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
2560 DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
2561 DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
2562 DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
2563 DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
2564 DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
2565 DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
2566
2567 if (l->is_dual_plane) {
2568 DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
2569 DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
2570 DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
2571 DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
2572 DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
2573 DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
2574 DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
2575 DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
2576 DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
2577 DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
2578 }
2579 #endif
2580 // calculate split_coordinate
2581 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
2582 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
2583
2584 for (n = 0; n < *p->num_mcaches_l - 1; n++) {
2585 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
2586 }
2587 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2588
2589 if (l->is_dual_plane) {
2590 for (n = 0; n < *p->num_mcaches_c - 1; n++) {
2591 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
2592 }
2593 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2594 }
2595 #ifdef __DML_VBA_DEBUG__
2596 for (n = 0; n < *p->num_mcaches_l; n++)
2597 DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2598
2599 if (l->is_dual_plane) {
2600 for (n = 0; n < *p->num_mcaches_c; n++)
2601 DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2602 }
2603 #endif
2604
2605 // Luma/Chroma combine in the last mcache
2606 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
2607 if (*p->lc_comb_mcache && l->is_dual_plane) {
2608 for (n = 0; n < *p->num_mcaches_l - 1; n++)
2609 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
2610 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2611
2612 for (n = 0; n < *p->num_mcaches_c - 1; n++)
2613 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
2614 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2615
2616 #ifdef __DML_VBA_DEBUG__
2617 for (n = 0; n < *p->num_mcaches_l; n++)
2618 DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2619
2620 for (n = 0; n < *p->num_mcaches_c; n++)
2621 DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2622 #endif
2623 }
2624
2625 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
2626 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
2627 }
2628
calculate_mall_bw_overhead_factor(double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes)2629 static void calculate_mall_bw_overhead_factor(
2630 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
2631 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
2632
2633 // input
2634 const struct dml2_display_cfg *display_cfg,
2635 unsigned int num_active_planes)
2636 {
2637 for (unsigned int k = 0; k < num_active_planes; ++k) {
2638 mall_prefetch_sdp_overhead_factor[k] = 1.0;
2639 mall_prefetch_dram_overhead_factor[k] = 1.0;
2640
2641 // SDP - on the return side
2642 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
2643 mall_prefetch_sdp_overhead_factor[k] = 1.25;
2644 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
2645 mall_prefetch_sdp_overhead_factor[k] = 0.25;
2646
2647 // DRAM
2648 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
2649 mall_prefetch_dram_overhead_factor[k] = 2.0;
2650
2651 #ifdef __DML_VBA_DEBUG__
2652 DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
2653 DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
2654 #endif
2655 }
2656 }
2657
dml_get_return_bandwidth_available(const struct dml2_soc_bb * soc,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool is_avg_bw,bool is_hvm_en,bool is_hvm_only,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2658 static double dml_get_return_bandwidth_available(
2659 const struct dml2_soc_bb *soc,
2660 enum dml2_core_internal_soc_state_type state_type,
2661 enum dml2_core_internal_bw_type bw_type,
2662 bool is_avg_bw,
2663 bool is_hvm_en,
2664 bool is_hvm_only,
2665 double dcfclk_mhz,
2666 double fclk_mhz,
2667 double dram_bw_mbps)
2668 {
2669 double return_bw_mbps = 0.;
2670 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
2671 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
2672 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
2673
2674 double derate_sdp_factor;
2675 double derate_fabric_factor;
2676 double derate_dram_factor;
2677
2678 double derate_sdp_bandwidth;
2679 double derate_fabric_bandwidth;
2680 double derate_dram_bandwidth;
2681
2682 if (is_avg_bw) {
2683 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2684 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
2685 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
2686 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
2687 } else { // just assume sys_active
2688 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
2689 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
2690 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
2691 }
2692 } else { // urgent bw
2693 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2694 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
2695 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
2696 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2697
2698 if (is_hvm_en) {
2699 if (is_hvm_only)
2700 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
2701 else
2702 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2703 } else {
2704 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2705 }
2706 } else { // just assume sys_active
2707 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
2708 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
2709
2710 if (is_hvm_en) {
2711 if (is_hvm_only)
2712 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
2713 else
2714 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2715 } else {
2716 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
2717 }
2718 }
2719 }
2720
2721 derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
2722 derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
2723 derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
2724
2725 if (bw_type == dml2_core_internal_bw_sdp)
2726 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth);
2727 else // dml2_core_internal_bw_dram
2728 return_bw_mbps = derate_dram_bandwidth;
2729
2730 DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
2731 DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
2732 DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
2733 DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
2734 DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
2735 DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2736 DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2737 DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
2738 DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
2739 DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
2740 DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
2741 DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
2742 DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
2743 DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
2744 return return_bw_mbps;
2745 }
2746
calculate_bandwidth_available(double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_min[dml2_core_internal_soc_state_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],const struct dml2_soc_bb * soc,bool HostVMEnable,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2747 static noinline_for_stack void calculate_bandwidth_available(
2748 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
2749 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2750 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
2751 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2752 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
2753 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
2754
2755 const struct dml2_soc_bb *soc,
2756 bool HostVMEnable,
2757 double dcfclk_mhz,
2758 double fclk_mhz,
2759 double dram_bw_mbps)
2760 {
2761 unsigned int n, m;
2762
2763 DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2764 DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2765 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
2766
2767 // Calculate all the bandwidth availabe
2768 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2769 for (n = 0; n < dml2_core_internal_bw_max; n++) {
2770 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
2771 m, // soc_state
2772 n, // bw_type
2773 1, // avg_bw
2774 HostVMEnable,
2775 0, // hvm_only
2776 dcfclk_mhz,
2777 fclk_mhz,
2778 dram_bw_mbps);
2779
2780 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2781
2782
2783 #ifdef __DML_VBA_DEBUG__
2784 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
2785 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
2786 #endif
2787
2788 // urg_bandwidth_available_vm_only is indexed by soc_state
2789 if (n == dml2_core_internal_bw_dram) {
2790 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2791 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2792 }
2793 }
2794
2795 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2796 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2797
2798 #ifdef __DML_VBA_DEBUG__
2799 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
2800 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
2801 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]);
2802 #endif
2803 }
2804 }
2805
calculate_avg_bandwidth_required(double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,double ReadBandwidthLuma[],double ReadBandwidthChroma[],double cursor_bw[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double mall_prefetch_dram_overhead_factor[],double mall_prefetch_sdp_overhead_factor[])2806 static void calculate_avg_bandwidth_required(
2807 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2808
2809 // input
2810 const struct dml2_display_cfg *display_cfg,
2811 unsigned int num_active_planes,
2812 double ReadBandwidthLuma[],
2813 double ReadBandwidthChroma[],
2814 double cursor_bw[],
2815 double dcc_dram_bw_nom_overhead_factor_p0[],
2816 double dcc_dram_bw_nom_overhead_factor_p1[],
2817 double mall_prefetch_dram_overhead_factor[],
2818 double mall_prefetch_sdp_overhead_factor[])
2819 {
2820 unsigned int n, m, k;
2821 double sdp_overhead_factor;
2822 double dram_overhead_factor_p0;
2823 double dram_overhead_factor_p1;
2824
2825 // Average BW support check
2826 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2827 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
2828 avg_bandwidth_required[m][n] = 0;
2829 }
2830 }
2831
2832 // SysActive and SVP Prefetch AVG bandwidth Check
2833 for (k = 0; k < num_active_planes; ++k) {
2834 #ifdef __DML_VBA_DEBUG__
2835 DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k);
2836 DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
2837 DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
2838 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
2839 DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
2840 DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
2841 DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
2842 #endif
2843
2844 sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
2845 dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
2846 dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
2847
2848 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
2849 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
2850 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
2851 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2852 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2853 }
2854 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2855 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2856
2857 #ifdef __DML_VBA_DEBUG__
2858 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
2859 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
2860 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
2861 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
2862 #endif
2863 }
2864 }
2865
CalculateVMRowAndSwath(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateVMRowAndSwath_params * p)2866 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
2867 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
2868 {
2869 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
2870
2871 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
2872
2873 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2874 if (p->display_cfg->gpuvm_enable == true) {
2875 p->vm_group_bytes[k] = 512;
2876 p->dpte_group_bytes[k] = 512;
2877 } else {
2878 p->vm_group_bytes[k] = 0;
2879 p->dpte_group_bytes[k] = 0;
2880 }
2881
2882 if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
2883 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
2884 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
2885 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
2886 } else {
2887 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
2888 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
2889 }
2890
2891 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2892 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2893 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2894 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
2895 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
2896 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2897 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2898 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
2899 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2900 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
2901 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
2902 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
2903 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
2904 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2905 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2906 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2907 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
2908 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
2909 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
2910 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
2911 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
2912 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
2913 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
2914
2915 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
2916 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
2917 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
2918 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
2919 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
2920 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
2921 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
2922 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
2923 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
2924 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
2925 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
2926 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
2927 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
2928 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
2929
2930 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
2931 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
2932 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
2933 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
2934 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
2935 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
2936
2937 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
2938
2939 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2940 p->myPipe[k].VRatioChroma,
2941 p->myPipe[k].VTapsChroma,
2942 p->myPipe[k].InterlaceEnable,
2943 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
2944 p->myPipe[k].SwathHeightC,
2945 p->myPipe[k].RotationAngle,
2946 p->myPipe[k].mirrored,
2947 p->myPipe[k].ViewportStationary,
2948 p->SwathWidthC[k],
2949 p->myPipe[k].ViewportHeightC,
2950 p->myPipe[k].ViewportXStartC,
2951 p->myPipe[k].ViewportYStartC,
2952
2953 // Output
2954 &p->VInitPreFillC[k],
2955 &p->MaxNumSwathC[k]);
2956 } else {
2957 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
2958 s->PTEBufferSizeInRequestsForChroma[k] = 0;
2959 s->PixelPTEBytesPerRowC[k] = 0;
2960 s->PixelPTEBytesPerRowStorageC[k] = 0;
2961 s->vm_bytes_c = 0;
2962 p->MaxNumSwathC[k] = 0;
2963 p->PrefetchSourceLinesC[k] = 0;
2964 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2965 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2966 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2967 }
2968
2969 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2970 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2971 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2972 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
2973 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
2974 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2975 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2976 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
2977 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2978 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
2979 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
2980 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
2981 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
2982 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2983 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2984 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2985 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
2986 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
2987 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
2988 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
2989 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
2990 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
2991 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
2992
2993 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
2994 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
2995 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
2996 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
2997 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
2998 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
2999 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
3000 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
3001 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
3002 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
3003 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
3004 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
3005 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
3006 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
3007
3008 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
3009 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
3010 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
3011 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
3012 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
3013 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
3014
3015 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
3016
3017 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
3018 p->myPipe[k].VRatio,
3019 p->myPipe[k].VTaps,
3020 p->myPipe[k].InterlaceEnable,
3021 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
3022 p->myPipe[k].SwathHeightY,
3023 p->myPipe[k].RotationAngle,
3024 p->myPipe[k].mirrored,
3025 p->myPipe[k].ViewportStationary,
3026 p->SwathWidthY[k],
3027 p->myPipe[k].ViewportHeight,
3028 p->myPipe[k].ViewportXStart,
3029 p->myPipe[k].ViewportYStart,
3030
3031 // Output
3032 &p->VInitPreFillY[k],
3033 &p->MaxNumSwathY[k]);
3034
3035 #ifdef __DML_VBA_DEBUG__
3036 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
3037 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
3038 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
3039 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
3040 #endif
3041 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
3042 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
3043 p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
3044 p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
3045
3046 #ifdef __DML_VBA_DEBUG__
3047 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
3048 DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
3049 #endif
3050 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
3051 p->PTEBufferSizeNotExceeded[k] = true;
3052 } else {
3053 p->PTEBufferSizeNotExceeded[k] = false;
3054 }
3055
3056 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
3057 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
3058 #ifdef __DML_VBA_DEBUG__
3059 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
3060 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3061 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3062 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
3063 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
3064 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
3065 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
3066 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3067
3068 DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
3069 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
3070 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
3071 DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
3072 }
3073 #endif
3074 }
3075
3076 CalculateMALLUseForStaticScreen(
3077 p->display_cfg,
3078 p->NumberOfActiveSurfaces,
3079 p->MALLAllocatedForDCN,
3080 p->SurfaceSizeInMALL,
3081 s->one_row_per_frame_fits_in_buffer,
3082 // Output
3083 p->is_using_mall_for_ss);
3084
3085 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3086 if (p->display_cfg->gpuvm_enable) {
3087 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
3088 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
3089 }
3090 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3091 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
3092 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
3093 } else {
3094 p->PTE_BUFFER_MODE[k] = 0;
3095 p->BIGK_FRAGMENT_SIZE[k] = 0;
3096 }
3097 }
3098
3099 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3100 p->DCCMetaBufferSizeNotExceeded[k] = true;
3101 #ifdef __DML_VBA_DEBUG__
3102 DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
3103 DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
3104 #endif
3105 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3106 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
3107
3108 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
3109
3110 if (p->use_one_row_for_frame[k]) {
3111 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
3112 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
3113 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3114 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
3115 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
3116 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
3117 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
3118 }
3119
3120 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
3121 p->DCCMetaBufferSizeNotExceeded[k] = true;
3122 } else {
3123 p->DCCMetaBufferSizeNotExceeded[k] = false;
3124
3125 #ifdef __DML_VBA_DEBUG__
3126 DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
3127 DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
3128 DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
3129 #endif
3130 }
3131
3132 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
3133 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
3134 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
3135 p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
3136 p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
3137
3138 // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
3139 if (p->use_one_row_for_frame[k])
3140 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
3141
3142 CalculateRowBandwidth(
3143 p->display_cfg->gpuvm_enable,
3144 p->use_one_row_for_frame[k],
3145 p->myPipe[k].SourcePixelFormat,
3146 p->myPipe[k].VRatio,
3147 p->myPipe[k].VRatioChroma,
3148 p->myPipe[k].DCCEnable,
3149 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
3150 s->PixelPTEBytesPerRowY[k],
3151 s->PixelPTEBytesPerRowC[k],
3152 p->dpte_row_height_luma[k],
3153 p->dpte_row_height_chroma[k],
3154
3155 p->mrq_present,
3156 p->meta_row_bytes_per_row_ub_l[k],
3157 p->meta_row_bytes_per_row_ub_c[k],
3158 p->meta_row_height_luma[k],
3159 p->meta_row_height_chroma[k],
3160
3161 // Output
3162 &p->dpte_row_bw[k],
3163 &p->meta_row_bw[k]);
3164 #ifdef __DML_VBA_DEBUG__
3165 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
3166 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
3167 DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
3168 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
3169 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
3170 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3171 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
3172 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
3173 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3174 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
3175 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3176 DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
3177 DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
3178 DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
3179 #endif
3180 }
3181 }
3182
CalculateUrgentLatency(double UrgentLatencyPixelDataOnly,double UrgentLatencyPixelMixedWithVMData,double UrgentLatencyVMDataOnly,bool DoUrgentLatencyAdjustment,double UrgentLatencyAdjustmentFabricClockComponent,double UrgentLatencyAdjustmentFabricClockReference,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int urgent_ramp_uclk_cycles,unsigned int df_qos_response_time_fclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_urgent_ramp_latency_margin,double fabric_max_transport_latency_margin)3183 static double CalculateUrgentLatency(
3184 double UrgentLatencyPixelDataOnly,
3185 double UrgentLatencyPixelMixedWithVMData,
3186 double UrgentLatencyVMDataOnly,
3187 bool DoUrgentLatencyAdjustment,
3188 double UrgentLatencyAdjustmentFabricClockComponent,
3189 double UrgentLatencyAdjustmentFabricClockReference,
3190 double FabricClock,
3191 double uclk_freq_mhz,
3192 enum dml2_qos_param_type qos_type,
3193 unsigned int urgent_ramp_uclk_cycles,
3194 unsigned int df_qos_response_time_fclk_cycles,
3195 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3196 unsigned int mall_overhead_fclk_cycles,
3197 double umc_urgent_ramp_latency_margin,
3198 double fabric_max_transport_latency_margin)
3199 {
3200 double urgent_latency = 0;
3201 if (qos_type == dml2_qos_param_type_dcn4x) {
3202 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
3203 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
3204 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
3205 } else {
3206 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
3207 if (DoUrgentLatencyAdjustment == true) {
3208 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
3209 }
3210 }
3211 #ifdef __DML_VBA_DEBUG__
3212 if (qos_type == dml2_qos_param_type_dcn4x) {
3213 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
3214 DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
3215 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3216 DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
3217 } else {
3218 DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
3219 DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
3220 DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
3221 DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
3222 DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
3223 }
3224 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3225 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency);
3226 #endif
3227 return urgent_latency;
3228 }
3229
CalculateTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int trip_to_memory_uclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3230 static double CalculateTripToMemory(
3231 double UrgLatency,
3232 double FabricClock,
3233 double uclk_freq_mhz,
3234 enum dml2_qos_param_type qos_type,
3235 unsigned int trip_to_memory_uclk_cycles,
3236 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3237 unsigned int mall_overhead_fclk_cycles,
3238 double umc_max_latency_margin,
3239 double fabric_max_transport_latency_margin)
3240 {
3241 double trip_to_memory_us;
3242 if (qos_type == dml2_qos_param_type_dcn4x) {
3243 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
3244 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3245 + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3246 } else {
3247 trip_to_memory_us = UrgLatency;
3248 }
3249
3250 #ifdef __DML_VBA_DEBUG__
3251 if (qos_type == dml2_qos_param_type_dcn4x) {
3252 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
3253 DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
3254 DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
3255 DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
3256 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3257 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3258 DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
3259 DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
3260 } else {
3261 DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3262 }
3263 DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
3264 #endif
3265
3266
3267 return trip_to_memory_us;
3268 }
3269
CalculateMetaTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int meta_trip_to_memory_uclk_cycles,unsigned int meta_trip_to_memory_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3270 static double CalculateMetaTripToMemory(
3271 double UrgLatency,
3272 double FabricClock,
3273 double uclk_freq_mhz,
3274 enum dml2_qos_param_type qos_type,
3275 unsigned int meta_trip_to_memory_uclk_cycles,
3276 unsigned int meta_trip_to_memory_fclk_cycles,
3277 double umc_max_latency_margin,
3278 double fabric_max_transport_latency_margin)
3279 {
3280 double meta_trip_to_memory_us;
3281 if (qos_type == dml2_qos_param_type_dcn4x) {
3282 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3283 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3284 } else {
3285 meta_trip_to_memory_us = UrgLatency;
3286 }
3287
3288 #ifdef __DML_VBA_DEBUG__
3289 if (qos_type == dml2_qos_param_type_dcn4x) {
3290 DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
3291 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
3292 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
3293 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3294 } else {
3295 DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3296 }
3297 DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
3298 #endif
3299
3300
3301 return meta_trip_to_memory_us;
3302 }
3303
/*
 * Derive the cursor request attributes from the cursor width and bit depth.
 *
 * @cursor_width:           cursor width in pixels (the cursor is assumed square
 *                          for the total size calculation)
 * @cursor_bpp:             cursor bits per pixel; 2, 32 and 64 are handled
 * @cursor_lines_per_chunk: [out] number of cursor lines per chunk; set to 0 when
 *                          cursor_bpp is not one of the handled values
 * @cursor_bytes_per_line:  [out] cursor line size in bytes, rounded up to the
 *                          request size
 * @cursor_bytes_per_chunk: [out] cursor_bytes_per_line * cursor_lines_per_chunk
 * @cursor_bytes:           [out] cursor_bytes_per_line * cursor_width
 *
 * An unhandled cursor_bpp with a non-zero cursor_width is logged and asserted.
 */
static void calculate_cursor_req_attributes(
	unsigned int cursor_width,
	unsigned int cursor_bpp,

	// output
	unsigned int *cursor_lines_per_chunk,
	unsigned int *cursor_bytes_per_line,
	unsigned int *cursor_bytes_per_chunk,
	unsigned int *cursor_bytes)
{
	unsigned int cursor_bytes_per_req = 0;
	unsigned int cursor_width_bytes = 0;
	unsigned int cursor_height = 0;

	//SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply.
	//- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B
	//- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width

	//The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows.

	cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
	if (cursor_width_bytes <= 64)
		cursor_bytes_per_req = 64;
	else if (cursor_width_bytes <= 128)
		cursor_bytes_per_req = 128;
	else
		cursor_bytes_per_req = 256;

	//If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line.
	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req);

	//Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines.
	if (cursor_bpp == 2) {
		*cursor_lines_per_chunk = 16;
	} else if (cursor_bpp == 32) {
		if (cursor_width <= 32)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 4;
		else
			*cursor_lines_per_chunk = 2;
	} else if (cursor_bpp == 64) {
		if (cursor_width <= 16)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 32)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 4;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 2;
		else
			*cursor_lines_per_chunk = 1;
	} else {
		// Give the output a defined value: it is read just below, and would otherwise
		// depend on whatever the caller's storage happened to contain.
		*cursor_lines_per_chunk = 0;
		if (cursor_width > 0) {
			DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %u\n", __func__, cursor_bpp);
			DML_ASSERT(0);
		}
	}

	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;

	// For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize.
	// Only cursor_width is provided for worst case sizing so assume that the cursor is square
	cursor_height = cursor_width;
	*cursor_bytes = *cursor_bytes_per_line * cursor_height;
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: cursor_bpp = %u\n", __func__, cursor_bpp);
	DML_LOG_VERBOSE("DML::%s: cursor_width = %u\n", __func__, cursor_width);
	DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %u\n", __func__, cursor_width_bytes);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %u\n", __func__, cursor_bytes_per_req);
	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, *cursor_lines_per_chunk);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %u\n", __func__, *cursor_bytes_per_line);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, *cursor_bytes_per_chunk);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes = %u\n", __func__, *cursor_bytes);
	DML_LOG_VERBOSE("DML::%s: cursor_pitch = %u\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1));
#endif
}
3383
/*
 * Derive the cursor urgent burst factor from the number of cursor lines that
 * fit in the cursor buffer.
 *
 * CursorBufferSize is in kbytes (it is scaled by 1024 before being divided by
 * the chunk size). Only whole chunks are counted and each chunk contributes
 * cursor_lines_per_chunk lines. The buffered lines are converted to time with
 * LineTime and compared against UrgentLatency:
 *   - no time left after the latency: *NotEnoughUrgentLatencyHiding = true and
 *     *UrgentBurstFactorCursor = 1
 *   - otherwise: *NotEnoughUrgentLatencyHiding = false and
 *     *UrgentBurstFactorCursor = buffer_time / (buffer_time - UrgentLatency)
 *
 * When CursorWidth is 0 neither output is written.
 */
static void calculate_cursor_urgent_burst_factor(
	unsigned int CursorBufferSize,
	unsigned int CursorWidth,
	unsigned int cursor_bytes_per_chunk,
	unsigned int cursor_lines_per_chunk,
	double LineTime,
	double UrgentLatency,

	double *UrgentBurstFactorCursor,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int buffered_lines;
	double buffered_time;

	// No cursor: leave both outputs exactly as the caller set them.
	if (CursorWidth == 0)
		return;

	// Whole chunks that fit in the buffer, expressed in cursor lines.
	buffered_lines = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk;
	buffered_time = buffered_lines * LineTime;

	if (buffered_time - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorCursor = 1;
	} else {
		*NotEnoughUrgentLatencyHiding = false;
		*UrgentBurstFactorCursor = buffered_time / (buffered_time - UrgentLatency);
	}

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, buffered_lines);
	DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffered_time);
	DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3422
/*
 * Compute the luma and chroma urgent burst factors for one plane from the
 * amount of time its DET buffer allocation covers.
 *
 * For each of luma and chroma:
 *   lines_in_det = det_bytes / bytes_per_pixel_in_det / swath_width_ub
 *   det_time     = math_floor2(lines_in_det, swath_height) * LineTime / vratio
 * where det_bytes is 1024 * 1024 for a phantom pipe and the supplied DET
 * buffer size otherwise. If det_time - UrgentLatency <= 0 the factor is set
 * to 1 and *NotEnoughUrgentLatencyHiding is raised; otherwise the factor is
 * det_time / (det_time - UrgentLatency).
 *
 * Chroma is only evaluated when BytePerPixelInDETC > 0; otherwise
 * *UrgentBurstFactorChroma stays 0. *NotEnoughUrgentLatencyHiding is cleared
 * on entry and set if either plane lacks latency hiding.
 */
static void CalculateUrgentBurstFactor(
	const struct dml2_plane_parameters *plane_cfg,
	unsigned int swath_width_luma_ub,
	unsigned int swath_width_chroma_ub,
	unsigned int SwathHeightY,
	unsigned int SwathHeightC,
	double LineTime,
	double UrgentLatency,
	double VRatio,
	double VRatioC,
	double BytePerPixelInDETY,
	double BytePerPixelInDETC,
	unsigned int DETBufferSizeY,
	unsigned int DETBufferSizeC,
	// Output
	double *UrgentBurstFactorLuma,
	double *UrgentBurstFactorChroma,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int det_bytes_luma;
	double luma_lines_in_det;
	double luma_det_time;

	// Defaults; overwritten below as each plane is evaluated.
	*NotEnoughUrgentLatencyHiding = false;
	*UrgentBurstFactorLuma = 0;
	*UrgentBurstFactorChroma = 0;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
	DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC);
	DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
	DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
	DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
	DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
	DML_ASSERT(VRatio > 0);

	// Luma
	det_bytes_luma = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY;
	luma_lines_in_det = det_bytes_luma / BytePerPixelInDETY / swath_width_luma_ub;
	luma_det_time = math_floor2(luma_lines_in_det, SwathHeightY) * LineTime / VRatio;

	if (luma_det_time - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorLuma = 1;
	} else {
		*UrgentBurstFactorLuma = luma_det_time / (luma_det_time - UrgentLatency);
	}

	// Chroma (only for formats that carry a second plane)
	if (BytePerPixelInDETC > 0) {
		unsigned int det_bytes_chroma = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC;
		double chroma_lines_in_det = det_bytes_chroma / BytePerPixelInDETC / swath_width_chroma_ub;
		double chroma_det_time = math_floor2(chroma_lines_in_det, SwathHeightC) * LineTime / VRatioC;

		if (chroma_det_time - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorChroma = 1;
		} else {
			*UrgentBurstFactorChroma = chroma_det_time / (chroma_det_time - UrgentLatency);
		}
	}

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, luma_lines_in_det);
	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, luma_det_time);
	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3493
CalculateDCFCLKDeepSleepTdlut(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double dispclk,unsigned int tdlut_bytes_to_deliver[],double prefetch_swath_time_us[],double * DCFClkDeepSleep)3494 static void CalculateDCFCLKDeepSleepTdlut(
3495 const struct dml2_display_cfg *display_cfg,
3496 unsigned int NumberOfActiveSurfaces,
3497 unsigned int BytePerPixelY[],
3498 unsigned int BytePerPixelC[],
3499 unsigned int SwathWidthY[],
3500 unsigned int SwathWidthC[],
3501 unsigned int DPPPerSurface[],
3502 double PSCL_THROUGHPUT[],
3503 double PSCL_THROUGHPUT_CHROMA[],
3504 double Dppclk[],
3505 double ReadBandwidthLuma[],
3506 double ReadBandwidthChroma[],
3507 unsigned int ReturnBusWidth,
3508
3509 double dispclk,
3510 unsigned int tdlut_bytes_to_deliver[],
3511 double prefetch_swath_time_us[],
3512
3513 // Output
3514 double *DCFClkDeepSleep)
3515 {
3516 double DisplayPipeLineDeliveryTimeLuma;
3517 double DisplayPipeLineDeliveryTimeChroma;
3518 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES];
3519 double ReadBandwidth = 0.0;
3520
3521 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3522 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
3523
3524 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
3525 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
3526 } else {
3527 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3528 }
3529 if (BytePerPixelC[k] == 0) {
3530 DisplayPipeLineDeliveryTimeChroma = 0;
3531 } else {
3532 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
3533 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
3534 } else {
3535 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3536 }
3537 }
3538
3539 if (BytePerPixelC[k] > 0) {
3540 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
3541 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
3542 } else {
3543 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
3544 }
3545 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
3546
3547 // adjust for 3dlut delivery time
3548 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
3549 double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
3550
3551 DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3552 DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
3553 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
3554 DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
3555
3556 // increase the deepsleep dcfclk to match the original dispclk throughput rate
3557 if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) {
3558 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk);
3559 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0);
3560 }
3561 }
3562
3563 #ifdef __DML_VBA_DEBUG__
3564 DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
3565 DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3566 #endif
3567 }
3568
3569 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3570 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
3571 }
3572
3573 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth);
3574
3575 #ifdef __DML_VBA_DEBUG__
3576 DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
3577 DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
3578 DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
3579 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
3580 #endif
3581
3582 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3583 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
3584 }
3585
3586 DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
3587 }
3588
CalculateDCFCLKDeepSleep(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)3589 static noinline_for_stack void CalculateDCFCLKDeepSleep(
3590 const struct dml2_display_cfg *display_cfg,
3591 unsigned int NumberOfActiveSurfaces,
3592 unsigned int BytePerPixelY[],
3593 unsigned int BytePerPixelC[],
3594 unsigned int SwathWidthY[],
3595 unsigned int SwathWidthC[],
3596 unsigned int DPPPerSurface[],
3597 double PSCL_THROUGHPUT[],
3598 double PSCL_THROUGHPUT_CHROMA[],
3599 double Dppclk[],
3600 double ReadBandwidthLuma[],
3601 double ReadBandwidthChroma[],
3602 unsigned int ReturnBusWidth,
3603
3604 // Output
3605 double *DCFClkDeepSleep)
3606 {
3607 double zero_double[DML2_MAX_PLANES];
3608 unsigned int zero_integer[DML2_MAX_PLANES];
3609
3610 memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double));
3611 memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int));
3612
3613 CalculateDCFCLKDeepSleepTdlut(
3614 display_cfg,
3615 NumberOfActiveSurfaces,
3616 BytePerPixelY,
3617 BytePerPixelC,
3618 SwathWidthY,
3619 SwathWidthC,
3620 DPPPerSurface,
3621 PSCL_THROUGHPUT,
3622 PSCL_THROUGHPUT_CHROMA,
3623 Dppclk,
3624 ReadBandwidthLuma,
3625 ReadBandwidthChroma,
3626 ReturnBusWidth,
3627 0,
3628 zero_integer, //tdlut_bytes_to_deliver,
3629 zero_double, //prefetch_swath_time_us,
3630
3631 // Output
3632 DCFClkDeepSleep);
3633 }
3634
/*
 * Compute the writeback delay from the writeback scaler geometry.
 *
 *   v_init      = (WritebackVRatio + WritebackVTaps + 1) / 2
 *   line_length = max(dest_width, ceil(dest_width / 6.0) * WritebackVTaps)
 *   last_lines  = dest_height - 1 - ceil((src_height - v_init) / WritebackVRatio)
 *
 * Returns 0 when last_lines is negative, otherwise
 * last_lines * line_length + (HTotal - dest_width) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used here.
 */
static double CalculateWriteBackDelay(
	enum dml2_source_format_class WritebackPixelFormat,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackVTaps,
	unsigned int WritebackDestinationWidth,
	unsigned int WritebackDestinationHeight,
	unsigned int WritebackSourceHeight,
	unsigned int HTotal)
{
	double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	double line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	double last_lines_unclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)v_init) / (double)WritebackVRatio, 1.0);

	// A negative line count means there is nothing left to wait for.
	if (last_lines_unclamped < 0)
		return 0;

	return last_lines_unclamped * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
3660
CalculateMaxVStartup(bool ptoi_supported,unsigned int vblank_nom_default_us,const struct dml2_timing_cfg * timing,double write_back_delay_us)3661 static unsigned int CalculateMaxVStartup(
3662 bool ptoi_supported,
3663 unsigned int vblank_nom_default_us,
3664 const struct dml2_timing_cfg *timing,
3665 double write_back_delay_us)
3666 {
3667 unsigned int vblank_size = 0;
3668 unsigned int max_vstartup_lines = 0;
3669
3670 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
3671 unsigned int vblank_actual = timing->v_total - timing->v_active;
3672 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
3673 unsigned int vblank_avail = (timing->vblank_nom == 0) ? vblank_nom_default_in_line : (unsigned int)timing->vblank_nom;
3674
3675 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
3676
3677 if (timing->interlaced && !ptoi_supported)
3678 max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0));
3679 else
3680 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
3681 #ifdef __DML_VBA_DEBUG__
3682 DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom);
3683 DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
3684 DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us);
3685 DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
3686 DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
3687 DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
3688 #endif
3689 max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START);
3690 return max_vstartup_lines;
3691 }
3692
/*
 * Decide, for every active surface, the swath heights, request sizes and the
 * luma/chroma split of the DET buffer, and report whether the viewport fits.
 *
 * Steps, in order:
 *   1. CalculateSwathWidth() fills swath widths and maximum swath heights.
 *   2. full_swath_bytes_l/c are derived from those values.
 *   3. The unbounded-request decision and CalculateDETBufferSize() produce the
 *      per-pipe DET size and the compressed buffer size.
 *   4. Per surface: pick full or halved swath heights so the swaths fit in
 *      half of the DET, set request sizes, flag viewport support, and split
 *      the DET between plane0 and plane1.
 *   5. Compute *compbuf_reserved_space_64b and *hw_debug5.
 *
 * scratch: provides the locals storage handed to CalculateDETBufferSize().
 * p:       parameter bundle; inputs are read from it and all results are
 *          written back through its output pointers/arrays.
 */
static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
{
	unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
	unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };

	unsigned int TotalActiveDPP = 0;
	bool NoChromaOrLinear = true;
	unsigned int SurfaceDoingUnboundedRequest = 0;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

	const long TTUFIFODEPTH = 8;
	const long MAXIMUMCOMPRESSION = 4;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
	}
#endif
	CalculateSwathWidth(
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		p->ODMMode,
		p->BytePerPixY,
		p->BytePerPixC,
		p->Read256BytesBlockHeightY,
		p->Read256BytesBlockHeightC,
		p->Read256BytesBlockWidthY,
		p->Read256BytesBlockWidthC,
		p->surf_linear128_l,
		p->surf_linear128_c,
		p->DPPPerSurface,

		// Output
		p->req_per_swath_ub_l,
		p->req_per_swath_ub_c,
		SwathWidthSingleDPP,
		SwathWidthSingleDPPChroma,
		p->SwathWidth,
		p->SwathWidthChroma,
		MaximumSwathHeightY,
		MaximumSwathHeightC,
		p->swath_width_luma_ub,
		p->swath_width_chroma_ub);

	// Bytes of one full-height swath per plane: width (ub) * bytes per pixel in DET * max swath height.
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
		p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
#endif
		// 420 10-bit only: swath sizes go through math_ceil2(..., 256) (presumably rounding up to a multiple of 256 bytes).
		if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
			p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
			p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
		}
	}

	// Gather the inputs for the unbounded-request decision:
	//  - TotalActiveDPP counts one DPP per surface when ForceSingleDPP is set, else DPPPerSurface[k]
	//  - SurfaceDoingUnboundedRequest ends up as the last surface with at least one DPP
	//  - NoChromaOrLinear is cleared by any 420, rgbe_alpha or linear-tiled plane
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
		if (p->DPPPerSurface[k] > 0)
			SurfaceDoingUnboundedRequest = k;
		if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
			|| p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			NoChromaOrLinear = false;
		}
	}

	*p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);

	CalculateDETBufferSize(
		&scratch->CalculateDETBufferSize_locals,
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		*p->UnboundedRequestEnabled,
		p->nomDETInKByte,
		p->MaxTotalDETInKByte,
		p->ConfigReturnBufferSizeInKByte,
		p->MinCompressedBufferSizeInKByte,
		p->ConfigReturnBufferSegmentSizeInkByte,
		p->CompressedBufferSegmentSizeInkByte,
		p->ReadBandwidthLuma,
		p->ReadBandwidthChroma,
		p->full_swath_bytes_l,
		p->full_swath_bytes_c,
		p->DPPPerSurface,

		// Output
		p->DETBufferSizeInKByte, // per hubp pipe
		p->CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
#endif

	// Assume the viewport is supported; any failing surface below clears the global flag.
	*p->ViewportSizeSupport = true;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {

		// Phantom pipes are sized against a fixed 1024 kbyte budget instead of their DET allocation.
		DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
#endif
		// Swath height selection. The swaths of both planes must fit in half of the
		// DET budget; the branches try, in order: linear (always full height),
		// both full, half luma, half chroma, and finally both halved.
		if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];

			// Linear surfaces: 128B requests when the linear128 flag is set, else 256B.
			if (p->surf_linear128_l[k])
				p->request_size_bytes_luma[k] = 128;
			else
				p->request_size_bytes_luma[k] = 256;

			if (p->surf_linear128_c[k])
				p->request_size_bytes_chroma[k] = 128;
			else
				p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Both full-height swaths fit.
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Luma dominates: halve the luma swath only.
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			// Halved plane: 128B when (bytes per pixel == 2) matches the vertical-rotation flag, else 64B.
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Chroma is comparatively large: halve the chroma swath only.
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;

		} else {
			// Last resort: halve both swaths (whether that fits is checked below).
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
		}

		// A zero chroma swath height means there is no second plane to request.
		if (p->SwathHeightC[k] == 0)
			p->request_size_bytes_chroma[k] = 0;

		// Unsupported when even the halved swaths exceed half of the DET budget,
		// or when a swath width exceeds its maximum (chroma only checked if present).
		if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
			p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
			*p->ViewportSizeSupport = false;
			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
			DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
			DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
			p->ViewportSizeSupportPerSurface[k] = false;
		} else {
			p->ViewportSizeSupportPerSurface[k] = true;
		}

		// Split the DET between the planes. Note this uses the real per-pipe
		// DETBufferSizeInKByte[k], not the phantom-adjusted value used above.
		if (p->SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
			p->DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
#endif
			// Plane0 gets 2/3 passed through math_floor2(..., 1024); plane1 gets the remainder.
			p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
#endif

	}

	// Reserved compressed-buffer space in 64-byte units: two pixel chunks by default.
	*p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
	if (*p->UnboundedRequestEnabled) {
		// With unbounded requesting, reserve at least the ROB size (in 64B units) minus
		// TTUFIFODEPTH luma swaths of the unbounded surface (divided by
		// MAXIMUMCOMPRESSION when mrq_present), if that exceeds the default.
		*p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
			(double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
		DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
#endif
	}
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
#endif

	// hw_debug5: with ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE defined it is simply
	// "more than one active surface"; the #else branch is the alternative per-surface rule.
	*p->hw_debug5 = false;
#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
	if (p->NumberOfActiveSurfaces > 1)
		*p->hw_debug5 = true;
#else
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
			&& p->display_cfg->plane_descriptors[k].surface.dcc.enable
			&& ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
				+ *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
			*p->hw_debug5 = true;
#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
		DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
		DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
		DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
		DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
#endif
	}
#endif
}
3950
DecideODMMode(unsigned int HActive,double MaxDispclk,unsigned int MaximumPixelsPerLinePerDSCUnit,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne)3951 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
3952 double MaxDispclk,
3953 unsigned int MaximumPixelsPerLinePerDSCUnit,
3954 enum dml2_output_format_class OutFormat,
3955 bool UseDSC,
3956 unsigned int NumberOfDSCSlices,
3957 double SurfaceRequiredDISPCLKWithoutODMCombine,
3958 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
3959 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
3960 double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
3961 {
3962 enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock;
3963 enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive;
3964 enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive;
3965 enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass;
3966
3967 MinimumRequiredODMModeForMaxDispClock =
3968 (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass :
3969 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 :
3970 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3971 if (ODMMode < MinimumRequiredODMModeForMaxDispClock)
3972 ODMMode = MinimumRequiredODMModeForMaxDispClock;
3973
3974 if (UseDSC) {
3975 MinimumRequiredODMModeForMaxDSCHActive =
3976 (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass :
3977 (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 :
3978 (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3979 if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive)
3980 ODMMode = MinimumRequiredODMModeForMaxDSCHActive;
3981 }
3982
3983 if (OutFormat == dml2_420) {
3984 MinimumRequiredODMModeForMax420HActive =
3985 (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass :
3986 (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_2to1 :
3987 (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3988 if (ODMMode < MinimumRequiredODMModeForMax420HActive)
3989 ODMMode = MinimumRequiredODMModeForMax420HActive;
3990 }
3991
3992 if (UseDSC) {
3993 if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
3994 ODMMode = dml2_odm_mode_combine_2to1;
3995 if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
3996 ODMMode = dml2_odm_mode_combine_3to1;
3997 if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
3998 ODMMode = dml2_odm_mode_combine_4to1;
3999 }
4000
4001 return ODMMode;
4002 }
4003
CalculateODMConstraints(enum dml2_odm_mode ODMUse,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne,unsigned int MaximumPixelsPerLinePerDSCUnit,double * DISPCLKRequired,unsigned int * NumberOfDPPRequired,unsigned int * MaxHActiveForDSC,unsigned int * MaxDSCSlices,unsigned int * MaxHActiveFor420)4004 static void CalculateODMConstraints(
4005 enum dml2_odm_mode ODMUse,
4006 double SurfaceRequiredDISPCLKWithoutODMCombine,
4007 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4008 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4009 double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4010 unsigned int MaximumPixelsPerLinePerDSCUnit,
4011 /* Output */
4012 double *DISPCLKRequired,
4013 unsigned int *NumberOfDPPRequired,
4014 unsigned int *MaxHActiveForDSC,
4015 unsigned int *MaxDSCSlices,
4016 unsigned int *MaxHActiveFor420)
4017 {
4018 switch (ODMUse) {
4019 case dml2_odm_mode_combine_2to1:
4020 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4021 *NumberOfDPPRequired = 2;
4022 break;
4023 case dml2_odm_mode_combine_3to1:
4024 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4025 *NumberOfDPPRequired = 3;
4026 break;
4027 case dml2_odm_mode_combine_4to1:
4028 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4029 *NumberOfDPPRequired = 4;
4030 break;
4031 case dml2_odm_mode_auto:
4032 case dml2_odm_mode_split_1to2:
4033 case dml2_odm_mode_mso_1to2:
4034 case dml2_odm_mode_mso_1to4:
4035 case dml2_odm_mode_bypass:
4036 default:
4037 *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
4038 *NumberOfDPPRequired = 1;
4039 break;
4040 }
4041 *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
4042 *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
4043 *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
4044 }
4045
ValidateODMMode(enum dml2_odm_mode ODMMode,double MaxDispclk,unsigned int HActive,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,unsigned int TotalNumberOfActiveDPP,unsigned int TotalNumberOfActiveOPP,unsigned int MaxNumDPP,unsigned int MaxNumOPP,double DISPCLKRequired,unsigned int NumberOfDPPRequired,unsigned int MaxHActiveForDSC,unsigned int MaxDSCSlices,unsigned int MaxHActiveFor420)4046 static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
4047 double MaxDispclk,
4048 unsigned int HActive,
4049 enum dml2_output_format_class OutFormat,
4050 bool UseDSC,
4051 unsigned int NumberOfDSCSlices,
4052 unsigned int TotalNumberOfActiveDPP,
4053 unsigned int TotalNumberOfActiveOPP,
4054 unsigned int MaxNumDPP,
4055 unsigned int MaxNumOPP,
4056 double DISPCLKRequired,
4057 unsigned int NumberOfDPPRequired,
4058 unsigned int MaxHActiveForDSC,
4059 unsigned int MaxDSCSlices,
4060 unsigned int MaxHActiveFor420)
4061 {
4062 bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true;
4063 bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1);
4064 unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1;
4065 unsigned int h_timing_div_mode =
4066 (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 :
4067 (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle;
4068
4069 if (DISPCLKRequired > MaxDispclk)
4070 return false;
4071 if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP || (TotalNumberOfActiveOPP + NumberOfDPPRequired) > MaxNumOPP)
4072 return false;
4073 if (are_odm_segments_symmetrical) {
4074 if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle))
4075 return false;
4076 }
4077 if (HActive % h_timing_div_mode)
4078 /*
4079 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
4080 * OTG_H_SYNC_A_START/END all need to be visible by h timing div
4081 * mode. This logic only checks H active.
4082 */
4083 return false;
4084
4085 if (UseDSC) {
4086 if (HActive > MaxHActiveForDSC)
4087 return false;
4088 if (NumberOfDSCSlices > MaxDSCSlices)
4089 return false;
4090 if (HActive % NumberOfDSCSlices)
4091 return false;
4092 if (NumberOfDSCSlices % NumberOfDPPRequired)
4093 return false;
4094 if (is_max_dsc_slice_required) {
4095 if (NumberOfDSCSlices != MaxDSCSlices)
4096 return false;
4097 }
4098 }
4099
4100 if (OutFormat == dml2_420) {
4101 if (HActive > MaxHActiveFor420)
4102 return false;
4103 }
4104
4105 return true;
4106 }
4107
CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum dml2_output_format_class OutFormat,enum dml2_output_encoder_class Output,enum dml2_odm_mode ODMUse,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int TotalNumberOfActiveOPP,unsigned int MaxNumDPP,unsigned int MaxNumOPP,double PixelClock,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum dml2_odm_mode * ODMMode,double * RequiredDISPCLKPerSurface)4108 static noinline_for_stack void CalculateODMMode(
4109 unsigned int MaximumPixelsPerLinePerDSCUnit,
4110 unsigned int HActive,
4111 enum dml2_output_format_class OutFormat,
4112 enum dml2_output_encoder_class Output,
4113 enum dml2_odm_mode ODMUse,
4114 double MaxDispclk,
4115 bool DSCEnable,
4116 unsigned int TotalNumberOfActiveDPP,
4117 unsigned int TotalNumberOfActiveOPP,
4118 unsigned int MaxNumDPP,
4119 unsigned int MaxNumOPP,
4120 double PixelClock,
4121 unsigned int NumberOfDSCSlices,
4122
4123 // Output
4124 bool *TotalAvailablePipesSupport,
4125 unsigned int *NumberOfDPP,
4126 enum dml2_odm_mode *ODMMode,
4127 double *RequiredDISPCLKPerSurface)
4128 {
4129 double SurfaceRequiredDISPCLKWithoutODMCombine;
4130 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4131 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4132 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4133 double DISPCLKRequired;
4134 unsigned int NumberOfDPPRequired;
4135 unsigned int MaxHActiveForDSC;
4136 unsigned int MaxDSCSlices;
4137 unsigned int MaxHActiveFor420;
4138 bool success;
4139 bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0);
4140 enum dml2_odm_mode DecidedODMMode;
4141 bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi);
4142
4143 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420);
4144 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420);
4145 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420);
4146 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420);
4147 #ifdef __DML_VBA_DEBUG__
4148 DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse);
4149 DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output);
4150 DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
4151 DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
4152 DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
4153 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine);
4154 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne);
4155 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne);
4156 DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4157 #endif
4158 if (ODMUse == dml2_odm_mode_auto)
4159 DecidedODMMode = DecideODMMode(HActive,
4160 MaxDispclk,
4161 MaximumPixelsPerLinePerDSCUnit,
4162 OutFormat,
4163 UseDSC,
4164 NumberOfDSCSlices,
4165 SurfaceRequiredDISPCLKWithoutODMCombine,
4166 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4167 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4168 SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4169 else
4170 DecidedODMMode = ODMUse;
4171 CalculateODMConstraints(DecidedODMMode,
4172 SurfaceRequiredDISPCLKWithoutODMCombine,
4173 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4174 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4175 SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4176 MaximumPixelsPerLinePerDSCUnit,
4177 &DISPCLKRequired,
4178 &NumberOfDPPRequired,
4179 &MaxHActiveForDSC,
4180 &MaxDSCSlices,
4181 &MaxHActiveFor420);
4182 success = ValidateODMMode(DecidedODMMode,
4183 MaxDispclk,
4184 HActive,
4185 OutFormat,
4186 UseDSC,
4187 NumberOfDSCSlices,
4188 TotalNumberOfActiveDPP,
4189 TotalNumberOfActiveOPP,
4190 MaxNumDPP,
4191 MaxNumOPP,
4192 DISPCLKRequired,
4193 NumberOfDPPRequired,
4194 MaxHActiveForDSC,
4195 MaxDSCSlices,
4196 MaxHActiveFor420);
4197
4198 *ODMMode = DecidedODMMode;
4199 *TotalAvailablePipesSupport = success;
4200 *NumberOfDPP = NumberOfDPPRequired;
4201 *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0;
4202 #ifdef __DML_VBA_DEBUG__
4203 DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
4204 DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
4205 DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
4206 DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
4207 #endif
4208 }
4209
CalculateOutputLink(struct dml2_core_internal_scratch * s,double PHYCLK,double PHYCLKD18,double PHYCLKD32,double Downspreading,enum dml2_output_encoder_class Output,enum dml2_output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,enum dml2_dsc_enable_option DSCEnable,unsigned int OutputLinkDPLanes,enum dml2_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,bool * RequiresFEC,double * OutBpp,enum dml2_core_internal_output_type * OutputType,enum dml2_core_internal_output_type_rate * OutputRate,unsigned int * RequiredSlots)4210 static noinline_for_stack void CalculateOutputLink(
4211 struct dml2_core_internal_scratch *s,
4212 double PHYCLK,
4213 double PHYCLKD18,
4214 double PHYCLKD32,
4215 double Downspreading,
4216 enum dml2_output_encoder_class Output,
4217 enum dml2_output_format_class OutputFormat,
4218 unsigned int HTotal,
4219 unsigned int HActive,
4220 double PixelClockBackEnd,
4221 double ForcedOutputLinkBPP,
4222 unsigned int DSCInputBitPerComponent,
4223 unsigned int NumberOfDSCSlices,
4224 double AudioSampleRate,
4225 unsigned int AudioSampleLayout,
4226 enum dml2_odm_mode ODMModeNoDSC,
4227 enum dml2_odm_mode ODMModeDSC,
4228 enum dml2_dsc_enable_option DSCEnable,
4229 unsigned int OutputLinkDPLanes,
4230 enum dml2_output_link_dp_rate OutputLinkDPRate,
4231
4232 // Output
4233 bool *RequiresDSC,
4234 bool *RequiresFEC,
4235 double *OutBpp,
4236 enum dml2_core_internal_output_type *OutputType,
4237 enum dml2_core_internal_output_type_rate *OutputRate,
4238 unsigned int *RequiredSlots)
4239 {
4240 bool LinkDSCEnable;
4241 unsigned int dummy;
4242 *RequiresDSC = false;
4243 *RequiresFEC = false;
4244 *OutBpp = 0;
4245
4246 *OutputType = dml2_core_internal_output_type_unknown;
4247 *OutputRate = dml2_core_internal_output_rate_unknown;
4248
4249 #ifdef __DML_VBA_DEBUG__
4250 DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
4251 DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
4252 DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4253 DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
4254 DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
4255 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
4256 DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
4257 DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
4258 DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
4259 DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output);
4260 DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
4261 #endif
4262 {
4263 if (Output == dml2_hdmi) {
4264 *RequiresDSC = false;
4265 *RequiresFEC = false;
4266 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
4267 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4268 //OutputTypeAndRate = "HDMI";
4269 *OutputType = dml2_core_internal_output_type_hdmi;
4270 } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
4271 if (DSCEnable == dml2_dsc_enable) {
4272 *RequiresDSC = true;
4273 LinkDSCEnable = true;
4274 if (Output == dml2_dp || Output == dml2_dp2p0) {
4275 *RequiresFEC = true;
4276 } else {
4277 *RequiresFEC = false;
4278 }
4279 } else {
4280 *RequiresDSC = false;
4281 LinkDSCEnable = false;
4282 if (Output == dml2_dp2p0) {
4283 *RequiresFEC = true;
4284 } else {
4285 *RequiresFEC = false;
4286 }
4287 }
4288 if (Output == dml2_dp2p0) {
4289 *OutBpp = 0;
4290 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
4291 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4292 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4293 if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4294 *RequiresDSC = true;
4295 LinkDSCEnable = true;
4296 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4297 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4298 }
4299 //OutputTypeAndRate = Output & " UHBR10";
4300 *OutputType = dml2_core_internal_output_type_dp2p0;
4301 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
4302 }
4303 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
4304 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4305 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4306
4307 if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4308 *RequiresDSC = true;
4309 LinkDSCEnable = true;
4310 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4311 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4312 }
4313 //OutputTypeAndRate = Output & " UHBR13p5";
4314 *OutputType = dml2_core_internal_output_type_dp2p0;
4315 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
4316 }
4317 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
4318 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4319 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4320 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4321 *RequiresDSC = true;
4322 LinkDSCEnable = true;
4323 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4324 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4325 }
4326 //OutputTypeAndRate = Output & " UHBR20";
4327 *OutputType = dml2_core_internal_output_type_dp2p0;
4328 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
4329 }
4330 } else { // output is dp or edp
4331 *OutBpp = 0;
4332 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
4333 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4334 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4335 if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4336 *RequiresDSC = true;
4337 LinkDSCEnable = true;
4338 if (Output == dml2_dp) {
4339 *RequiresFEC = true;
4340 }
4341 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4342 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4343 }
4344 //OutputTypeAndRate = Output & " HBR";
4345 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4346 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
4347 }
4348 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
4349 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4350 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4351
4352 if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4353 *RequiresDSC = true;
4354 LinkDSCEnable = true;
4355 if (Output == dml2_dp) {
4356 *RequiresFEC = true;
4357 }
4358 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4359 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4360 }
4361 //OutputTypeAndRate = Output & " HBR2";
4362 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4363 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
4364 }
4365 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
4366 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4367 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4368
4369 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4370 *RequiresDSC = true;
4371 LinkDSCEnable = true;
4372 if (Output == dml2_dp) {
4373 *RequiresFEC = true;
4374 }
4375 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4376 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4377 }
4378 //OutputTypeAndRate = Output & " HBR3";
4379 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4380 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
4381 }
4382 }
4383 } else if (Output == dml2_hdmifrl) {
4384 if (DSCEnable == dml2_dsc_enable) {
4385 *RequiresDSC = true;
4386 LinkDSCEnable = true;
4387 *RequiresFEC = true;
4388 } else {
4389 *RequiresDSC = false;
4390 LinkDSCEnable = false;
4391 *RequiresFEC = false;
4392 }
4393 *OutBpp = 0;
4394 if (PHYCLKD18 >= 3000.0 / 18) {
4395 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4396 //OutputTypeAndRate = Output & "3x3";
4397 *OutputType = dml2_core_internal_output_type_hdmifrl;
4398 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
4399 }
4400 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
4401 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4402 //OutputTypeAndRate = Output & "6x3";
4403 *OutputType = dml2_core_internal_output_type_hdmifrl;
4404 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
4405 }
4406 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
4407 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4408 //OutputTypeAndRate = Output & "6x4";
4409 *OutputType = dml2_core_internal_output_type_hdmifrl;
4410 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
4411 }
4412 if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
4413 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4414 //OutputTypeAndRate = Output & "8x4";
4415 *OutputType = dml2_core_internal_output_type_hdmifrl;
4416 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
4417 }
4418 if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
4419 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4420 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
4421 *RequiresDSC = true;
4422 LinkDSCEnable = true;
4423 *RequiresFEC = true;
4424 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4425 }
4426 //OutputTypeAndRate = Output & "10x4";
4427 *OutputType = dml2_core_internal_output_type_hdmifrl;
4428 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
4429 }
4430 if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
4431 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4432 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4433 *RequiresDSC = true;
4434 LinkDSCEnable = true;
4435 *RequiresFEC = true;
4436 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4437 }
4438 //OutputTypeAndRate = Output & "12x4";
4439 *OutputType = dml2_core_internal_output_type_hdmifrl;
4440 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
4441 }
4442 }
4443 }
4444 #ifdef __DML_VBA_DEBUG__
4445 DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
4446 DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
4447 DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
4448 #endif
4449 }
4450
/*
 * Compute the DISPCLK needed by a writeback pipe.
 *
 * Three candidate values are derived from PixelClock and the largest one is
 * returned:
 *   - horizontal: PixelClock * ceil(HTaps / 8) / HRatio
 *   - vertical:   PixelClock * (VTaps * ceil(DestinationWidth / 6) + 8) / HTotal
 *   - buffering:  PixelClock * VTaps *
 *                 (DestinationWidth * VTaps - LineBufferSize / 57) / 6 / SourceWidth
 *
 * WritebackPixelFormat and WritebackVRatio are part of the signature but do
 * not influence the result.
 */
static double CalculateWriteBackDISPCLK(
	enum dml2_source_format_class WritebackPixelFormat,
	double PixelClock,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackHTaps,
	unsigned int WritebackVTaps,
	unsigned int WritebackSourceWidth,
	unsigned int WritebackDestinationWidth,
	unsigned int HTotal,
	unsigned int WritebackLineBufferSize)
{
	/* Horizontal taps are consumed in groups of eight. */
	const double h_tap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
	/* Destination pixels are consumed in groups of six per vertical tap. */
	const double v_cycles = WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0;
	/* Pixels that do not fit in the line buffer. */
	const double lb_excess = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;

	const double dispclk_h = PixelClock * h_tap_groups / WritebackHRatio;
	const double dispclk_v = PixelClock * v_cycles / (double)HTotal;
	const double dispclk_hb = PixelClock * WritebackVTaps * lb_excess / 6.0 / (double)WritebackSourceWidth;

	return math_max3(dispclk_h, dispclk_v, dispclk_hb);
}
4470
RequiredDTBCLK(bool DSCEnable,double PixelClock,enum dml2_output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)4471 static double RequiredDTBCLK(
4472 bool DSCEnable,
4473 double PixelClock,
4474 enum dml2_output_format_class OutputFormat,
4475 double OutputBpp,
4476 unsigned int DSCSlices,
4477 unsigned int HTotal,
4478 unsigned int HActive,
4479 unsigned int AudioRate,
4480 unsigned int AudioLayout)
4481 {
4482 if (DSCEnable != true) {
4483 return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
4484 } else {
4485 double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
4486 double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
4487 double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
4488 double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
4489 double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
4490 return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
4491 }
4492 }
4493
DSCDelayRequirement(bool DSCEnabled,enum dml2_odm_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum dml2_output_format_class OutputFormat,enum dml2_output_encoder_class Output,double PixelClock,double PixelClockBackEnd)4494 static unsigned int DSCDelayRequirement(
4495 bool DSCEnabled,
4496 enum dml2_odm_mode ODMMode,
4497 unsigned int DSCInputBitPerComponent,
4498 double OutputBpp,
4499 unsigned int HActive,
4500 unsigned int HTotal,
4501 unsigned int NumberOfDSCSlices,
4502 enum dml2_output_format_class OutputFormat,
4503 enum dml2_output_encoder_class Output,
4504 double PixelClock,
4505 double PixelClockBackEnd)
4506 {
4507 unsigned int DSCDelayRequirement_val = 0;
4508 unsigned int NumberOfDSCSlicesFactor = 1;
4509
4510 if (DSCEnabled == true && OutputBpp != 0) {
4511
4512 if (ODMMode == dml2_odm_mode_combine_4to1)
4513 NumberOfDSCSlicesFactor = 4;
4514 else if (ODMMode == dml2_odm_mode_combine_3to1)
4515 NumberOfDSCSlicesFactor = 3;
4516 else if (ODMMode == dml2_odm_mode_combine_2to1)
4517 NumberOfDSCSlicesFactor = 2;
4518
4519 DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)),
4520 (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
4521
4522 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0));
4523 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
4524
4525 } else {
4526 DSCDelayRequirement_val = 0;
4527 }
4528 #ifdef __DML_VBA_DEBUG__
4529 DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
4530 DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode);
4531 DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
4532 DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
4533 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
4534 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
4535 DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4536 DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
4537 DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
4538 DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
4539 DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
4540 #endif
4541
4542 return DSCDelayRequirement_val;
4543 }
4544
CalculateSurfaceSizeInMall(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int BytesPerPixelY[],unsigned int BytesPerPixelC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)4545 static void CalculateSurfaceSizeInMall(
4546 const struct dml2_display_cfg *display_cfg,
4547 unsigned int NumberOfActiveSurfaces,
4548 unsigned int MALLAllocatedForDCN,
4549 unsigned int BytesPerPixelY[],
4550 unsigned int BytesPerPixelC[],
4551 unsigned int Read256BytesBlockWidthY[],
4552 unsigned int Read256BytesBlockWidthC[],
4553 unsigned int Read256BytesBlockHeightY[],
4554 unsigned int Read256BytesBlockHeightC[],
4555 unsigned int ReadBlockWidthY[],
4556 unsigned int ReadBlockWidthC[],
4557 unsigned int ReadBlockHeightY[],
4558 unsigned int ReadBlockHeightC[],
4559
4560 // Output
4561 unsigned int SurfaceSizeInMALL[],
4562 bool *ExceededMALLSize)
4563 {
4564 unsigned int TotalSurfaceSizeInMALLForSS = 0;
4565 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
4566 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
4567
4568 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4569 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
4570 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
4571
4572 if (composition->viewport.stationary) {
4573 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
4574 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
4575 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
4576 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
4577 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
4578 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
4579
4580 if (ReadBlockWidthC[k] > 0) {
4581 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4582 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
4583 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
4584 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) *
4585 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
4586 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
4587 math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4588 }
4589 } else {
4590 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
4591 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4592 if (ReadBlockWidthC[k] > 0) {
4593 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4594 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4595 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4596 }
4597 }
4598 }
4599
4600 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4601 /* SS and Subvp counted separate as they are never used at the same time */
4602 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
4603 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
4604 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
4605 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
4606 }
4607
4608 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
4609 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
4610
4611 #ifdef __DML_VBA_DEBUG__
4612 DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
4613 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
4614 DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
4615 DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
4616 #endif
4617 }
4618
calculate_tdlut_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_tdlut_setting_params * p)4619 static void calculate_tdlut_setting(
4620 struct dml2_core_internal_scratch *scratch,
4621 struct dml2_core_calcs_calculate_tdlut_setting_params *p)
4622 {
4623 // locals
4624 unsigned int tdlut_bpe = 8;
4625 unsigned int tdlut_width;
4626 unsigned int tdlut_pitch_bytes;
4627 unsigned int tdlut_footprint_bytes;
4628 unsigned int vmpg_bytes;
4629 unsigned int tdlut_vmpg_per_frame;
4630 unsigned int tdlut_pte_req_per_frame;
4631 unsigned int tdlut_bytes_per_line;
4632 double tdlut_drain_rate;
4633 unsigned int tdlut_mpc_width;
4634 unsigned int tdlut_bytes_per_group_simple;
4635
4636 if (!p->setup_for_tdlut) {
4637 *p->tdlut_groups_per_2row_ub = 0;
4638 *p->tdlut_opt_time = 0;
4639 *p->tdlut_drain_time = 0;
4640 *p->tdlut_bytes_to_deliver = 0;
4641 *p->tdlut_bytes_per_group = 0;
4642 *p->tdlut_pte_bytes_per_frame = 0;
4643 *p->tdlut_bytes_per_frame = 0;
4644 return;
4645 }
4646
4647 if (p->tdlut_mpc_width_flag) {
4648 tdlut_mpc_width = 33;
4649 tdlut_bytes_per_group_simple = 39*256;
4650 } else {
4651 tdlut_mpc_width = 17;
4652 tdlut_bytes_per_group_simple = 10*256;
4653 }
4654
4655 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
4656
4657 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) {
4658 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4659 tdlut_width = 4916;
4660 else
4661 tdlut_width = 35940;
4662 } else {
4663 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4664 tdlut_width = 17;
4665 else // dml2_tdlut_width_33_cube
4666 tdlut_width = 33;
4667 }
4668
4669 if (p->is_gfx11)
4670 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment
4671 else
4672 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment
4673
4674 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear)
4675 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width;
4676 else
4677 tdlut_footprint_bytes = tdlut_pitch_bytes;
4678
4679 if (!p->gpuvm_enable) {
4680 tdlut_vmpg_per_frame = 0;
4681 tdlut_pte_req_per_frame = 0;
4682 } else {
4683 tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1;
4684 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1;
4685 }
4686 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request
4687 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64;
4688
4689 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) {
4690 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
4691 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width;
4692 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width;
4693 //the delivery cycles is DispClk cycles per line * number of lines * number of slices
4694 //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width;
4695 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1);
4696 } else {
4697 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
4698 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256);
4699 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple;
4700 //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1);
4701 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
4702 }
4703
4704 //the tdlut is fetched during the 2 row times of prefetch.
4705 if (p->setup_for_tdlut) {
4706 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
4707 if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024)
4708 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
4709 else
4710 *p->tdlut_opt_time = 0;
4711 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
4712 *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
4713 }
4714
4715 #ifdef __DML_VBA_DEBUG__
4716 DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
4717 DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes);
4718 DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame);
4719 DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame);
4720
4721 DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
4722 DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width);
4723 DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
4724 DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes);
4725 DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes);
4726 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
4727 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line);
4728 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
4729 DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate);
4730 DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1));
4731 DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
4732 DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
4733 DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
4734 DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
4735 #endif
4736 }
4737
CalculateTarb(const struct dml2_display_cfg * display_cfg,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,double ReturnBW,unsigned int MetaChunkSize,double * Tarb,double * Tarb_prefetch)4738 static void CalculateTarb(
4739 const struct dml2_display_cfg *display_cfg,
4740 unsigned int PixelChunkSizeInKByte,
4741 unsigned int NumberOfActiveSurfaces,
4742 unsigned int NumberOfDPP[],
4743 unsigned int dpte_group_bytes[],
4744 unsigned int tdlut_bytes_per_group[],
4745 double HostVMInefficiencyFactor,
4746 double HostVMInefficiencyFactorPrefetch,
4747 unsigned int HostVMMinPageSize,
4748 double ReturnBW,
4749 unsigned int MetaChunkSize,
4750
4751 // output
4752 double *Tarb,
4753 double *Tarb_prefetch)
4754 {
4755 double extra_bytes = 0;
4756 double extra_bytes_prefetch = 0;
4757 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
4758
4759 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4760 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
4761
4762 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
4763 extra_bytes = extra_bytes + (MetaChunkSize * 1024);
4764
4765 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
4766 extra_bytes = extra_bytes + tdlut_bytes_per_group[k];
4767 }
4768
4769 extra_bytes_prefetch = extra_bytes;
4770
4771 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4772 if (display_cfg->gpuvm_enable == true) {
4773 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
4774 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch;
4775 }
4776 }
4777 *Tarb = extra_bytes / ReturnBW;
4778 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW;
4779 #ifdef __DML_VBA_DEBUG__
4780 DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
4781 DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
4782 DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes);
4783 DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch);
4784 #endif
4785 }
4786
/*
 * Return TWait: the larger of the reserved vblank time (given in ns, divided
 * by 1000 here) and g6_temp_read_blackout_us, plus the larger of UrgentLatency
 * and Ttrip.
 */
static double CalculateTWait(
	long reserved_vblank_time_ns,
	double UrgentLatency,
	double Ttrip,
	double g6_temp_read_blackout_us)
{
	const double urgent_or_trip = math_max2(UrgentLatency, Ttrip);
	const double TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + urgent_or_trip;

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip);
	DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
}
4805
4806
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic-metadata timing
 * terms for one stream.
 *
 * Outputs:
 *  - *VUpdateWidthPix, *VReadyOffsetPix: clock-cycle delays (DCFClkDeepSleep,
 *    Dppclk, inter-tile repeater delay) multiplied by PixelClock, rounded up.
 *  - *VUpdateOffsetPix: HTotal / 4, rounded up.
 *  - *TSetup: sum of the three pixel counts divided by PixelClock.
 *  - *Tdmbf: DynamicMetadataTransmittedBytes / 4 / Dispclk.
 *  - *Tdmec: HTotal / PixelClock.
 *  - *Tdmsks: half of VBlank * HTotal / PixelClock when no
 *    lines-before-active value is given, otherwise that many lines times
 *    HTotal / PixelClock; halved again for interlace without the
 *    progressive-to-interlace unit in OPP.
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
	unsigned int MaxInterDCNTileRepeaters,
	double Dppclk,
	double Dispclk,
	double DCFClkDeepSleep,
	double PixelClock,
	unsigned int HTotal,
	unsigned int VBlank,
	unsigned int DynamicMetadataTransmittedBytes,
	unsigned int DynamicMetadataLinesBeforeActiveRequired,
	unsigned int InterlaceEnable,
	bool ProgressiveToInterlaceUnitInOPP,

	// Output
	double *TSetup,
	double *Tdmbf,
	double *Tdmec,
	double *Tdmsks,
	unsigned int *VUpdateOffsetPix,
	unsigned int *VUpdateWidthPix,
	unsigned int *VReadyOffsetPix)
{
	const double TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	const unsigned int vupdate_width = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0));
	const unsigned int vready_offset = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
	const unsigned int vupdate_offset = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));
	double tdmsks;

	*VUpdateWidthPix = vupdate_width;
	*VReadyOffsetPix = vready_offset;
	*VUpdateOffsetPix = vupdate_offset;
	*TSetup = (vupdate_offset + vupdate_width + vready_offset) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired != 0)
		tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	else
		tdmsks = VBlank * HTotal / PixelClock / 2.0;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		tdmsks = tdmsks / 2;

	*Tdmsks = tdmsks;
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank);
	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
	DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime);

	DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
4863
get_urgent_bandwidth_required(struct dml2_core_shared_get_urgent_bandwidth_required_locals * l,const struct dml2_display_cfg * display_cfg,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool inc_flip_bw,bool use_qual_row_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double PrefetchBandwidthMax[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double surface_required_bw[],double surface_peak_required_bw[])4864 static double get_urgent_bandwidth_required(
4865 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
4866 const struct dml2_display_cfg *display_cfg,
4867 enum dml2_core_internal_soc_state_type state_type,
4868 enum dml2_core_internal_bw_type bw_type,
4869 bool inc_flip_bw, // including flip bw
4870 bool use_qual_row_bw,
4871 unsigned int NumberOfActiveSurfaces,
4872 unsigned int NumberOfDPP[],
4873 double dcc_dram_bw_nom_overhead_factor_p0[],
4874 double dcc_dram_bw_nom_overhead_factor_p1[],
4875 double dcc_dram_bw_pref_overhead_factor_p0[],
4876 double dcc_dram_bw_pref_overhead_factor_p1[],
4877 double mall_prefetch_sdp_overhead_factor[],
4878 double mall_prefetch_dram_overhead_factor[],
4879 double ReadBandwidthLuma[],
4880 double ReadBandwidthChroma[],
4881 double PrefetchBandwidthLuma[],
4882 double PrefetchBandwidthChroma[],
4883 double PrefetchBandwidthMax[],
4884 double excess_vactive_fill_bw_l[],
4885 double excess_vactive_fill_bw_c[],
4886 double cursor_bw[],
4887 double dpte_row_bw[],
4888 double meta_row_bw[],
4889 double prefetch_cursor_bw[],
4890 double prefetch_vmrow_bw[],
4891 double flip_bw[],
4892 double UrgentBurstFactorLuma[],
4893 double UrgentBurstFactorChroma[],
4894 double UrgentBurstFactorCursor[],
4895 double UrgentBurstFactorLumaPre[],
4896 double UrgentBurstFactorChromaPre[],
4897 double UrgentBurstFactorCursorPre[],
4898 /* outputs */
4899 double surface_required_bw[],
4900 double surface_peak_required_bw[])
4901 {
4902 // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
4903 // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
4904
4905 memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
4906
4907 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4908 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
4909 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4910 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4911 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4912 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4913
4914 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
4915 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
4916 l->adj_factor_cur = UrgentBurstFactorCursor[k];
4917 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
4918 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
4919 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
4920
4921 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
4922 bool exclude_this_plane = false;
4923
4924 // Exclude phantom pipe in bw calculation for non svp prefetch state
4925 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
4926 exclude_this_plane = true;
4927
4928 // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
4929 // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
4930 if (use_qual_row_bw) {
4931 if (display_cfg->hostvm_enable)
4932 l->per_plane_flip_bw[k] = 0; // qual_row_bw
4933 else if (!display_cfg->plane_descriptors[k].immediate_flip)
4934 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4935 } else {
4936 // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
4937 if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw)
4938 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4939 else
4940 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
4941 }
4942
4943 if (!exclude_this_plane) {
4944 l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
4945 l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
4946 l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4947 l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4948 l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
4949 surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max);
4950
4951 /* export peak required bandwidth for the surface */
4952 surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
4953
4954 #ifdef __DML_VBA_DEBUG__
4955 DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
4956 DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
4957 DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
4958 DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
4959 DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
4960 DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
4961 #endif
4962 } else {
4963 surface_required_bw[k] = 0.0;
4964 }
4965
4966 l->required_bandwidth_mbps += surface_required_bw[k];
4967
4968 #ifdef __DML_VBA_DEBUG__
4969 DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
4970 DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
4971 DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
4972 DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
4973 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
4974 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
4975 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
4976
4977 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
4978 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
4979 DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
4980
4981 DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
4982 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
4983 DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
4984 DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
4985 DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
4986 DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
4987 DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
4988
4989 DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
4990 DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
4991 DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
4992 DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
4993 DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
4994 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
4995 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
4996 DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
4997 #endif
4998 }
4999
5000 return l->required_bandwidth_mbps;
5001 }
5002
CalculateExtraLatency(const struct dml2_display_cfg * display_cfg,unsigned int ROBBufferSizeInKByte,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,double FabricClock,unsigned int PixelChunkSizeInKByte,double ReturnBW,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,enum dml2_qos_param_type qos_type,bool max_outstanding_when_urgent_expected,unsigned int max_outstanding_requests,unsigned int request_size_bytes_luma[],unsigned int request_size_bytes_chroma[],unsigned int MetaChunkSize,unsigned int dchub_arb_to_ret_delay,double Ttrip,unsigned int hostvm_mode,double * ExtraLatency,double * ExtraLatency_sr,double * ExtraLatencyPrefetch)5003 static void CalculateExtraLatency(
5004 const struct dml2_display_cfg *display_cfg,
5005 unsigned int ROBBufferSizeInKByte,
5006 unsigned int RoundTripPingLatencyCycles,
5007 unsigned int ReorderingBytes,
5008 double DCFCLK,
5009 double FabricClock,
5010 unsigned int PixelChunkSizeInKByte,
5011 double ReturnBW,
5012 unsigned int NumberOfActiveSurfaces,
5013 unsigned int NumberOfDPP[],
5014 unsigned int dpte_group_bytes[],
5015 unsigned int tdlut_bytes_per_group[],
5016 double HostVMInefficiencyFactor,
5017 double HostVMInefficiencyFactorPrefetch,
5018 unsigned int HostVMMinPageSize,
5019 enum dml2_qos_param_type qos_type,
5020 bool max_outstanding_when_urgent_expected,
5021 unsigned int max_outstanding_requests,
5022 unsigned int request_size_bytes_luma[],
5023 unsigned int request_size_bytes_chroma[],
5024 unsigned int MetaChunkSize,
5025 unsigned int dchub_arb_to_ret_delay,
5026 double Ttrip,
5027 unsigned int hostvm_mode,
5028
5029 // output
5030 double *ExtraLatency, // Tex
5031 double *ExtraLatency_sr, // Tex_sr
5032 double *ExtraLatencyPrefetch)
5033
5034 {
5035 double Tarb;
5036 double Tarb_prefetch;
5037 double Tex_trips;
5038 unsigned int max_request_size_bytes = 0;
5039
5040 CalculateTarb(
5041 display_cfg,
5042 PixelChunkSizeInKByte,
5043 NumberOfActiveSurfaces,
5044 NumberOfDPP,
5045 dpte_group_bytes,
5046 tdlut_bytes_per_group,
5047 HostVMInefficiencyFactor,
5048 HostVMInefficiencyFactorPrefetch,
5049 HostVMMinPageSize,
5050 ReturnBW,
5051 MetaChunkSize,
5052 // output
5053 &Tarb,
5054 &Tarb_prefetch);
5055
5056 Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0;
5057
5058 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
5059 if (request_size_bytes_luma[k] > max_request_size_bytes)
5060 max_request_size_bytes = request_size_bytes_luma[k];
5061 if (request_size_bytes_chroma[k] > max_request_size_bytes)
5062 max_request_size_bytes = request_size_bytes_chroma[k];
5063 }
5064
5065 if (qos_type == dml2_qos_param_type_dcn4x) {
5066 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
5067 *ExtraLatency = *ExtraLatency_sr;
5068 if (max_outstanding_when_urgent_expected)
5069 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW;
5070 } else {
5071 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
5072 *ExtraLatency = *ExtraLatency_sr;
5073 }
5074 *ExtraLatency = *ExtraLatency + Tex_trips;
5075 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch;
5076 *ExtraLatency = *ExtraLatency + Tarb;
5077 *ExtraLatency_sr = *ExtraLatency_sr + Tarb;
5078
5079 #ifdef __DML_VBA_DEBUG__
5080 DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type);
5081 DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
5082 DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips);
5083 DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected);
5084 DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock);
5085 DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
5086 DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
5087 DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
5088 DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
5089 DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb);
5090 DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
5091 DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
5092 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
5093 #endif
5094 }
5095
CalculatePrefetchSchedule(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculatePrefetchSchedule_params * p)5096 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
5097 {
5098 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
5099 bool dcc_mrq_enable;
5100
5101 unsigned int vm_bytes;
5102 unsigned int extra_tdpe_bytes;
5103 unsigned int tdlut_row_bytes;
5104 unsigned int Lo;
5105
5106 s->NoTimeToPrefetch = false;
5107 s->DPPCycles = 0;
5108 s->DISPCLKCycles = 0;
5109 s->DSTTotalPixelsAfterScaler = 0.0;
5110 s->LineTime = 0.0;
5111 s->dst_y_prefetch_equ = 0.0;
5112 s->prefetch_bw_oto = 0.0;
5113 s->Tvm_oto = 0.0;
5114 s->Tr0_oto = 0.0;
5115 s->Tvm_oto_lines = 0.0;
5116 s->Tr0_oto_lines = 0.0;
5117 s->dst_y_prefetch_oto = 0.0;
5118 s->TimeForFetchingVM = 0.0;
5119 s->TimeForFetchingRowInVBlank = 0.0;
5120 s->LinesToRequestPrefetchPixelData = 0.0;
5121 s->HostVMDynamicLevelsTrips = 0;
5122 s->trip_to_mem = 0.0;
5123 *p->Tvm_trips = 0.0;
5124 *p->Tr0_trips = 0.0;
5125 s->Tvm_trips_rounded = 0.0;
5126 s->Tr0_trips_rounded = 0.0;
5127 s->max_Tsw = 0.0;
5128 s->Lsw_oto = 0.0;
5129 *p->Tpre_rounded = 0.0;
5130 s->prefetch_bw_equ = 0.0;
5131 s->Tvm_equ = 0.0;
5132 s->Tr0_equ = 0.0;
5133 s->Tdmbf = 0.0;
5134 s->Tdmec = 0.0;
5135 s->Tdmsks = 0.0;
5136 *p->prefetch_sw_bytes = 0.0;
5137 s->prefetch_bw_pr = 0.0;
5138 s->bytes_pp = 0.0;
5139 s->dep_bytes = 0.0;
5140 s->min_Lsw_oto = 0.0;
5141 s->min_Lsw_equ = 0.0;
5142 s->Tsw_est1 = 0.0;
5143 s->Tsw_est2 = 0.0;
5144 s->Tsw_est3 = 0.0;
5145 s->cursor_prefetch_bytes = 0;
5146 *p->prefetch_cursor_bw = 0;
5147 *p->RequiredPrefetchBWMax = 0.0;
5148
5149 dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
5150
5151 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
5152
5153 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
5154 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
5155 } else {
5156 s->HostVMDynamicLevelsTrips = 0;
5157 }
5158 #ifdef __DML_VBA_DEBUG__
5159 DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
5160 DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
5161 DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
5162 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
5163 DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5164 DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
5165 DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup);
5166 DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
5167 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5168 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
5169 DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5170 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5171 DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5172 DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
5173 #endif
5174 CalculateVUpdateAndDynamicMetadataParameters(
5175 p->MaxInterDCNTileRepeaters,
5176 p->myPipe->Dppclk,
5177 p->myPipe->Dispclk,
5178 p->myPipe->DCFClkDeepSleep,
5179 p->myPipe->PixelClock,
5180 p->myPipe->HTotal,
5181 p->myPipe->VBlank,
5182 p->DynamicMetadataTransmittedBytes,
5183 p->DynamicMetadataLinesBeforeActiveRequired,
5184 p->myPipe->InterlaceEnable,
5185 p->myPipe->ProgressiveToInterlaceUnitInOPP,
5186 p->TSetup,
5187
5188 // Output
5189 &s->Tdmbf,
5190 &s->Tdmec,
5191 &s->Tdmsks,
5192 p->VUpdateOffsetPix,
5193 p->VUpdateWidthPix,
5194 p->VReadyOffsetPix);
5195
5196 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
5197 s->trip_to_mem = p->Ttrip;
5198 *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
5199 if (dcc_mrq_enable)
5200 *p->Tvm_trips_flip = *p->Tvm_trips;
5201 else
5202 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
5203
5204 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
5205 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
5206
5207 if (p->DynamicMetadataVMEnabled == true) {
5208 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
5209 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
5210 } else {
5211 *p->Tdmdl_vm = 0;
5212 *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
5213 }
5214
5215 if (p->DynamicMetadataEnable == true) {
5216 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
5217 *p->NotEnoughTimeForDynamicMetadata = true;
5218 DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
5219 DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5220 DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5221 DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5222 DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5223 } else {
5224 *p->NotEnoughTimeForDynamicMetadata = false;
5225 }
5226 } else {
5227 *p->NotEnoughTimeForDynamicMetadata = false;
5228 }
5229
5230 if (p->myPipe->ScalerEnabled)
5231 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
5232 else
5233 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
5234
5235 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
5236
5237 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
5238
5239 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
5240 return true;
5241
5242 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
5243 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
5244 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
5245 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
5246
5247 #ifdef __DML_VBA_DEBUG__
5248 DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
5249 DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
5250 DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
5251 DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5252 DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
5253 DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
5254 DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
5255 DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
5256 DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
5257 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
5258
5259 DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
5260 DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
5261 DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
5262 DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
5263 #endif
5264
5265 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
5266 *p->DSTYAfterScaler = 1;
5267 else
5268 *p->DSTYAfterScaler = 0;
5269
5270 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
5271 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
5272 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
5273 #ifdef __DML_VBA_DEBUG__
5274 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
5275 DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
5276 #endif
5277
5278 #ifdef __DML_VBA_DEBUG__
5279 DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5280 DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5281 DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5282 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5283 DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5284 DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
5285 #endif
5286 if (p->display_cfg->gpuvm_enable) {
5287 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5288 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5289 } else {
5290 if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
5291 s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
5292 else
5293 s->Tvm_trips_rounded = s->LineTime / 4.0;
5294 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
5295 }
5296
5297 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
5298 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
5299
5300 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
5301 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5302 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5303 } else {
5304 s->Tr0_trips_rounded = s->LineTime / 4.0;
5305 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
5306 }
5307 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
5308 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
5309
5310 if (p->display_cfg->gpuvm_enable == true) {
5311 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
5312 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
5313 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
5314 *p->Tno_bw = p->ExtraLatencyPrefetch;
5315 } else {
5316 *p->Tno_bw = 0;
5317 }
5318 } else {
5319 *p->Tno_bw = 0;
5320 }
5321
5322 if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
5323 *p->Tno_bw_flip = *p->Tno_bw;
5324 else
5325 *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
5326
5327 if (dml_is_420(p->myPipe->SourcePixelFormat)) {
5328 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
5329 } else {
5330 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
5331 }
5332
5333 *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
5334 *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
5335
5336 vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
5337 extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
5338
5339 if (p->setup_for_tdlut)
5340 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
5341
5342 tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
5343
5344 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
5345 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
5346 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
5347
5348 // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
5349 // Note: in prefetch calculation, acounting is done mostly per-pipe.
5350 // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
5351 s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
5352
5353 // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
5354 s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
5355
5356 if (p->myPipe->BytePerPixelC > 0) {
5357 s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
5358 s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
5359 }
5360
5361 /* oto prefetch bw should be always be less than total vactive bw */
5362 //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
5363
5364 s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
5365
5366 s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
5367
5368 s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
5369
5370 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
5371 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5372 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5373
5374 /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch.
5375 * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule
5376 * and the required bandwidth increases when going from ms to mp
5377 */
5378 *p->RequiredPrefetchBWMax = s->prefetch_bw_oto;
5379
5380 #ifdef __DML_VBA_DEBUG__
5381 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
5382 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
5383 DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
5384 #endif
5385
5386 if (p->display_cfg->gpuvm_enable == true) {
5387 s->Tvm_oto = math_max3(
5388 *p->Tvm_trips,
5389 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
5390 s->LineTime / 4.0);
5391
5392 #ifdef __DML_VBA_DEBUG__
5393 DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
5394 DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
5395 DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
5396 #endif
5397 } else {
5398 s->Tvm_oto = s->Tvm_trips_rounded;
5399 }
5400
5401 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
5402 s->Tr0_oto = math_max3(
5403 *p->Tr0_trips,
5404 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
5405 s->LineTime / 4.0);
5406 #ifdef __DML_VBA_DEBUG__
5407 DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
5408 DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
5409 DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
5410 #endif
5411 } else
5412 s->Tr0_oto = s->LineTime / 4.0;
5413
5414 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
5415 s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
5416 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
5417
5418 #ifdef DML_GLOBAL_PREFETCH_CHECK
5419 DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
5420 if (p->impacted_dst_y_pre > 0) {
5421 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5422 s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
5423 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
5424 }
5425 #endif
5426 *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
5427
5428 //To (time for delay after scaler) in line time
5429 Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
5430
5431 s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
5432 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
5433 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
5434 //Tpre_equ in line time
5435 if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
5436 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
5437 else
5438 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
5439
5440 #ifdef DML_GLOBAL_PREFETCH_CHECK
5441 s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
5442
5443 s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5444
5445 if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
5446 s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
5447 #endif
5448
5449 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5450
5451 #ifdef __DML_VBA_DEBUG__
5452 DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
5453 DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
5454 DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5455 DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5456 DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
5457 DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5458 DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5459 DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
5460 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5461 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5462 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5463 DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
5464 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5465 DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
5466 DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
5467 DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
5468 DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
5469 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
5470 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5471 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5472 DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5473 DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5474 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
5475 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
5476 DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
5477 DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
5478 DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
5479 DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
5480 DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
5481 DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
5482 DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
5483 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5484 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
5485 DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
5486 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
5487 #endif
5488 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
5489 *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
5490
5491 #ifdef __DML_VBA_DEBUG__
5492 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
5493 DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime);
5494 DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup);
5495 DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
5496 DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
5497 DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
5498 DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
5499 DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5500 DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5501 DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5502 DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
5503 DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5504 DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5505 DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
5506 DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
5507 DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5508 DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
5509 DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
5510 DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
5511 DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
5512 DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
5513 DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
5514 DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5515 DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
5516 DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5517 #endif
5518
5519 *p->dst_y_per_vm_vblank = 0;
5520 *p->dst_y_per_row_vblank = 0;
5521 *p->VRatioPrefetchY = 0;
5522 *p->VRatioPrefetchC = 0;
5523 *p->RequiredPrefetchPixelDataBWLuma = 0;
5524
5525 // Derive bandwidth by finding how much data to move within the time constraint
5526 // Tpre_rounded is Tpre rounding to 2-bit fraction
5527 // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
5528 // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
5529 // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
5530 bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
5531 bool tpre_gt_req_latency = true;
5532 #if 0
5533 // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
5534 // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
5535 // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
5536 tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
5537 #endif
5538
5539 if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
5540 s->prefetch_bw1 = 0.;
5541 s->prefetch_bw2 = 0.;
5542 s->prefetch_bw3 = 0.;
5543 s->prefetch_bw4 = 0.;
5544
5545 // prefetch_bw1: VM + 2*R0 + SW
5546 if (*p->Tpre_rounded - *p->Tno_bw > 0) {
5547 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
5548 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
5549 + *p->prefetch_sw_bytes)
5550 / (*p->Tpre_rounded - *p->Tno_bw);
5551 s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
5552 } else
5553 s->prefetch_bw1 = 0;
5554
5555 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
5556 if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
5557 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
5558 (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
5559 #ifdef __DML_VBA_DEBUG__
5560 DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
5561 DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
5562 DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
5563 DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5564 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5565 DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5566 DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
5567 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
5568 #endif
5569 }
5570
5571 // prefetch_bw2: VM + SW
5572 if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
5573 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
5574 (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
5575 s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
5576 } else
5577 s->prefetch_bw2 = 0;
5578
5579 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
5580 if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
5581 s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
5582 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
5583 }
5584
5585 // prefetch_bw3: 2*R0 + SW
5586 if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
5587 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
5588 (*p->Tpre_rounded - s->Tvm_trips_rounded);
5589 s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
5590 } else
5591 s->prefetch_bw3 = 0;
5592
5593 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
5594 if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
5595 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
5596 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
5597 }
5598
5599 // prefetch_bw4: SW
5600 if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
5601 s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
5602 else
5603 s->prefetch_bw4 = 0;
5604
5605 #ifdef __DML_VBA_DEBUG__
5606 DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5607 DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
5608 DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5609 DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
5610 DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
5611 DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
5612 DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
5613 DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
5614 DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
5615 DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
5616 DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
5617 #endif
5618 {
5619 bool Case1OK = false;
5620 bool Case2OK = false;
5621 bool Case3OK = false;
5622
			// Get an "equalized" bw among all stages (vm, r0, sw). The base case is that all 3 stages are just above the latency-based requirement,
			// so that no particular stage is disproportionately favored; next is r0 being more aggressive, then vm being more aggressive, and the worst
			// case is all stages being aggressive vs the latency-based number.
5626
5627 // prefetch_bw1: VM + 2*R0 + SW
			// so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data).
			// The checks below make sure the equ bw won't be more aggressive than the latency-based requirement.
5630 // check vm time >= vm_trips
5631 // check r0 time >= r0_trips
5632
5633 double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
5634
5635 DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
5636 DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
5637
5638 if (s->prefetch_bw1 > 0) {
5639 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
5640 double row_transfer_time = total_row_bytes / s->prefetch_bw1;
5641 DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5642 DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
5643 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5644 Case1OK = true;
5645 }
5646 }
5647
5648 // prefetch_bw2: VM + SW
5649 // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tr0_trips_rounded - Tno_bw)
5650 // check vm time >= vm_trips
5651 // check r0 time < r0_trips
5652 if (s->prefetch_bw2 > 0) {
5653 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
5654 double row_transfer_time = total_row_bytes / s->prefetch_bw2;
5655 DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5656 DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
5657 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
5658 Case2OK = true;
5659 }
5660 }
5661
5662 // prefetch_bw3: VM + 2*R0
5663 // check vm time < vm_trips
5664 // check r0 time >= r0_trips
5665 if (s->prefetch_bw3 > 0) {
5666 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
5667 double row_transfer_time = total_row_bytes / s->prefetch_bw3;
5668 DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5669 DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
5670 if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5671 Case3OK = true;
5672 }
5673 }
5674
5675 if (Case1OK) {
5676 s->prefetch_bw_equ = s->prefetch_bw1;
5677 } else if (Case2OK) {
5678 s->prefetch_bw_equ = s->prefetch_bw2;
5679 } else if (Case3OK) {
5680 s->prefetch_bw_equ = s->prefetch_bw3;
5681 } else {
5682 s->prefetch_bw_equ = s->prefetch_bw4;
5683 }
5684
5685 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
5686 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5687 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5688 #ifdef __DML_VBA_DEBUG__
5689 DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK);
5690 DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK);
5691 DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK);
5692 DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
5693 #endif
5694
5695 if (s->prefetch_bw_equ > 0) {
5696 if (p->display_cfg->gpuvm_enable == true) {
5697 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
5698 } else {
5699 s->Tvm_equ = s->LineTime / 4;
5700 }
5701
5702 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
5703 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
5704 *p->Tr0_trips,
5705 s->LineTime / 4);
5706 } else {
5707 s->Tr0_equ = s->LineTime / 4;
5708 }
5709 } else {
5710 s->Tvm_equ = 0;
5711 s->Tr0_equ = 0;
5712 DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
5713 }
5714 }
5715 #ifdef __DML_VBA_DEBUG__
5716 DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
5717 DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
5718 #endif
5719 // Use the more stressful prefetch schedule
5720 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
5721 *p->dst_y_prefetch = s->dst_y_prefetch_oto;
5722 s->TimeForFetchingVM = s->Tvm_oto;
5723 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
5724
5725 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5726 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5727 #ifdef __DML_VBA_DEBUG__
5728 DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__);
5729 #endif
5730
5731 } else {
5732 *p->dst_y_prefetch = s->dst_y_prefetch_equ;
5733
5734 if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
5735 *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
5736
5737 s->TimeForFetchingVM = s->Tvm_equ;
5738 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
5739
5740 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5741 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5742
5743 /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming.
5744 * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data
5745 * bandwidth may end up higher than what was calculated in mode support.
5746 */
5747 *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax);
5748
5749 #ifdef __DML_VBA_DEBUG__
5750 DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
5751 #endif
5752 }
5753
5754 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
5755 s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
5756
5757 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
5758 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
5759 *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
5760
5761 #ifdef __DML_VBA_DEBUG__
5762 DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
5763 DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
5764 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5765 DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
5766 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5767 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5768 DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
5769 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5770 DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
5771
5772 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
5773 DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
5774 DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
5775 DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
5776 #endif
5777 DML_ASSERT(*p->dst_y_prefetch < 64);
5778
5779 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
5780 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
5781 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
5782 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
5783 #ifdef __DML_VBA_DEBUG__
5784 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5785 DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
5786 DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
5787 #endif
5788 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
5789 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
5790 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
5791 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
5792 } else {
5793 s->NoTimeToPrefetch = true;
5794 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
5795 *p->VRatioPrefetchY = 0;
5796 }
5797 #ifdef __DML_VBA_DEBUG__
5798 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5799 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5800 DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
5801 #endif
5802 }
5803
5804 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
5805 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
5806
5807 #ifdef __DML_VBA_DEBUG__
5808 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5809 DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
5810 DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
5811 #endif
5812 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
5813 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
5814 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
5815 } else {
5816 s->NoTimeToPrefetch = true;
5817 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
5818 *p->VRatioPrefetchC = 0;
5819 }
5820 #ifdef __DML_VBA_DEBUG__
5821 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5822 DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5823 DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
5824 #endif
5825 }
5826
5827 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
5828 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
5829
5830 #ifdef __DML_VBA_DEBUG__
5831 DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5832 DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5833 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5834 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5835 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5836 #endif
5837 } else {
5838 s->NoTimeToPrefetch = true;
5839 DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
5840 DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
5841 *p->VRatioPrefetchY = 0;
5842 *p->VRatioPrefetchC = 0;
5843 *p->RequiredPrefetchPixelDataBWLuma = 0;
5844 *p->RequiredPrefetchPixelDataBWChroma = 0;
5845 }
5846 DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
5847 DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
5848 DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
5849 DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
5850 DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
5851 DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
5852 DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
5853 DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
5854
5855 } else {
5856 DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
5857 DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
5858 __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
5859 s->NoTimeToPrefetch = true;
5860 s->TimeForFetchingVM = 0;
5861 s->TimeForFetchingRowInVBlank = 0;
5862 *p->dst_y_per_vm_vblank = 0;
5863 *p->dst_y_per_row_vblank = 0;
5864 s->LinesToRequestPrefetchPixelData = 0;
5865 *p->VRatioPrefetchY = 0;
5866 *p->VRatioPrefetchC = 0;
5867 *p->RequiredPrefetchPixelDataBWLuma = 0;
5868 *p->RequiredPrefetchPixelDataBWChroma = 0;
5869 }
5870
5871 {
5872 double prefetch_vm_bw;
5873 double prefetch_row_bw;
5874
5875 if (vm_bytes == 0) {
5876 prefetch_vm_bw = 0;
5877 } else if (*p->dst_y_per_vm_vblank > 0) {
5878 #ifdef __DML_VBA_DEBUG__
5879 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5880 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5881 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5882 #endif
5883 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
5884 #ifdef __DML_VBA_DEBUG__
5885 DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
5886 #endif
5887 } else {
5888 prefetch_vm_bw = 0;
5889 s->NoTimeToPrefetch = true;
5890 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
5891 }
5892
5893 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
5894 prefetch_row_bw = 0;
5895 } else if (*p->dst_y_per_row_vblank > 0) {
5896 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
5897
5898 #ifdef __DML_VBA_DEBUG__
5899 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5900 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5901 DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
5902 #endif
5903 } else {
5904 prefetch_row_bw = 0;
5905 s->NoTimeToPrefetch = true;
5906 DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
5907 }
5908
5909 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
5910 }
5911
5912 if (s->NoTimeToPrefetch) {
5913 s->TimeForFetchingVM = 0;
5914 s->TimeForFetchingRowInVBlank = 0;
5915 *p->dst_y_per_vm_vblank = 0;
5916 *p->dst_y_per_row_vblank = 0;
5917 *p->dst_y_prefetch = 0;
5918 s->LinesToRequestPrefetchPixelData = 0;
5919 *p->VRatioPrefetchY = 0;
5920 *p->VRatioPrefetchC = 0;
5921 *p->RequiredPrefetchPixelDataBWLuma = 0;
5922 *p->RequiredPrefetchPixelDataBWChroma = 0;
5923 *p->prefetch_vmrow_bw = 0;
5924 }
5925
5926 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
5927 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
5928 DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
5929 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5930 DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5931 DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
5932
5933 return s->NoTimeToPrefetch;
5934 }
5935
// Returns how many source lines fit in the line buffer for one pipe, capped at
// max_line_buffer_lines.
//
// The per-pipe input width is vp_width / num_pipes, or vp_height / num_pipes
// when dml_is_vertical_rotation() reports a vertical rotation (integer
// division in both cases). That width is divided by max(h_ratio, 1.0), and the
// buffer capacity is line_buffer_size_bits at 57 bits per pixel. The resulting
// line count is passed through math_floor2(..., 1.0) before being clamped and
// truncated to unsigned int.
//
// num_pipes is used as a divisor without a check, so callers must pass a
// non-zero value.
static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
	unsigned int line_buffer_size_bits,
	unsigned int num_pipes,
	unsigned int vp_width,
	unsigned int vp_height,
	double h_ratio,
	enum dml2_rotation_angle rotation_angle)
{
	const double bits_per_lb_pixel = 57.0;
	unsigned int recin_width = vp_width / num_pipes;
	double pixels_per_line;
	double lines_that_fit;

	// For a vertical rotation the viewport height is the dimension split across pipes.
	if (dml_is_vertical_rotation(rotation_angle))
		recin_width = vp_height / num_pipes;

	// Horizontal downscaling (h_ratio > 1) reduces the pixels stored per line; upscaling does not.
	pixels_per_line = recin_width / math_max2(h_ratio, 1.0);
	lines_that_fit = math_floor2(line_buffer_size_bits / bits_per_lb_pixel / pixels_per_line, 1.0);

	return (unsigned int) math_min2((double) max_line_buffer_lines, lines_that_fit);
}
5956
// Returns the index of the plane, other than this_plane_idx, that has the
// largest entry in Trpd_dcfclk_cycles[0..num_planes-1]. On ties the lowest
// index wins because the comparison is strict.
//
// If there is no other plane to pick (e.g. num_planes <= 1), the invariant
// "a candidate exists" is asserted via DML_ASSERT and this_plane_idx is
// returned as a fallback.
//
// Note: the fallback is taken only when no candidate was found (max_idx < 0).
// The previous "<= 0" test also discarded a valid result of plane 0 and
// returned this_plane_idx instead, so plane 0 could never be reported as the
// max-impact plane.
static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
{
	// Values are compared as int (as in the original code) so that -1 can act
	// as the "nothing found yet" sentinel.
	int max_value = -1;
	int max_idx = -1;

	for (unsigned int i = 0; i < num_planes; i++) {
		if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) {
			max_value = Trpd_dcfclk_cycles[i];
			max_idx = i;
		}
	}

	if (max_idx < 0) {
		// No other plane was available; flag it and fall back to the plane itself.
		DML_ASSERT(max_idx >= 0);
		max_idx = this_plane_idx;
	}

	return max_idx;
}
5974
// Sums prefetch_swath_bytes[] over planes 0..num_planes-1, skipping
// exclude_plane_idx, and divides the total by bw_mbps.
//
// An exclude_plane_idx outside [0, num_planes) excludes nothing. bw_mbps is
// used as a divisor without a check.
// NOTE(review): the result is presumably a time in microseconds (bytes over
// MB/s) - confirm units against the caller.
static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
{
	double total_bytes = 0.0;
	unsigned int plane = 0;

	while (plane < num_planes) {
		if (plane != exclude_plane_idx)
			total_bytes += prefetch_swath_bytes[plane];
		plane++;
	}

	return total_bytes / bw_mbps;
}
5985
// A global check against the aggregate effect of the per-plane prefetch schedule.
//
// For every active plane this estimates how many DCFCLK cycles of return-path
// delay the plane can cause while filling its detile buffer and line buffer
// (clamped to the delay of a completely full ROB + compressed buffer). It then
// derives, per plane, an "impacted" prefetch duration from the worst other
// plane plus the swath fetch time of the remaining planes, and compares it
// with the plane's scheduled dst_y_prefetch.
//
// Inputs are read from *p; working state lives in
// scratch->CheckGlobalPrefetchAdmissibility_locals, which is zeroed on entry.
//
// Outputs:
//   p->impacted_dst_y_pre[i]     - impacted prefetch duration for plane i
//   *p->recalc_prefetch_schedule - set to 1 when any plane fails the check
//
// Returns the value of prefetch_global_check_passed: 1 if every plane's
// impacted duration fits within its dst_y_prefetch (or if the comparison is
// skipped because Tpre_rounded/Tpre_oto are NULL), 0 otherwise.
static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
{
	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
	unsigned int i, k;

	memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals));

	// Start from "passed, no recalculation needed"; the comparison loop below may flip both.
	*p->recalc_prefetch_schedule = 0;
	s->prefetch_global_check_passed = 1;
	// worst case if the rob and cdb is fully hogged
	// (ROB bytes + compressed buffer bytes expanded by DML_MAX_COMPRESSION_RATIO, expressed in 64-byte units)
	s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
	DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
	DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
	DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
	DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
	DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
#endif

	// calculate the return impact from each plane, request is 256B per dcfclk
	for (i = 0; i < p->num_active_planes; i++) {
		// Work on local copies so the caller's buffer/swath sizes are not modified.
		s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i];
		s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i];
		s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i];
		s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i];

		// dml2_420_10 sizes are scaled by 1.5 (result truncated to unsigned int).
		if (p->pixel_format[i] == dml2_420_10) {
			s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5);
			s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5);
			s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5);
			s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5);
		}

		// Luma burst: the detile buffer rounded down to whole chunks, plus the
		// whole swaths that fit in the line buffer source lines.
		s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
		s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]);

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]);
		DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l);
		DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]);
		DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]);
		DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]);
		DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det);
#endif

		if (s->src_swath_bytes_c[i] > 0) { // dual_plane
			// Chroma detile buffer, rounded down to whole chunks.
			s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);

			// The chroma line-buffer contribution is only added for the 422 planar formats.
			if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) {
				s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]);
			}

#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c);
			DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]);
			DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]);
			DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]);
#endif
		}

		s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate
		// Cycles accumulated on the return path during that fill time, using a
		// factor of (DML_MAX_COMPRESSION_RATIO - 1) * 64 bytes per DCFCLK.
		s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
		DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
		DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
#endif
		// clamping to worst case delay which is one which occupy the full rob+cdb
		if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
			s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
	}

	// Figure out the impacted prefetch time for each plane
	// if impacted_Tpre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
	for (i = 0; i < p->num_active_planes; i++) {
		k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
		// the rest of planes (except for k) compete for bw
		p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
		p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
		// Convert the time to a line count for plane i.
		// NOTE(review): math_ceil2(x, 0.25) presumably rounds up to a multiple of 0.25 - confirm against the helper.
		p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);

#ifdef __DML_VBA_DEBUG__
		DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
#endif
	}

	// Tpre_rounded and Tpre_oto only gate the comparison here (and feed the debug
	// logs); the pass/fail decision itself compares against dst_y_prefetch.
	if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
		for (i = 0; i < p->num_active_planes; i++) {
			// A single plane exceeding its scheduled prefetch fails the whole check and requests a recalculation.
			if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
				s->prefetch_global_check_passed = 0;
				*p->recalc_prefetch_schedule = 1;
			}
#ifdef __DML_VBA_DEBUG__
			DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
			DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
#endif
		}
	} else {
		// likely a mode programming call; assume support and no recalc - not used anyway
		s->prefetch_global_check_passed = 1;
		*p->recalc_prefetch_schedule = 0;
	}

#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
	DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
#endif

	return s->prefetch_global_check_passed;
}
6100
calculate_peak_bandwidth_required(struct dml2_core_internal_scratch * s,struct dml2_core_calcs_calculate_peak_bandwidth_required_params * p)6101 static void calculate_peak_bandwidth_required(
6102 struct dml2_core_internal_scratch *s,
6103 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
6104 {
6105 unsigned int n;
6106 unsigned int m;
6107
6108 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
6109
6110 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
6111
6112 #ifdef __DML_VBA_DEBUG__
6113 DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
6114 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
6115 #endif
6116
6117 for (unsigned int k = 0; k < p->num_active_planes; ++k) {
6118 l->unity_array[k] = 1.0;
6119 l->zero_array[k] = 0.0;
6120 }
6121
6122 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
6123 for (n = 0; n < dml2_core_internal_bw_max; n++) {
6124 get_urgent_bandwidth_required(
6125 &s->get_urgent_bandwidth_required_locals,
6126 p->display_cfg,
6127 m,
6128 n,
6129 0, //inc_flip_bw,
6130 0, //use_qual_row_bw
6131 p->num_active_planes,
6132 p->num_of_dpp,
6133 p->dcc_dram_bw_nom_overhead_factor_p0,
6134 p->dcc_dram_bw_nom_overhead_factor_p1,
6135 p->dcc_dram_bw_pref_overhead_factor_p0,
6136 p->dcc_dram_bw_pref_overhead_factor_p1,
6137 p->mall_prefetch_sdp_overhead_factor,
6138 p->mall_prefetch_dram_overhead_factor,
6139 p->surface_read_bandwidth_l,
6140 p->surface_read_bandwidth_c,
6141 l->zero_array, //PrefetchBandwidthLuma,
6142 l->zero_array, //PrefetchBandwidthChroma,
6143 l->zero_array, //PrefetchBWMax
6144 l->zero_array,
6145 l->zero_array,
6146 l->zero_array,
6147 p->dpte_row_bw,
6148 p->meta_row_bw,
6149 l->zero_array, //prefetch_cursor_bw,
6150 l->zero_array, //prefetch_vmrow_bw,
6151 l->zero_array, //flip_bw,
6152 l->zero_array,
6153 l->zero_array,
6154 l->zero_array,
6155 l->zero_array,
6156 l->zero_array,
6157 l->zero_array,
6158 p->surface_avg_vactive_required_bw[m][n],
6159 p->surface_peak_required_bw[m][n]);
6160
6161 p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6162 &s->get_urgent_bandwidth_required_locals,
6163 p->display_cfg,
6164 m,
6165 n,
6166 0, //inc_flip_bw,
6167 0, //use_qual_row_bw
6168 p->num_active_planes,
6169 p->num_of_dpp,
6170 p->dcc_dram_bw_nom_overhead_factor_p0,
6171 p->dcc_dram_bw_nom_overhead_factor_p1,
6172 p->dcc_dram_bw_pref_overhead_factor_p0,
6173 p->dcc_dram_bw_pref_overhead_factor_p1,
6174 p->mall_prefetch_sdp_overhead_factor,
6175 p->mall_prefetch_dram_overhead_factor,
6176 p->surface_read_bandwidth_l,
6177 p->surface_read_bandwidth_c,
6178 l->zero_array, //PrefetchBandwidthLuma,
6179 l->zero_array, //PrefetchBandwidthChroma,
6180 l->zero_array, //PrefetchBWMax
6181 p->excess_vactive_fill_bw_l,
6182 p->excess_vactive_fill_bw_c,
6183 p->cursor_bw,
6184 p->dpte_row_bw,
6185 p->meta_row_bw,
6186 l->zero_array, //prefetch_cursor_bw,
6187 l->zero_array, //prefetch_vmrow_bw,
6188 l->zero_array, //flip_bw,
6189 p->urgent_burst_factor_l,
6190 p->urgent_burst_factor_c,
6191 p->urgent_burst_factor_cursor,
6192 p->urgent_burst_factor_prefetch_l,
6193 p->urgent_burst_factor_prefetch_c,
6194 p->urgent_burst_factor_prefetch_cursor,
6195 l->surface_dummy_bw,
6196 p->surface_peak_required_bw[m][n]);
6197
6198 p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6199 &s->get_urgent_bandwidth_required_locals,
6200 p->display_cfg,
6201 m,
6202 n,
6203 p->inc_flip_bw,
6204 0, //use_qual_row_bw
6205 p->num_active_planes,
6206 p->num_of_dpp,
6207 p->dcc_dram_bw_nom_overhead_factor_p0,
6208 p->dcc_dram_bw_nom_overhead_factor_p1,
6209 p->dcc_dram_bw_pref_overhead_factor_p0,
6210 p->dcc_dram_bw_pref_overhead_factor_p1,
6211 p->mall_prefetch_sdp_overhead_factor,
6212 p->mall_prefetch_dram_overhead_factor,
6213 p->surface_read_bandwidth_l,
6214 p->surface_read_bandwidth_c,
6215 p->prefetch_bandwidth_l,
6216 p->prefetch_bandwidth_c,
6217 p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
6218 p->excess_vactive_fill_bw_l,
6219 p->excess_vactive_fill_bw_c,
6220 p->cursor_bw,
6221 p->dpte_row_bw,
6222 p->meta_row_bw,
6223 p->prefetch_cursor_bw,
6224 p->prefetch_vmrow_bw,
6225 p->flip_bw,
6226 p->urgent_burst_factor_l,
6227 p->urgent_burst_factor_c,
6228 p->urgent_burst_factor_cursor,
6229 p->urgent_burst_factor_prefetch_l,
6230 p->urgent_burst_factor_prefetch_c,
6231 p->urgent_burst_factor_prefetch_cursor,
6232 l->surface_dummy_bw,
6233 p->surface_peak_required_bw[m][n]);
6234
6235 p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
6236 &s->get_urgent_bandwidth_required_locals,
6237 p->display_cfg,
6238 m,
6239 n,
6240 0, //inc_flip_bw
6241 1, //use_qual_row_bw
6242 p->num_active_planes,
6243 p->num_of_dpp,
6244 p->dcc_dram_bw_nom_overhead_factor_p0,
6245 p->dcc_dram_bw_nom_overhead_factor_p1,
6246 p->dcc_dram_bw_pref_overhead_factor_p0,
6247 p->dcc_dram_bw_pref_overhead_factor_p1,
6248 p->mall_prefetch_sdp_overhead_factor,
6249 p->mall_prefetch_dram_overhead_factor,
6250 p->surface_read_bandwidth_l,
6251 p->surface_read_bandwidth_c,
6252 p->prefetch_bandwidth_l,
6253 p->prefetch_bandwidth_c,
6254 p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw
6255 p->excess_vactive_fill_bw_l,
6256 p->excess_vactive_fill_bw_c,
6257 p->cursor_bw,
6258 p->dpte_row_bw,
6259 p->meta_row_bw,
6260 p->prefetch_cursor_bw,
6261 p->prefetch_vmrow_bw,
6262 p->flip_bw,
6263 p->urgent_burst_factor_l,
6264 p->urgent_burst_factor_c,
6265 p->urgent_burst_factor_cursor,
6266 p->urgent_burst_factor_prefetch_l,
6267 p->urgent_burst_factor_prefetch_c,
6268 p->urgent_burst_factor_prefetch_cursor,
6269 l->surface_dummy_bw,
6270 p->surface_peak_required_bw[m][n]);
6271
6272 p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6273 &s->get_urgent_bandwidth_required_locals,
6274 p->display_cfg,
6275 m,
6276 n,
6277 p->inc_flip_bw,
6278 0, //use_qual_row_bw
6279 p->num_active_planes,
6280 p->num_of_dpp,
6281 p->dcc_dram_bw_nom_overhead_factor_p0,
6282 p->dcc_dram_bw_nom_overhead_factor_p1,
6283 p->dcc_dram_bw_pref_overhead_factor_p0,
6284 p->dcc_dram_bw_pref_overhead_factor_p1,
6285 p->mall_prefetch_sdp_overhead_factor,
6286 p->mall_prefetch_dram_overhead_factor,
6287 p->surface_read_bandwidth_l,
6288 p->surface_read_bandwidth_c,
6289 p->prefetch_bandwidth_l,
6290 p->prefetch_bandwidth_c,
6291 p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
6292 p->excess_vactive_fill_bw_l,
6293 p->excess_vactive_fill_bw_c,
6294 p->cursor_bw,
6295 p->dpte_row_bw,
6296 p->meta_row_bw,
6297 p->prefetch_cursor_bw,
6298 p->prefetch_vmrow_bw,
6299 p->flip_bw,
6300 l->unity_array,
6301 l->unity_array,
6302 l->unity_array,
6303 l->unity_array,
6304 l->unity_array,
6305 l->unity_array,
6306 l->surface_dummy_bw,
6307 p->surface_peak_required_bw[m][n]);
6308
6309 #ifdef __DML_VBA_DEBUG__
6310 DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
6311 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6312 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6313 DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
6314 #endif
6315 DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
6316 }
6317 }
6318 }
6319
check_urgent_bandwidth_support(double * frac_urg_bandwidth_nom,double * frac_urg_bandwidth_mall,bool * vactive_bandwidth_support_ok,bool * bandwidth_support_ok,unsigned int mall_allocated_for_dcn_mbytes,double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6320 static void check_urgent_bandwidth_support(
6321 double *frac_urg_bandwidth_nom,
6322 double *frac_urg_bandwidth_mall,
6323 bool *vactive_bandwidth_support_ok, // vactive ok
6324 bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
6325
6326 unsigned int mall_allocated_for_dcn_mbytes,
6327 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6328 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6329 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6330 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6331 {
6332 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6333 double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6334 double frac_urg_bandwidth_mall_sdp;
6335 double frac_urg_bandwidth_mall_dram;
6336 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0)
6337 frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6338 else
6339 frac_urg_bandwidth_mall_sdp = 0.0;
6340 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0)
6341 frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6342 else
6343 frac_urg_bandwidth_mall_dram = 0.0;
6344
6345 *bandwidth_support_ok = 1;
6346 *vactive_bandwidth_support_ok = 1;
6347
6348 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
6349 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
6350 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
6351 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
6352
6353 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6354 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6355
6356 if (mall_allocated_for_dcn_mbytes > 0) {
6357 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6358 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6359 }
6360
6361 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
6362 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
6363
6364 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
6365
6366 if (mall_allocated_for_dcn_mbytes > 0)
6367 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
6368
6369 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6370 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6371 if (mall_allocated_for_dcn_mbytes > 0) {
6372 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6373 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6374 }
6375
6376 #ifdef __DML_VBA_DEBUG__
6377 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
6378 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
6379 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
6380
6381 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
6382 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
6383 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
6384 DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
6385
6386 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6387 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6388 DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6389 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6390 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]);
6391 }
6392 }
6393 #endif
6394 }
6395
get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6396 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
6397 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
6398 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6399 {
6400 double flip_bw_available_mbps;
6401 double flip_bw_available_sdp_mbps;
6402 double flip_bw_available_dram_mbps;
6403
6404 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp];
6405 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram];
6406 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps;
6407
6408 #ifdef __DML_VBA_DEBUG__
6409 DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6410 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]);
6411 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]);
6412 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]);
6413 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]);
6414 DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps);
6415 DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps);
6416 DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps);
6417 #endif
6418
6419 return flip_bw_available_mbps;
6420 }
6421
calculate_immediate_flip_bandwidth_support(double * frac_urg_bandwidth_flip,bool * flip_bandwidth_support_ok,enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6422 static void calculate_immediate_flip_bandwidth_support(
6423 // Output
6424 double *frac_urg_bandwidth_flip,
6425 bool *flip_bandwidth_support_ok,
6426
6427 // Input
6428 enum dml2_core_internal_soc_state_type eval_state,
6429 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6430 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6431 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6432 {
6433 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp];
6434 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram];
6435
6436 *flip_bandwidth_support_ok = true;
6437 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
6438 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n];
6439
6440 #ifdef __DML_VBA_DEBUG__
6441 DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n));
6442 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]);
6443 DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]);
6444 DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
6445 DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6446 #endif
6447 DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
6448 }
6449
6450 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
6451 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0);
6452
6453 #ifdef __DML_VBA_DEBUG__
6454 DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6455 DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
6456 DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
6457 DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
6458 DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6459
6460 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6461 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6462 DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6463 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6464 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]);
6465 }
6466 }
6467 #endif
6468 }
6469
CalculateFlipSchedule(struct dml2_core_internal_scratch * s,bool iflip_enable,bool use_lb_flip_bw,double HostVMInefficiencyFactor,double Tvm_trips_flip,double Tr0_trips_flip,double Tvm_trips_flip_rounded,double Tr0_trips_flip_rounded,bool GPUVMEnable,double vm_bytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum dml2_source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw_flip,unsigned int dpte_row_height,unsigned int dpte_row_height_chroma,bool use_one_row_for_frame_flip,unsigned int max_flip_time_us,unsigned int max_flip_time_lines,unsigned int per_pipe_flip_bytes,unsigned int meta_row_bytes,unsigned int meta_row_height,unsigned int meta_row_height_chroma,bool dcc_mrq_enable,double * dst_y_per_vm_flip,double * dst_y_per_row_flip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)6470 static void CalculateFlipSchedule(
6471 struct dml2_core_internal_scratch *s,
6472 bool iflip_enable,
6473 bool use_lb_flip_bw,
6474 double HostVMInefficiencyFactor,
6475 double Tvm_trips_flip,
6476 double Tr0_trips_flip,
6477 double Tvm_trips_flip_rounded,
6478 double Tr0_trips_flip_rounded,
6479 bool GPUVMEnable,
6480 double vm_bytes, // vm_bytes
6481 double DPTEBytesPerRow, // dpte_row_bytes
6482 double BandwidthAvailableForImmediateFlip,
6483 unsigned int TotImmediateFlipBytes,
6484 enum dml2_source_format_class SourcePixelFormat,
6485 double LineTime,
6486 double VRatio,
6487 double VRatioChroma,
6488 double Tno_bw_flip,
6489 unsigned int dpte_row_height,
6490 unsigned int dpte_row_height_chroma,
6491 bool use_one_row_for_frame_flip,
6492 unsigned int max_flip_time_us,
6493 unsigned int max_flip_time_lines,
6494 unsigned int per_pipe_flip_bytes,
6495 unsigned int meta_row_bytes,
6496 unsigned int meta_row_height,
6497 unsigned int meta_row_height_chroma,
6498 bool dcc_mrq_enable,
6499
6500 // Output
6501 double *dst_y_per_vm_flip,
6502 double *dst_y_per_row_flip,
6503 double *final_flip_bw,
6504 bool *ImmediateFlipSupportedForPipe)
6505 {
6506 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
6507
6508 l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
6509 l->dpte_row_bytes = DPTEBytesPerRow;
6510
6511 #ifdef __DML_VBA_DEBUG__
6512 DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
6513 DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
6514 DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
6515 DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6516 DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
6517 DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
6518 DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
6519 DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
6520 DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
6521 DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
6522 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
6523 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
6524 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
6525 DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
6526 DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
6527 DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
6528 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
6529 DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
6530 DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
6531 DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
6532 DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
6533 #endif
6534
6535 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
6536 if (l->dual_plane) {
6537 if (dcc_mrq_enable & GPUVMEnable) {
6538 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6539 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
6540 } else if (GPUVMEnable) {
6541 l->min_row_height = dpte_row_height;
6542 l->min_row_height_chroma = dpte_row_height_chroma;
6543 } else {
6544 l->min_row_height = meta_row_height;
6545 l->min_row_height_chroma = meta_row_height_chroma;
6546 }
6547 l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
6548 } else {
6549 if (dcc_mrq_enable & GPUVMEnable)
6550 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6551 else if (GPUVMEnable)
6552 l->min_row_height = dpte_row_height;
6553 else
6554 l->min_row_height = meta_row_height;
6555
6556 l->min_row_time = l->min_row_height * LineTime / VRatio;
6557 }
6558 #ifdef __DML_VBA_DEBUG__
6559 DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
6560 #endif
6561 DML_ASSERT(l->min_row_time > 0);
6562
6563 if (use_lb_flip_bw) {
6564 // For mode check, calculation the flip bw requirement with worst case flip time
6565 l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
6566 math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
6567
6568 //The lower bound on flip bandwidth
6569 // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
6570 l->lb_flip_bw = 0;
6571
6572 if (iflip_enable) {
6573 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
6574 l->num_rows = 2;
6575 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
6576 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
6577 l->lb_flip_bw = math_max3(
6578 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
6579 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
6580 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6581 #ifdef __DML_VBA_DEBUG__
6582 DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
6583 DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
6584 DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
6585 DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
6586 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
6587 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
6588 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6589
6590 if (l->lb_flip_bw > 0) {
6591 DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
6592 DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
6593 DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime);
6594 DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
6595 DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
6596 }
6597 #endif
6598 l->lb_flip_bw = math_max3(l->lb_flip_bw,
6599 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
6600 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6601
6602 #ifdef __DML_VBA_DEBUG__
6603 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
6604 DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6605 #endif
6606 }
6607
6608 *final_flip_bw = l->lb_flip_bw;
6609
6610 *dst_y_per_vm_flip = 1; // not used
6611 *dst_y_per_row_flip = 1; // not used
6612 *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
6613 } else {
6614 if (iflip_enable) {
6615 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
6616
6617 #ifdef __DML_VBA_DEBUG__
6618 DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
6619 DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6620 DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
6621 DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes);
6622 #endif
6623 if (l->ImmediateFlipBW == 0) {
6624 l->Tvm_flip = 0;
6625 l->Tr0_flip = 0;
6626 } else {
6627 l->Tvm_flip = math_max3(Tvm_trips_flip,
6628 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
6629 LineTime / 4.0);
6630
6631 l->Tr0_flip = math_max3(Tr0_trips_flip,
6632 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
6633 LineTime / 4.0);
6634 }
6635 #ifdef __DML_VBA_DEBUG__
6636 DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
6637 DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
6638
6639 DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
6640 DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
6641 #endif
6642 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
6643 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
6644
6645 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
6646 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
6647
6648 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
6649 *ImmediateFlipSupportedForPipe = false;
6650 } else {
6651 *ImmediateFlipSupportedForPipe = iflip_enable;
6652 }
6653 } else {
6654 l->Tvm_flip = 0;
6655 l->Tr0_flip = 0;
6656 *dst_y_per_vm_flip = 0;
6657 *dst_y_per_row_flip = 0;
6658 *final_flip_bw = 0;
6659 *ImmediateFlipSupportedForPipe = iflip_enable;
6660 }
6661 }
6662 } else {
6663 l->Tvm_flip = 0;
6664 l->Tr0_flip = 0;
6665 *dst_y_per_vm_flip = 0;
6666 *dst_y_per_row_flip = 0;
6667 *final_flip_bw = 0;
6668 *ImmediateFlipSupportedForPipe = iflip_enable;
6669 }
6670
6671 #ifdef __DML_VBA_DEBUG__
6672 if (!use_lb_flip_bw) {
6673 DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
6674 DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
6675 DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
6676 DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
6677 DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
6678 }
6679 DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
6680 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
6681 #endif
6682 }
6683
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params * p)6684 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
6685 struct dml2_core_internal_scratch *scratch,
6686 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
6687 {
6688 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
6689
6690 enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
6691 double reserved_vblank_time_us;
6692 bool FoundCriticalSurface = false;
6693
6694 s->TotalActiveWriteback = 0;
6695 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
6696
6697 #ifdef __DML_VBA_DEBUG__
6698 DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6699 #endif
6700
6701 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
6702 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
6703 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
6704 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6705 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6706 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6707 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6708 if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
6709 p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6710 p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6711 p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6712 p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6713 }
6714 p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
6715
6716 #ifdef __DML_VBA_DEBUG__
6717 DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
6718 DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
6719 DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
6720 DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
6721 DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
6722 DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6723 DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
6724 DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
6725 DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
6726 DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
6727 DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
6728 DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
6729 DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
6730 DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
6731 #endif
6732
6733 s->TotalActiveWriteback = 0;
6734 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6735 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
6736 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
6737 }
6738 }
6739
6740 if (s->TotalActiveWriteback <= 1) {
6741 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
6742 } else {
6743 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6744 }
6745 if (p->USRRetrainingRequired)
6746 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
6747
6748 if (s->TotalActiveWriteback <= 1) {
6749 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
6750 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
6751 } else {
6752 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6753 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
6754 }
6755
6756 if (p->USRRetrainingRequired)
6757 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6758
6759 if (p->USRRetrainingRequired)
6760 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6761
6762 #ifdef __DML_VBA_DEBUG__
6763 DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
6764 DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
6765 DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
6766 DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
6767 DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
6768 #endif
6769
6770 s->TotalPixelBW = 0.0;
6771 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6772 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6773 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6774 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6775 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6776 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
6777 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
6778 }
6779
6780 *p->global_fclk_change_supported = true;
6781 *p->global_dram_clock_change_supported = true;
6782
6783 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6784 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6785 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6786 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6787 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6788 double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
6789 double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
6790 double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
6791 double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
6792 double LBBitPerPixel = 57;
6793
6794 s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
6795 s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
6796
6797 #ifdef __DML_VBA_DEBUG__
6798 DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
6799 DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
6800 DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel);
6801 DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
6802 DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
6803 #endif
6804
6805 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
6806 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
6807
6808 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
6809 if (p->UnboundedRequestEnabled) {
6810 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
6811 }
6812
6813 s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
6814 s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
6815 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
6816
6817 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
6818
6819 if (p->NumberOfActiveSurfaces > 1) {
6820 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
6821 }
6822
6823 if (p->BytePerPixelDETC[k] > 0) {
6824 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
6825 s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
6826 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
6827 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
6828 if (p->NumberOfActiveSurfaces > 1) {
6829 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
6830 }
6831 s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
6832 } else {
6833 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
6834 }
6835
6836 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
6837 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
6838 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
6839 s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;
6840
6841 if (p->VActiveLatencyHidingMargin)
6842 p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
6843
6844 if (p->VActiveLatencyHidingUs)
6845 p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
6846
6847 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
6848 s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
6849 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
6850 * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
6851 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
6852 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
6853 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
6854 }
6855 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
6856
6857 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
6858
6859 s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
6860 s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
6861 }
6862 p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
6863
6864 uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
6865 reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
6866
6867 p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
6868 if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
6869 p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
6870 else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
6871 p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
6872
6873 if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
6874 *p->global_fclk_change_supported = false;
6875
6876 p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
6877 if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
6878 if (p->display_cfg->overrides.all_streams_blanked ||
6879 (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
6880 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
6881 else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6882 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
6883 else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6884 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
6885 } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6886 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
6887 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6888 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
6889 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
6890 p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
6891 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
6892 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
6893 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
6894 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
6895
6896 if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
6897 *p->global_dram_clock_change_supported = false;
6898
6899 s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
6900 s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
6901 s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
6902 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
6903
6904 #ifdef __DML_VBA_DEBUG__
6905 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
6906 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
6907 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
6908 DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
6909 DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
6910 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
6911 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
6912 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
6913 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
6914 DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
6915 #endif
6916 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
6917
6918 if (p->BytePerPixelDETC[k] > 0) {
6919 s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
6920 s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
6921 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
6922
6923 if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
6924 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
6925 else
6926 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
6927
6928 #ifdef __DML_VBA_DEBUG__
6929 DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
6930 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
6931 DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
6932 DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
6933 #endif
6934 }
6935 }
6936
6937 *p->g6_temp_read_support = true;
6938 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6939 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
6940 (s->g6_temp_read_latency_margin[k] < 0)) {
6941 *p->g6_temp_read_support = false;
6942 }
6943 }
6944
6945 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6946 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
6947 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
6948 FoundCriticalSurface = true;
6949 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
6950 }
6951 }
6952
6953 #ifdef __DML_VBA_DEBUG__
6954 DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
6955 DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
6956 DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
6957 DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
6958 #endif
6959 }
6960
calculate_bytes_to_fetch_required_to_hide_latency(struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params * p)6961 static void calculate_bytes_to_fetch_required_to_hide_latency(
6962 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
6963 {
6964 unsigned int dst_lines_to_hide;
6965 unsigned int src_lines_to_hide_l;
6966 unsigned int src_lines_to_hide_c;
6967 unsigned int plane_index;
6968 unsigned int stream_index;
6969
6970 for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
6971 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
6972 continue;
6973
6974 stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
6975
6976 dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us[0] /
6977 ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
6978 (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
6979
6980 src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
6981 p->swath_height_l[plane_index]);
6982 p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
6983
6984 src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
6985 p->swath_height_c[plane_index]);
6986 p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
6987
6988 if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
6989 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
6990 if (p->meta_row_height_c[plane_index]) {
6991 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
6992 }
6993 }
6994
6995 if (p->display_cfg->gpuvm_enable == true) {
6996 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
6997 if (p->dpte_row_height_c[plane_index]) {
6998 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
6999 }
7000 }
7001 }
7002 }
7003
calculate_vactive_det_fill_latency(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double surface_read_bw_l[],double surface_read_bw_c[],double (* surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double (* surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double vactive_det_fill_delay_us[])7004 static noinline_for_stack void calculate_vactive_det_fill_latency(
7005 const struct dml2_display_cfg *display_cfg,
7006 unsigned int num_active_planes,
7007 unsigned int bytes_required_l[],
7008 unsigned int bytes_required_c[],
7009 double dcc_dram_bw_nom_overhead_factor_p0[],
7010 double dcc_dram_bw_nom_overhead_factor_p1[],
7011 double surface_read_bw_l[],
7012 double surface_read_bw_c[],
7013 double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7014 double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7015 /* output */
7016 double vactive_det_fill_delay_us[])
7017 {
7018 double effective_excess_bandwidth;
7019 double effective_excess_bandwidth_l;
7020 double effective_excess_bandwidth_c;
7021 double adj_factor;
7022 unsigned int plane_index;
7023 unsigned int soc_state;
7024 unsigned int bw_type;
7025
7026 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7027 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7028 continue;
7029
7030 vactive_det_fill_delay_us[plane_index] = 0.0;
7031 for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
7032 for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
7033 effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
7034
7035 /* luma */
7036 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
7037
7038 effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7039 if (effective_excess_bandwidth_l > 0.0) {
7040 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
7041 }
7042
7043 /* chroma */
7044 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
7045
7046 effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7047 if (effective_excess_bandwidth_c > 0.0) {
7048 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
7049 }
7050 }
7051 }
7052 }
7053 }
7054
calculate_excess_vactive_bandwidth_required(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[])7055 static void calculate_excess_vactive_bandwidth_required(
7056 const struct dml2_display_cfg *display_cfg,
7057 unsigned int num_active_planes,
7058 unsigned int bytes_required_l[],
7059 unsigned int bytes_required_c[],
7060 /* outputs */
7061 double excess_vactive_fill_bw_l[],
7062 double excess_vactive_fill_bw_c[])
7063 {
7064 unsigned int plane_index;
7065
7066 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7067 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7068 continue;
7069
7070 excess_vactive_fill_bw_l[plane_index] = 0.0;
7071 excess_vactive_fill_bw_c[plane_index] = 0.0;
7072
7073 if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk] > 0) {
7074 excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk];
7075 excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us[dml2_pstate_type_uclk];
7076 }
7077 }
7078 }
7079
uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz,const struct dml2_dram_params * dram_config,const struct dml2_mcg_dram_bw_to_min_clk_table * dram_bw_table)7080 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config, const struct dml2_mcg_dram_bw_to_min_clk_table *dram_bw_table)
7081 {
7082 double bw_mbps = 0;
7083 unsigned int i;
7084
7085 if (!dram_config->alt_clock_bw_conversion)
7086 bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7087 else
7088 for (i = 0; i < dram_bw_table->num_entries; i++)
7089 if (dram_bw_table->entries[i].min_uclk_khz >= uclk_khz) {
7090 bw_mbps = (double)dram_bw_table->entries[i].pre_derate_dram_bw_kbps / 1000.0;
7091 break;
7092 }
7093
7094 DML_ASSERT(bw_mbps > 0);
7095
7096 return bw_mbps;
7097 }
7098
dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps,const struct dml2_dram_params * dram_config)7099 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
7100 {
7101 double uclk_mhz = 0;
7102
7103 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7104
7105 return uclk_mhz;
7106 }
7107
get_qos_param_index(unsigned long uclk_freq_khz,const struct dml2_dcn4_uclk_dpm_dependent_qos_params * per_uclk_dpm_params)7108 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
7109 {
7110 unsigned int i;
7111 unsigned int index = 0;
7112
7113 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7114 DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
7115
7116 if (i == 0)
7117 index = 0;
7118 else
7119 index = i - 1;
7120
7121 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
7122 per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
7123 break;
7124 }
7125 }
7126 DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
7127 DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index);
7128 return index;
7129 }
7130
get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz,const struct dml2_soc_state_table * clk_table)7131 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
7132 {
7133 unsigned int i;
7134 bool clk_entry_found = false;
7135
7136 for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
7137 DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
7138
7139 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
7140 clk_entry_found = true;
7141 break;
7142 }
7143 }
7144
7145 if (!clk_entry_found)
7146 DML_ASSERT(clk_entry_found);
7147 #if defined(__DML_VBA_DEBUG__)
7148 DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
7149 DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i);
7150 #endif
7151 return i;
7152 }
7153
/*
 * Compute the number of bytes a pipe fetches for a flip.
 *
 * The result is the sum of two independently truncated terms:
 *  - vm_bytes scaled by hostvm_inefficiency_factor, plus two meta rows;
 *  - two DPTE rows scaled by hostvm_inefficiency_factor.
 */
static unsigned int get_pipe_flip_bytes(
		double hostvm_inefficiency_factor,
		unsigned int vm_bytes,
		unsigned int dpte_row_bytes,
		unsigned int meta_row_bytes)
{
	const unsigned int vm_and_meta_bytes =
		(unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes);
	const unsigned int dpte_bytes =
		(unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor);

	return vm_and_meta_bytes + dpte_bytes;
}
7167
/*
 * Compute the HostVM inefficiency factors.
 *
 * Both outputs are 1 unless gpuvm_enable and hostvm_enable are both set. In
 * that case:
 *  - *HostVMInefficiencyFactor is the ratio of
 *    urg_bandwidth_avail_active_pixel_and_vm to
 *    urg_bandwidth_avail_active_vm_only;
 *  - *HostVMInefficiencyFactorPrefetch is the same ratio, raised to 4 when the
 *    ratio is below 4 and remote_iommu_outstanding_translations is less than
 *    max_outstanding_reqs.
 */
static void calculate_hostvm_inefficiency_factor(
		double *HostVMInefficiencyFactor,
		double *HostVMInefficiencyFactorPrefetch,

		bool gpuvm_enable,
		bool hostvm_enable,
		unsigned int remote_iommu_outstanding_translations,
		unsigned int max_outstanding_reqs,
		double urg_bandwidth_avail_active_pixel_and_vm,
		double urg_bandwidth_avail_active_vm_only)
{
	double bw_ratio;
	double prefetch_factor;

	*HostVMInefficiencyFactor = 1;
	*HostVMInefficiencyFactorPrefetch = 1;

	/* the defaults above stand unless both GPUVM and HostVM are in use */
	if (!gpuvm_enable || !hostvm_enable)
		return;

	bw_ratio = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;

	prefetch_factor = bw_ratio;
	if ((bw_ratio < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs))
		prefetch_factor = 4;

	*HostVMInefficiencyFactor = bw_ratio;
	*HostVMInefficiencyFactorPrefetch = prefetch_factor;
#ifdef __DML_VBA_DEBUG__
	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
#endif
}
7196
/*
 * Table of g6 temp read blackout durations keyed by UCLK frequency.
 * Holds up to DML_MAX_CLK_TABLE_SIZE entries; the lookup in
 * get_g6_temp_read_blackout_us() treats an entry with uclk_khz == 0 as the
 * end of the table.
 */
struct dml2_core_internal_g6_temp_read_blackouts_table {
	struct {
		unsigned int uclk_khz; /* UCLK frequency of this entry, in kHz */
		unsigned int blackout_us; /* blackout duration for this entry, in microseconds */
	} entries[DML_MAX_CLK_TABLE_SIZE];
};
7203
7204 struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
7205 .entries = {
7206 {
7207 .uclk_khz = 96000,
7208 .blackout_us = 23,
7209 },
7210 {
7211 .uclk_khz = 435000,
7212 .blackout_us = 10,
7213 },
7214 {
7215 .uclk_khz = 521000,
7216 .blackout_us = 10,
7217 },
7218 {
7219 .uclk_khz = 731000,
7220 .blackout_us = 8,
7221 },
7222 {
7223 .uclk_khz = 822000,
7224 .blackout_us = 8,
7225 },
7226 {
7227 .uclk_khz = 962000,
7228 .blackout_us = 5,
7229 },
7230 {
7231 .uclk_khz = 1069000,
7232 .blackout_us = 5,
7233 },
7234 {
7235 .uclk_khz = 1187000,
7236 .blackout_us = 5,
7237 },
7238 },
7239 };
7240
get_g6_temp_read_blackout_us(struct dml2_soc_bb * soc,unsigned int uclk_freq_khz,unsigned int min_clk_index)7241 static double get_g6_temp_read_blackout_us(
7242 struct dml2_soc_bb *soc,
7243 unsigned int uclk_freq_khz,
7244 unsigned int min_clk_index)
7245 {
7246 unsigned int i;
7247 unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7248
7249 if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
7250 /* overrides are present in the SoC BB */
7251 return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
7252 }
7253
7254 /* use internal table */
7255 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7256
7257 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7258 if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
7259 core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
7260 break;
7261 }
7262
7263 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
7264 }
7265
7266 return (double)blackout_us;
7267 }
7268
get_max_urgent_latency_us(struct dml2_dcn4x_soc_qos_params * dcn4x,double uclk_freq_mhz,double FabricClock,unsigned int min_clk_index)7269 static double get_max_urgent_latency_us(
7270 struct dml2_dcn4x_soc_qos_params *dcn4x,
7271 double uclk_freq_mhz,
7272 double FabricClock,
7273 unsigned int min_clk_index)
7274 {
7275 double latency;
7276 latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
7277 * (1 + dcn4x->umc_max_latency_margin / 100.0)
7278 + dcn4x->mall_overhead_fclk_cycles / FabricClock
7279 + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
7280 * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
7281 return latency;
7282 }
7283
calculate_pstate_keepout_dst_lines(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_watermarks * watermarks,unsigned int pstate_keepout_dst_lines[])7284 static void calculate_pstate_keepout_dst_lines(
7285 const struct dml2_display_cfg *display_cfg,
7286 const struct dml2_core_internal_watermarks *watermarks,
7287 unsigned int pstate_keepout_dst_lines[])
7288 {
7289 const struct dml2_stream_parameters *stream_descriptor;
7290 unsigned int i;
7291
7292 for (i = 0; i < display_cfg->num_planes; i++) {
7293 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
7294 stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
7295
7296 pstate_keepout_dst_lines[i] =
7297 (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
7298
7299 if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
7300 pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
7301 }
7302 }
7303 }
7304 }
7305
dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib * mode_lib,const struct dml2_display_cfg * display_cfg)7306 static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib,
7307 const struct dml2_display_cfg *display_cfg)
7308 {
7309 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7310 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
7311 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
7312 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
7313 #ifdef DML_GLOBAL_PREFETCH_CHECK
7314 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
7315 #endif
7316 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
7317
7318 double min_return_bw_for_latency;
7319 unsigned int k;
7320
7321 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
7322
7323 calculate_hostvm_inefficiency_factor(
7324 &s->HostVMInefficiencyFactor,
7325 &s->HostVMInefficiencyFactorPrefetch,
7326
7327 display_cfg->gpuvm_enable,
7328 display_cfg->hostvm_enable,
7329 mode_lib->ip.remote_iommu_outstanding_translations,
7330 mode_lib->soc.max_outstanding_reqs,
7331 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
7332 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
7333
7334 mode_lib->ms.Total3dlutActive = 0;
7335 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7336 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
7337 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
7338
7339 // Calculate tdlut schedule related terms
7340 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
7341 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
7342 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
7343 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
7344 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
7345 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
7346 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
7347 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
7348 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
7349
7350 // output
7351 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
7352 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
7353 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
7354 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
7355 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
7356 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
7357 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
7358
7359 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
7360 }
7361
7362 min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
7363
7364 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
7365 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
7366 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
7367 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
7368
7369 CalculateExtraLatency(
7370 display_cfg,
7371 mode_lib->ip.rob_buffer_size_kbytes,
7372 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
7373 s->ReorderingBytes,
7374 mode_lib->ms.DCFCLK,
7375 mode_lib->ms.FabricClock,
7376 mode_lib->ip.pixel_chunk_size_kbytes,
7377 min_return_bw_for_latency,
7378 mode_lib->ms.num_active_planes,
7379 mode_lib->ms.NoOfDPP,
7380 mode_lib->ms.dpte_group_bytes,
7381 s->tdlut_bytes_per_group,
7382 s->HostVMInefficiencyFactor,
7383 s->HostVMInefficiencyFactorPrefetch,
7384 mode_lib->soc.hostvm_min_page_size_kbytes,
7385 mode_lib->soc.qos_parameters.qos_type,
7386 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
7387 mode_lib->soc.max_outstanding_reqs,
7388 mode_lib->ms.support.request_size_bytes_luma,
7389 mode_lib->ms.support.request_size_bytes_chroma,
7390 mode_lib->ip.meta_chunk_size_kbytes,
7391 mode_lib->ip.dchub_arb_to_ret_delay,
7392 mode_lib->ms.TripToMemory,
7393 mode_lib->ip.hostvm_mode,
7394
7395 // output
7396 &mode_lib->ms.ExtraLatency,
7397 &mode_lib->ms.ExtraLatency_sr,
7398 &mode_lib->ms.ExtraLatencyPrefetch);
7399
7400 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7401 s->impacted_dst_y_pre[k] = 0;
7402
7403 s->recalc_prefetch_schedule = 0;
7404 s->recalc_prefetch_done = 0;
7405 do {
7406 mode_lib->ms.support.PrefetchSupported = true;
7407
7408 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7409 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
7410 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
7411
7412 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
7413 mode_lib->ms.NoOfDPP[k],
7414 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
7415 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
7416 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
7417 display_cfg->plane_descriptors[k].composition.rotation_angle);
7418
7419 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
7420 mode_lib->ms.NoOfDPP[k],
7421 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
7422 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
7423 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
7424 display_cfg->plane_descriptors[k].composition.rotation_angle);
7425
7426 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
7427
7428 mode_lib->ms.TWait[k] = CalculateTWait(
7429 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
7430 mode_lib->ms.UrgLatency,
7431 mode_lib->ms.TripToMemory,
7432 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
7433 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0);
7434
7435 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
7436 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
7437 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
7438 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
7439 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
7440 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
7441 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7442 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
7443 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
7444 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
7445 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
7446 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
7447 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
7448 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
7449 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
7450 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
7451 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
7452 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
7453 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
7454 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
7455 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
7456 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
7457 myPipe->ODMMode = mode_lib->ms.ODMMode[k];
7458 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
7459 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
7460 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
7461 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
7462
7463 #ifdef __DML_VBA_DEBUG__
7464 DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
7465 DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
7466 #endif
7467 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
7468 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
7469 CalculatePrefetchSchedule_params->myPipe = myPipe;
7470 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
7471 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
7472 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
7473 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
7474 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
7475 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
7476 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
7477 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
7478 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
7479 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
7480 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
7481 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
7482 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
7483 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
7484 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
7485 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
7486 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
7487 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
7488 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
7489 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
7490 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
7491 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
7492 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
7493 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
7494 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
7495 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
7496 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
7497 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
7498 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
7499 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
7500 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
7501 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
7502 CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
7503 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
7504 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
7505 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
7506 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
7507 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
7508 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
7509 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
7510 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
7511 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
7512 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
7513 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
7514 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
7515 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
7516 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
7517 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
7518
7519 // output
7520 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
7521 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
7522 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
7523 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
7524 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
7525 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
7526 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
7527 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
7528 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
7529 CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k];
7530 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
7531 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
7532 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
7533 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
7534 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
7535 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
7536 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
7537 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
7538 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
7539 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
7540 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
7541 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
7542 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
7543 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
7544 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
7545 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
7546 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
7547 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
7548 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
7549 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
7550 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
7551
7552 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
7553
7554 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
7555 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
7556 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
7557 } // for k num_planes
7558
7559 CalculateDCFCLKDeepSleepTdlut(
7560 display_cfg,
7561 mode_lib->ms.num_active_planes,
7562 mode_lib->ms.BytePerPixelY,
7563 mode_lib->ms.BytePerPixelC,
7564 mode_lib->ms.SwathWidthY,
7565 mode_lib->ms.SwathWidthC,
7566 mode_lib->ms.NoOfDPP,
7567 mode_lib->ms.PSCL_FACTOR,
7568 mode_lib->ms.PSCL_FACTOR_CHROMA,
7569 mode_lib->ms.RequiredDPPCLK,
7570 mode_lib->ms.vactive_sw_bw_l,
7571 mode_lib->ms.vactive_sw_bw_c,
7572 mode_lib->soc.return_bus_width_bytes,
7573 mode_lib->ms.RequiredDISPCLK,
7574 s->tdlut_bytes_to_deliver,
7575 s->prefetch_swath_time_us,
7576
7577 /* Output */
7578 &mode_lib->ms.dcfclk_deepsleep);
7579
7580 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7581 if (mode_lib->ms.dst_y_prefetch[k] < 2.0
7582 || mode_lib->ms.LinesForVM[k] >= 32.0
7583 || mode_lib->ms.LinesForDPTERow[k] >= 16.0
7584 || mode_lib->ms.NoTimeForPrefetch[k] == true
7585 || s->DSTYAfterScaler[k] > 8) {
7586 mode_lib->ms.support.PrefetchSupported = false;
7587 DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
7588 DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
7589 DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
7590 DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
7591 DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
7592 }
7593 }
7594
7595 mode_lib->ms.support.DynamicMetadataSupported = true;
7596 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7597 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
7598 mode_lib->ms.support.DynamicMetadataSupported = false;
7599 }
7600 }
7601
7602 mode_lib->ms.support.VRatioInPrefetchSupported = true;
7603 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7604 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
7605 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
7606 mode_lib->ms.support.VRatioInPrefetchSupported = false;
7607 DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
7608 DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
7609 DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
7610 }
7611 }
7612
7613 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
7614
7615 // By default, do not recalc prefetch schedule
7616 s->recalc_prefetch_schedule = 0;
7617
7618 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
7619 if (mode_lib->ms.support.PrefetchSupported) {
7620 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7621 // Calculate Urgent burst factor for prefetch
7622 #ifdef __DML_VBA_DEBUG__
7623 DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
7624 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
7625 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
7626 #endif
7627 CalculateUrgentBurstFactor(
7628 &display_cfg->plane_descriptors[k],
7629 mode_lib->ms.swath_width_luma_ub[k],
7630 mode_lib->ms.swath_width_chroma_ub[k],
7631 mode_lib->ms.SwathHeightY[k],
7632 mode_lib->ms.SwathHeightC[k],
7633 s->line_times[k],
7634 mode_lib->ms.UrgLatency,
7635 mode_lib->ms.VRatioPreY[k],
7636 mode_lib->ms.VRatioPreC[k],
7637 mode_lib->ms.BytePerPixelInDETY[k],
7638 mode_lib->ms.BytePerPixelInDETC[k],
7639 mode_lib->ms.DETBufferSizeY[k],
7640 mode_lib->ms.DETBufferSizeC[k],
7641 /* Output */
7642 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
7643 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
7644 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
7645 }
7646
7647 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
7648 // assume flip bw is 0 at this point
7649 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7650 mode_lib->ms.final_flip_bw[k] = 0;
7651
7652 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
7653 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
7654 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
7655 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
7656 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
7657 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
7658
7659 calculate_peak_bandwidth_params->display_cfg = display_cfg;
7660 calculate_peak_bandwidth_params->inc_flip_bw = 0;
7661 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
7662 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
7663 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
7664 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
7665 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
7666 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
7667 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
7668 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
7669
7670 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
7671 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
7672 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
7673 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
7674 calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
7675 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
7676 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
7677 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
7678 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
7679 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
7680 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
7681 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
7682 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
7683 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
7684 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
7685 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
7686 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
7687 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
7688 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
7689
7690 calculate_peak_bandwidth_required(
7691 &mode_lib->scratch,
7692 calculate_peak_bandwidth_params);
7693
7694 // Check urg peak bandwidth against available urg bw
7695 // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active)
7696 check_urgent_bandwidth_support(
7697 &s->dummy_single[0], // double* frac_urg_bandwidth
7698 &s->dummy_single[1], // double* frac_urg_bandwidth_mall
7699 &mode_lib->ms.support.UrgVactiveBandwidthSupport,
7700 &mode_lib->ms.support.PrefetchBandwidthSupported,
7701
7702 mode_lib->soc.mall_allocated_for_dcn_mbytes,
7703 mode_lib->ms.support.non_urg_bandwidth_required,
7704 mode_lib->ms.support.urg_vactive_bandwidth_required,
7705 mode_lib->ms.support.urg_bandwidth_required,
7706 mode_lib->ms.support.urg_bandwidth_available);
7707
7708 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
7709 DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
7710
7711 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7712 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
7713 mode_lib->ms.support.PrefetchSupported = false;
7714 DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
7715 }
7716 }
7717
7718 #ifdef DML_GLOBAL_PREFETCH_CHECK
7719 if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
7720 CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
7721 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
7722 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
7723 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
7724 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
7725 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
7726 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
7727 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
7728 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7729 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
7730 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
7731 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
7732 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
7733 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
7734 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
7735 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
7736 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
7737 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
7738 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
7739 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
7740 if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
7741 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
7742
7743 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
7744 ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
7745
7746 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
7747 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
7748 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
7749 mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
7750 s->recalc_prefetch_done = 1;
7751 s->recalc_prefetch_schedule = 1;
7752 }
7753 #endif
7754 } // prefetch schedule ok, do urg bw and flip schedule
7755 } while (s->recalc_prefetch_schedule);
7756
7757 // Flip Schedule
7758 // Both prefetch schedule and BW okay
7759 if (mode_lib->ms.support.PrefetchSupported == true) {
7760 mode_lib->ms.BandwidthAvailableForImmediateFlip =
7761 get_bandwidth_available_for_immediate_flip(
7762 dml2_core_internal_soc_state_sys_active,
7763 mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
7764 mode_lib->ms.support.urg_bandwidth_available);
7765
7766 mode_lib->ms.TotImmediateFlipBytes = 0;
7767 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7768 if (display_cfg->plane_descriptors[k].immediate_flip) {
7769 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
7770 s->HostVMInefficiencyFactor,
7771 mode_lib->ms.vm_bytes[k],
7772 mode_lib->ms.DPTEBytesPerRow[k],
7773 mode_lib->ms.meta_row_bytes[k]);
7774 } else {
7775 s->per_pipe_flip_bytes[k] = 0;
7776 }
7777 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
7778
7779 }
7780
7781 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7782 CalculateFlipSchedule(
7783 &mode_lib->scratch,
7784 display_cfg->plane_descriptors[k].immediate_flip,
7785 1, // use_lb_flip_bw
7786 s->HostVMInefficiencyFactor,
7787 s->Tvm_trips_flip[k],
7788 s->Tr0_trips_flip[k],
7789 s->Tvm_trips_flip_rounded[k],
7790 s->Tr0_trips_flip_rounded[k],
7791 display_cfg->gpuvm_enable,
7792 mode_lib->ms.vm_bytes[k],
7793 mode_lib->ms.DPTEBytesPerRow[k],
7794 mode_lib->ms.BandwidthAvailableForImmediateFlip,
7795 mode_lib->ms.TotImmediateFlipBytes,
7796 display_cfg->plane_descriptors[k].pixel_format,
7797 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
7798 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
7799 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
7800 mode_lib->ms.Tno_bw_flip[k],
7801 mode_lib->ms.dpte_row_height[k],
7802 mode_lib->ms.dpte_row_height_chroma[k],
7803 mode_lib->ms.use_one_row_for_frame_flip[k],
7804 mode_lib->ip.max_flip_time_us,
7805 mode_lib->ip.max_flip_time_lines,
7806 s->per_pipe_flip_bytes[k],
7807 mode_lib->ms.meta_row_bytes[k],
7808 s->meta_row_height_luma[k],
7809 s->meta_row_height_chroma[k],
7810 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
7811
7812 /* Output */
7813 &mode_lib->ms.dst_y_per_vm_flip[k],
7814 &mode_lib->ms.dst_y_per_row_flip[k],
7815 &mode_lib->ms.final_flip_bw[k],
7816 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
7817 }
7818
7819 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
7820 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
7821 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
7822 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
7823 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
7824 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
7825
7826 calculate_peak_bandwidth_params->display_cfg = display_cfg;
7827 calculate_peak_bandwidth_params->inc_flip_bw = 1;
7828 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
7829 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
7830 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
7831 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
7832 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
7833 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
7834 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
7835 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
7836
7837 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
7838 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
7839 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
7840 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
7841 calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
7842 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
7843 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
7844 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
7845 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
7846 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
7847 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
7848 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
7849 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
7850 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
7851 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
7852 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
7853 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
7854 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
7855 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
7856
7857 calculate_peak_bandwidth_required(
7858 &mode_lib->scratch,
7859 calculate_peak_bandwidth_params);
7860
7861 calculate_immediate_flip_bandwidth_support(
7862 &s->dummy_single[0], // double* frac_urg_bandwidth_flip
7863 &mode_lib->ms.support.ImmediateFlipSupport,
7864
7865 dml2_core_internal_soc_state_sys_active,
7866 mode_lib->ms.support.urg_bandwidth_required_flip,
7867 mode_lib->ms.support.non_urg_bandwidth_required_flip,
7868 mode_lib->ms.support.urg_bandwidth_available);
7869
7870 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7871 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
7872 mode_lib->ms.support.ImmediateFlipSupport = false;
7873 }
7874
7875 } else { // if prefetch not support, assume iflip is not supported too
7876 mode_lib->ms.support.ImmediateFlipSupport = false;
7877 }
7878
7879 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
7880 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
7881 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
7882 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
7883 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
7884 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
7885 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
7886 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
7887 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
7888 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
7889 s->mSOCParameters.USRRetrainingLatency = 0;
7890 s->mSOCParameters.SMNLatency = 0;
7891 s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx);
7892 s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx);
7893 s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
7894 s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
7895
7896 CalculateWatermarks_params->display_cfg = display_cfg;
7897 CalculateWatermarks_params->USRRetrainingRequired = false;
7898 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7899 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
7900 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
7901 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
7902 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
7903 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
7904 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
7905 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
7906 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
7907 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
7908 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
7909 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
7910 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
7911 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
7912 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
7913 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
7914 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
7915 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
7916 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
7917 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
7918 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
7919 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
7920 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
7921 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
7922 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
7923 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
7924 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
7925
7926 // Output
7927 CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
7928 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
7929 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
7930 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
7931 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
7932 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
7933 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
7934 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
7935 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
7936 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
7937 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
7938 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
7939
7940 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
7941
7942 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
7943 DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__);
7944
7945 }
7946
7947
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex * in_out_params)7948 static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
7949 {
7950 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
7951 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
7952 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
7953
7954 double outstanding_latency_us = 0;
7955
7956 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7957 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
7958 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
7959 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
7960 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
7961 unsigned int k, m, n;
7962
7963 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
7964 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
7965
7966 mode_lib->ms.num_active_planes = display_cfg->num_planes;
7967 get_stream_output_bpp(s->OutputBpp, display_cfg);
7968
7969 mode_lib->ms.state_idx = in_out_params->min_clk_index;
7970 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
7971 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
7972 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
7973 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
7974 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
7975 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000;
7976 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
7977 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000;
7978 mode_lib->ms.uclk_freq_mhz = (double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz / 1000.0;
7979 if (!mode_lib->ms.uclk_freq_mhz)
7980 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
7981 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
7982 mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
7983 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
7984 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
7985
7986 #if defined(__DML_VBA_DEBUG__)
7987 DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
7988 DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
7989 DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
7990 DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
7991 DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
7992 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
7993 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
7994 DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
7995 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
7996 DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
7997 DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
7998 DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
7999 DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
8000 DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
8001 DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
8002 DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
8003
8004 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8005 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
8006 #endif
8007
8008 CalculateMaxDETAndMinCompressedBufferSize(
8009 mode_lib->ip.config_return_buffer_size_in_kbytes,
8010 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
8011 mode_lib->ip.rob_buffer_size_kbytes,
8012 mode_lib->ip.max_num_dpp,
8013 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
8014 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
8015 mode_lib->ip.dcn_mrq_present,
8016
8017 /* Output */
8018 &mode_lib->ms.MaxTotalDETInKByte,
8019 &mode_lib->ms.NomDETInKByte,
8020 &mode_lib->ms.MinCompressedBufferSizeInKByte);
8021
8022 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
8023
8024 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
8025
8026 /*Scale Ratio, taps Support Check*/
8027 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
8028 // Many core tests are still setting scaling parameters "incorrectly"
8029 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8030 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
8031 && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
8032 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
8033 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
8034 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
8035 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
8036 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
8037 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
8038 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
8039 || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
8040 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
8041 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
8042 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
8043 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
8044 || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
8045 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
8046 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
8047 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
8048 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
8049 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
8050 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
8051 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
8052 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
8053 }
8054 }
8055
8056 /*Source Format, Pixel Format and Scan Support Check*/
8057 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
8058 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8059 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
8060 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
8061 }
8062 }
8063
8064 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8065 CalculateBytePerPixelAndBlockSizes(
8066 display_cfg->plane_descriptors[k].pixel_format,
8067 display_cfg->plane_descriptors[k].surface.tiling,
8068 display_cfg->plane_descriptors[k].surface.plane0.pitch,
8069 display_cfg->plane_descriptors[k].surface.plane1.pitch,
8070
8071 /* Output */
8072 &mode_lib->ms.BytePerPixelY[k],
8073 &mode_lib->ms.BytePerPixelC[k],
8074 &mode_lib->ms.BytePerPixelInDETY[k],
8075 &mode_lib->ms.BytePerPixelInDETC[k],
8076 &mode_lib->ms.Read256BlockHeightY[k],
8077 &mode_lib->ms.Read256BlockHeightC[k],
8078 &mode_lib->ms.Read256BlockWidthY[k],
8079 &mode_lib->ms.Read256BlockWidthC[k],
8080 &mode_lib->ms.MacroTileHeightY[k],
8081 &mode_lib->ms.MacroTileHeightC[k],
8082 &mode_lib->ms.MacroTileWidthY[k],
8083 &mode_lib->ms.MacroTileWidthC[k],
8084 &mode_lib->ms.surf_linear128_l[k],
8085 &mode_lib->ms.surf_linear128_c[k]);
8086 }
8087
8088 /*Bandwidth Support Check*/
8089 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8090 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
8091 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
8092 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
8093 } else {
8094 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8095 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8096 }
8097 }
8098
8099 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8100 mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8101 mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8102
8103 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
8104 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
8105
8106 #ifdef __DML_VBA_DEBUG__
8107 DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
8108 DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0);
8109 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
8110 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
8111 #endif
8112 }
8113
8114 // Writeback bandwidth
8115 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8116 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
8117 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
8118 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
8119 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
8120 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
8121 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
8122 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8123 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
8124 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
8125 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
8126 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
8127 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
8128 } else {
8129 mode_lib->ms.WriteBandwidth[k][0] = 0.0;
8130 }
8131 }
8132
8133 /*Writeback Latency support check*/
8134 mode_lib->ms.support.WritebackLatencySupport = true;
8135 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8136 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
8137 (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
8138 mode_lib->ms.support.WritebackLatencySupport = false;
8139 }
8140 }
8141
8142
8143 /* Writeback Scale Ratio and Taps Support Check */
8144 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
8145 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8146 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8147 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
8148 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
8149 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
8150 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
8151 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
8152 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
8153 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
8154 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
8155 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
8156 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
8157 }
8158 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
8159 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
8160 }
8161 }
8162 }
8163
8164 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8165 CalculateSinglePipeDPPCLKAndSCLThroughput(
8166 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
8167 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
8168 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
8169 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
8170 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
8171 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
8172 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8173 display_cfg->plane_descriptors[k].pixel_format,
8174 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
8175 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
8176 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
8177 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
8178 /* Output */
8179 &mode_lib->ms.PSCL_FACTOR[k],
8180 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
8181 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
8182 }
8183
8184 // Max Viewport Size support
8185 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8186 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
8187 s->MaximumSwathWidthSupportLuma = 15360;
8188 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
8189 s->MaximumSwathWidthSupportLuma = 7680 + 16;
8190 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
8191 s->MaximumSwathWidthSupportLuma = 4320 + 16;
8192 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
8193 s->MaximumSwathWidthSupportLuma = 5120 + 16;
8194 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
8195 s->MaximumSwathWidthSupportLuma = 3072 + 16;
8196 } else {
8197 s->MaximumSwathWidthSupportLuma = 6144 + 16;
8198 }
8199
8200 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
8201 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
8202 } else {
8203 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
8204 }
8205
8206 unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
8207 unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
8208
8209 /*
8210 #if defined(DV_BUILD)
8211 // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
8212 if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
8213 lb_buffer_size_bits_luma = 34620 * 57;
8214 lb_buffer_size_bits_chroma = 13560 * 57;
8215 }
8216 #endif
8217 */
8218 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
8219 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
8220 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
8221 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
8222 } else {
8223 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
8224 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
8225 }
8226
8227 mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
8228 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
8229
8230 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
8231 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
8232 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
8233
8234 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
8235 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
8236 DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
8237 }
8238
8239 /* Cursor Support Check */
8240 mode_lib->ms.support.CursorSupport = true;
8241 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8242 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
8243 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
8244 mode_lib->ms.support.CursorSupport = false;
8245 }
8246 }
8247
8248 /* Valid Pitch Check */
8249 mode_lib->ms.support.PitchSupport = true;
8250 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8251
8252 // data pitch
8253 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
8254
8255 if (mode_lib->ms.surf_linear128_l[k])
8256 alignment_l = alignment_l / 2;
8257
8258 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
8259 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
8260 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
8261
8262 if (mode_lib->ms.surf_linear128_c[k])
8263 alignment_c = alignment_c / 2;
8264 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
8265 } else {
8266 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8267 }
8268
8269 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
8270 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
8271 mode_lib->ms.support.PitchSupport = false;
8272 #if defined(__DML_VBA_DEBUG__)
8273 DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
8274 DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
8275 DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
8276 DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
8277 DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
8278 #endif
8279 }
8280
8281 // meta pitch
8282 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
8283 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
8284 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
8285
8286 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
8287 mode_lib->ms.support.PitchSupport = false;
8288
8289 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
8290 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
8291 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
8292
8293 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
8294 mode_lib->ms.support.PitchSupport = false;
8295 }
8296 } else {
8297 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
8298 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
8299 }
8300 }
8301
8302 mode_lib->ms.support.ViewportExceedsSurface = false;
8303 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
8304 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8305 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
8306 display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
8307 mode_lib->ms.support.ViewportExceedsSurface = true;
8308 #if defined(__DML_VBA_DEBUG__)
8309 DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
8310 DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
8311 DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
8312 DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
8313 DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
8314 #endif
8315 }
8316 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
8317 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
8318 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
8319 mode_lib->ms.support.ViewportExceedsSurface = true;
8320 }
8321 }
8322 }
8323 }
8324
8325 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
8326 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
8327 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
8328 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
8329 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
8330 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
8331 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
8332 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
8333 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
8334 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8335 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
8336 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
8337 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
8338 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
8339 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
8340 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
8341 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
8342 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
8343 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
8344 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
8345 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
8346 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
8347 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
8348 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
8349 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
8350 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
8351 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
8352 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
8353 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
8354 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8355
8356 // output
8357 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
8358 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
8359 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
8360 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
8361 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
8362 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
8363 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
8364 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
8365 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
8366 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
8367 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
8368 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
8369 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
8370 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
8371 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
8372 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
8373 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
8374 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
8375 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
8376 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
8377 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
8378
8379 // This call is just to find out whether there is enough DET space to support the full viewport in one pipe.
8380 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
8381
8382 mode_lib->ms.TotalNumberOfActiveDPP = 0;
8383 mode_lib->ms.TotalNumberOfActiveOPP = 0;
8384 mode_lib->ms.support.TotalAvailablePipesSupport = true;
8385
8386 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8387 /*Number Of DSC Slices*/
8388 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
8389 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
8390
8391 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
8392 mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
8393 else {
8394 if (s->PixelClockBackEnd[k] > 4800) {
8395 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
8396 } else if (s->PixelClockBackEnd[k] > 2400) {
8397 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
8398 } else if (s->PixelClockBackEnd[k] > 1200) {
8399 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
8400 } else if (s->PixelClockBackEnd[k] > 340) {
8401 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
8402 } else {
8403 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
8404 }
8405 }
8406 } else {
8407 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
8408 }
8409
8410 CalculateODMMode(
8411 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
8412 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8413 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8414 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8415 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
8416 mode_lib->ms.max_dispclk_freq_mhz,
8417 false, // DSCEnable
8418 mode_lib->ms.TotalNumberOfActiveDPP,
8419 mode_lib->ms.TotalNumberOfActiveOPP,
8420 mode_lib->ip.max_num_dpp,
8421 mode_lib->ip.max_num_opp,
8422 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8423 mode_lib->ms.support.NumberOfDSCSlices[k],
8424
8425 /* Output */
8426 &s->TotalAvailablePipesSupportNoDSC,
8427 &s->NumberOfDPPNoDSC,
8428 &s->ODMModeNoDSC,
8429 &s->RequiredDISPCLKPerSurfaceNoDSC);
8430
8431 CalculateODMMode(
8432 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
8433 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8434 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8435 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8436 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
8437 mode_lib->ms.max_dispclk_freq_mhz,
8438 true, // DSCEnable
8439 mode_lib->ms.TotalNumberOfActiveDPP,
8440 mode_lib->ms.TotalNumberOfActiveOPP,
8441 mode_lib->ip.max_num_dpp,
8442 mode_lib->ip.max_num_opp,
8443 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8444 mode_lib->ms.support.NumberOfDSCSlices[k],
8445
8446 /* Output */
8447 &s->TotalAvailablePipesSupportDSC,
8448 &s->NumberOfDPPDSC,
8449 &s->ODMModeDSC,
8450 &s->RequiredDISPCLKPerSurfaceDSC);
8451
8452 CalculateOutputLink(
8453 &mode_lib->scratch,
8454 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
8455 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
8456 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
8457 mode_lib->soc.phy_downspread_percent,
8458 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8459 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8460 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8461 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8462 s->PixelClockBackEnd[k],
8463 s->OutputBpp[k],
8464 mode_lib->ip.maximum_dsc_bits_per_component,
8465 mode_lib->ms.support.NumberOfDSCSlices[k],
8466 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
8467 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
8468 s->ODMModeNoDSC,
8469 s->ODMModeDSC,
8470 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
8471 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
8472 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
8473
8474 /* Output */
8475 &mode_lib->ms.RequiresDSC[k],
8476 &mode_lib->ms.RequiresFEC[k],
8477 &mode_lib->ms.OutputBpp[k],
8478 &mode_lib->ms.OutputType[k],
8479 &mode_lib->ms.OutputRate[k],
8480 &mode_lib->ms.RequiredSlots[k]);
8481
8482 if (s->OutputBpp[k] == 0.0) {
8483 s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
8484 }
8485
8486 if (mode_lib->ms.RequiresDSC[k] == false) {
8487 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
8488 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
8489 if (!s->TotalAvailablePipesSupportNoDSC)
8490 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8491 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
8492 } else {
8493 mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
8494 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
8495 if (!s->TotalAvailablePipesSupportDSC)
8496 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8497 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
8498 }
8499 #if defined(__DML_VBA_DEBUG__)
8500 DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
8501 DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
8502 #endif
8503
8504 // ensure the number of dsc slices is an integer multiple based on ODM mode
8505 mode_lib->ms.support.DSCSlicesODMModeSupported = true;
8506 if (mode_lib->ms.RequiresDSC[k]) {
8507 // fail a ms check if the override num_slices doesn't align with odm mode setting
8508 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
8509 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8510 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
8511 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8512 mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
8513 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8514 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
8515 #if defined(__DML_VBA_DEBUG__)
8516 if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
8517 DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
8518 DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
8519 DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
8520 }
8521 #endif
8522 } else {
8523 // safeguard to ensure the dml-derived dsc slices and odm setting are compatible
8524 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8525 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
8526 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8527 mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
8528 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8529 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
8530 }
8531
8532 } else {
8533 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
8534 }
8535 }
8536
8537 mode_lib->ms.support.incorrect_imall_usage = 0;
8538 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8539 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
8540 mode_lib->ms.support.incorrect_imall_usage = 1;
8541 }
8542
8543 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8544 mode_lib->ms.MPCCombine[k] = false;
8545 mode_lib->ms.NoOfDPP[k] = 1;
8546 mode_lib->ms.NoOfOPP[k] = 1;
8547
8548 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
8549 mode_lib->ms.MPCCombine[k] = false;
8550 mode_lib->ms.NoOfDPP[k] = 4;
8551 mode_lib->ms.NoOfOPP[k] = 4;
8552 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
8553 mode_lib->ms.MPCCombine[k] = false;
8554 mode_lib->ms.NoOfDPP[k] = 3;
8555 mode_lib->ms.NoOfOPP[k] = 3;
8556 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
8557 mode_lib->ms.MPCCombine[k] = false;
8558 mode_lib->ms.NoOfDPP[k] = 2;
8559 mode_lib->ms.NoOfOPP[k] = 2;
8560 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
8561 mode_lib->ms.MPCCombine[k] = true;
8562 mode_lib->ms.NoOfDPP[k] = 2;
8563 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
8564 mode_lib->ms.MPCCombine[k] = false;
8565 mode_lib->ms.NoOfDPP[k] = 1;
8566 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
8567 DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
8568 }
8569 } else {
8570 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
8571 mode_lib->ms.MPCCombine[k] = true;
8572 mode_lib->ms.NoOfDPP[k] = 2;
8573 }
8574 }
8575 #if defined(__DML_VBA_DEBUG__)
8576 DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
8577 #endif
8578 }
8579
8580 mode_lib->ms.TotalNumberOfActiveDPP = 0;
8581 mode_lib->ms.TotalNumberOfActiveOPP = 0;
8582 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8583 mode_lib->ms.TotalNumberOfActiveDPP += mode_lib->ms.NoOfDPP[k];
8584 mode_lib->ms.TotalNumberOfActiveOPP += mode_lib->ms.NoOfOPP[k];
8585 }
8586 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
8587 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8588 if (mode_lib->ms.TotalNumberOfActiveOPP > (unsigned int)mode_lib->ip.max_num_opp)
8589 mode_lib->ms.support.TotalAvailablePipesSupport = false;
8590
8591
8592 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
8593 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
8594 if (mode_lib->ms.NoOfDPP[k] == 1)
8595 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
8596 }
8597
8598 //DISPCLK/DPPCLK
8599 mode_lib->ms.WritebackRequiredDISPCLK = 0;
8600 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8601 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8602 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
8603 CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
8604 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8605 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
8606 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
8607 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
8608 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
8609 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
8610 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
8611 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8612 mode_lib->ip.writeback_line_buffer_buffer_size));
8613 }
8614 }
8615
8616 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
8617 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8618 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
8619 }
8620
8621 mode_lib->ms.GlobalDPPCLK = 0;
8622 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8623 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
8624 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
8625 }
8626
8627 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
8628
8629 /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
8630 s->TotalNumberOfActiveOTG = 0;
8631 s->TotalNumberOfActiveHDMIFRL = 0;
8632 s->TotalNumberOfActiveDP2p0 = 0;
8633 s->TotalNumberOfActiveDP2p0Outputs = 0;
8634 s->TotalNumberOfActiveWriteback = 0;
8635 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8636
8637 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8638 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8639 if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8640 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8641
8642 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
8643 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
8644
8645 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
8646 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
8647 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
8648 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
8649 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
8650 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
8651 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
8652 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
8653 //}
8654 }
8655 }
8656 }
8657 }
8658
8659 /* Writeback Mode Support Check */
8660 mode_lib->ms.support.EnoughWritebackUnits = 1;
8661 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
8662 mode_lib->ms.support.EnoughWritebackUnits = false;
8663 }
8664 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
8665 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
8666 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
8667
8668
8669 mode_lib->ms.support.ExceededMultistreamSlots = false;
8670 mode_lib->ms.support.LinkCapacitySupport = true;
8671 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8672 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
8673 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8674 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
8675 mode_lib->ms.support.LinkCapacitySupport = false;
8676 }
8677 }
8678
8679 mode_lib->ms.support.P2IWith420 = false;
8680 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
8681 mode_lib->ms.support.DSC422NativeNotSupported = false;
8682 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
8683 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
8684 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
8685 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
8686 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
8687 mode_lib->ms.support.NotEnoughLanesForMSO = false;
8688
8689 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8690 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8691 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8692 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
8693 mode_lib->ms.support.P2IWith420 = true;
8694
8695 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
8696 mode_lib->ms.support.DSC422NativeNotSupported = true;
8697
8698 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
8699 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
8700 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
8701 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
8702 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
8703 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
8704 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
8705
8706 // FIXME_STAGE2
8707 //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
8708 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
8709 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
8710 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
8711 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8712 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8713 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
8714 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8715 // }
8716 //}
8717
8718 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8719 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
8720 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
8721 // FIXME_STAGE2
8722 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
8723 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8724 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8725 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
8726 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8727 //}
8728 }
8729 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
8730 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
8731 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
8732
8733 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
8734 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
8735 mode_lib->ms.support.NotEnoughLanesForMSO = true;
8736 }
8737 }
8738
8739 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
8740 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8741 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
8742 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8743 mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
8744 mode_lib->ms.RequiresDSC[k],
8745 s->PixelClockBackEnd[k],
8746 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8747 mode_lib->ms.OutputBpp[k],
8748 mode_lib->ms.support.NumberOfDSCSlices[k],
8749 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8750 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8751 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
8752 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
8753
8754 if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) {
8755 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
8756 }
8757 } else {
8758 /* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus
8759 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
8760 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
8761 * required - by setting phantom dtbclk to 0 we ignore it.
8762 */
8763 mode_lib->ms.RequiredDTBCLK[k] = 0;
8764 }
8765 }
8766
8767 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
8768 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8769 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
8770 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
8771 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8772 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8773 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
8774 s->DSCFormatFactor = 2;
8775 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
8776 s->DSCFormatFactor = 1;
8777 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8778 s->DSCFormatFactor = 2;
8779 } else {
8780 s->DSCFormatFactor = 1;
8781 }
8782 #ifdef __DML_VBA_DEBUG__
8783 DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
8784 #endif
8785 if (mode_lib->ms.RequiresDSC[k] == true) {
8786 s->PixelClockBackEndFactor = 3.0;
8787
8788 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8789 s->PixelClockBackEndFactor = 12.0;
8790 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8791 s->PixelClockBackEndFactor = 9.0;
8792 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8793 s->PixelClockBackEndFactor = 6.0;
8794
8795 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
8796 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
8797 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
8798 }
8799
8800 #ifdef __DML_VBA_DEBUG__
8801 DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
8802 DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
8803 DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
8804 DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
8805 #endif
8806 }
8807 }
8808 }
8809
8810 /* Check DSC Unit and Slices Support */
8811 mode_lib->ms.support.NotEnoughDSCSlices = false;
8812 s->TotalDSCUnitsRequired = 0;
8813 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
8814 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8815
8816 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8817 if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8818 s->NumDSCUnitRequired = 1;
8819
8820 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8821 s->NumDSCUnitRequired = 4;
8822 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8823 s->NumDSCUnitRequired = 3;
8824 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8825 s->NumDSCUnitRequired = 2;
8826
8827 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
8828 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
8829 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
8830
8831 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
8832 mode_lib->ms.support.NotEnoughDSCSlices = true;
8833 }
8834 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8835 }
8836
8837 mode_lib->ms.support.NotEnoughDSCUnits = false;
8838 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
8839 mode_lib->ms.support.NotEnoughDSCUnits = true;
8840 }
8841
8842 /*DSC Delay per state*/
8843 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8844 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
8845 mode_lib->ms.ODMMode[k],
8846 mode_lib->ip.maximum_dsc_bits_per_component,
8847 s->OutputBpp[k],
8848 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8849 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8850 mode_lib->ms.support.NumberOfDSCSlices[k],
8851 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8852 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8853 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8854 s->PixelClockBackEnd[k]);
8855 }
8856
8857 // Figure out the swath and DET configuration after the num dpp per plane is figured out
8858 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8859 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
8860 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
8861
8862 // output
8863 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
8864 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
8865 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
8866 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
8867 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
8868 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
8869 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
8870 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
8871 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
8872 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
8873 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
8874 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
8875 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
8876 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
8877 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
8878 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
8879 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
8880 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
8881 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
8882
8883 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
8884
8885 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
8886 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8887 mode_lib->ms.SurfaceSizeInMALL[k] = 0;
8888 mode_lib->ms.support.ExceededMALLSize = 0;
8889 } else {
8890 CalculateSurfaceSizeInMall(
8891 display_cfg,
8892 mode_lib->ms.num_active_planes,
8893 mode_lib->soc.mall_allocated_for_dcn_mbytes,
8894
8895 mode_lib->ms.BytePerPixelY,
8896 mode_lib->ms.BytePerPixelC,
8897 mode_lib->ms.Read256BlockWidthY,
8898 mode_lib->ms.Read256BlockWidthC,
8899 mode_lib->ms.Read256BlockHeightY,
8900 mode_lib->ms.Read256BlockHeightC,
8901 mode_lib->ms.MacroTileWidthY,
8902 mode_lib->ms.MacroTileWidthC,
8903 mode_lib->ms.MacroTileHeightY,
8904 mode_lib->ms.MacroTileHeightC,
8905
8906 /* Output */
8907 mode_lib->ms.SurfaceSizeInMALL,
8908 &mode_lib->ms.support.ExceededMALLSize);
8909 }
8910
8911 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
8912 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8913 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
8914 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
8915 }
8916 }
8917
8918 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8919 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8920 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
8921 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
8922 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8923 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8924 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
8925 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
8926 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
8927 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
8928 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
8929 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
8930 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
8931 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
8932 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8933 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8934 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8935 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8936 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
8937 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8938 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8939 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8940 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8941 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8942 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8943 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8944 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
8945 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8946 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8947 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8948 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8949 s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8950 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8951 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
8952 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
8953 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
8954
8955 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
8956 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
8957 }
8958
8959 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
8960 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8961 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
8962 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
8963 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
8964 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
8965 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
8966 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
8967 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
8968 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
8969 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
8970 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8971
8972 // output
8973 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
8974 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
8975 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
8976 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
8977 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
8978 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
8979 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
8980 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
8981 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
8982 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
8983 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
8984 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
8985 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
8986 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
8987 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
8988 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
8989 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
8990 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
8991 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
8992 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
8993 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
8994 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
8995 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
8996 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
8997 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
8998 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
8999 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
9000 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
9001 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
9002 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
9003 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
9004 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
9005 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
9006 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
9007 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
9008 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
9009 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
9010 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
9011 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
9012 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
9013 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
9014 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
9015 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
9016 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
9017 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
9018 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
9019 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
9020 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
9021 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
9022 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
9023 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
9024 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
9025
9026 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
9027
9028 mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
9029 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
9030
9031 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9032 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
9033 mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
9034
9035 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
9036 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
9037
9038 #ifdef __DML_VBA_DEBUG__
9039 DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
9040 DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
9041 #endif
9042 }
9043 #ifdef __DML_VBA_DEBUG__
9044 DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
9045 DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
9046 #endif
9047
9048 /* VActive bytes to fetch for UCLK P-State */
9049 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
9050 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
9051
9052 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
9053 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
9054 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
9055 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
9056 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
9057 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
9058 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
9059 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
9060 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
9061 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
9062 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
9063 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
9064 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
9065 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
9066 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
9067 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
9068 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
9069
9070 /* outputs */
9071 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk];
9072 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk];
9073
9074 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
9075
9076 /* Excess VActive bandwidth required to fill DET */
9077 calculate_excess_vactive_bandwidth_required(
9078 display_cfg,
9079 mode_lib->ms.num_active_planes,
9080 s->pstate_bytes_required_l[dml2_pstate_type_uclk],
9081 s->pstate_bytes_required_c[dml2_pstate_type_uclk],
9082 /* outputs */
9083 mode_lib->ms.excess_vactive_fill_bw_l,
9084 mode_lib->ms.excess_vactive_fill_bw_c);
9085
9086 mode_lib->ms.UrgLatency = CalculateUrgentLatency(
9087 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
9088 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
9089 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
9090 mode_lib->soc.do_urgent_latency_adjustment,
9091 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
9092 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
9093 mode_lib->ms.FabricClock,
9094 mode_lib->ms.uclk_freq_mhz,
9095 mode_lib->soc.qos_parameters.qos_type,
9096 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
9097 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
9098 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
9099 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
9100 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
9101 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
9102
9103 mode_lib->ms.TripToMemory = CalculateTripToMemory(
9104 mode_lib->ms.UrgLatency,
9105 mode_lib->ms.FabricClock,
9106 mode_lib->ms.uclk_freq_mhz,
9107 mode_lib->soc.qos_parameters.qos_type,
9108 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
9109 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
9110 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
9111 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
9112 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
9113
9114 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
9115
9116 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9117 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9118 bool cursor_not_enough_urgent_latency_hiding = false;
9119
9120 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
9121 calculate_cursor_req_attributes(
9122 display_cfg->plane_descriptors[k].cursor.cursor_width,
9123 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
9124
9125 // output
9126 &s->cursor_lines_per_chunk[k],
9127 &s->cursor_bytes_per_line[k],
9128 &s->cursor_bytes_per_chunk[k],
9129 &s->cursor_bytes[k]);
9130
9131 calculate_cursor_urgent_burst_factor(
9132 mode_lib->ip.cursor_buffer_size,
9133 display_cfg->plane_descriptors[k].cursor.cursor_width,
9134 s->cursor_bytes_per_chunk[k],
9135 s->cursor_lines_per_chunk[k],
9136 line_time_us,
9137 mode_lib->ms.UrgLatency,
9138
9139 // output
9140 &mode_lib->ms.UrgentBurstFactorCursor[k],
9141 &cursor_not_enough_urgent_latency_hiding);
9142 }
9143
9144 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
9145
9146 #ifdef __DML_VBA_DEBUG__
9147 DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
9148 DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9149 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9150 #endif
9151
9152 CalculateUrgentBurstFactor(
9153 &display_cfg->plane_descriptors[k],
9154 mode_lib->ms.swath_width_luma_ub[k],
9155 mode_lib->ms.swath_width_chroma_ub[k],
9156 mode_lib->ms.SwathHeightY[k],
9157 mode_lib->ms.SwathHeightC[k],
9158 line_time_us,
9159 mode_lib->ms.UrgLatency,
9160 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
9161 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
9162 mode_lib->ms.BytePerPixelInDETY[k],
9163 mode_lib->ms.BytePerPixelInDETC[k],
9164 mode_lib->ms.DETBufferSizeY[k],
9165 mode_lib->ms.DETBufferSizeC[k],
9166
9167 // Output
9168 &mode_lib->ms.UrgentBurstFactorLuma[k],
9169 &mode_lib->ms.UrgentBurstFactorChroma[k],
9170 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
9171
9172 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
9173 }
9174
9175 CalculateDCFCLKDeepSleep(
9176 display_cfg,
9177 mode_lib->ms.num_active_planes,
9178 mode_lib->ms.BytePerPixelY,
9179 mode_lib->ms.BytePerPixelC,
9180 mode_lib->ms.SwathWidthY,
9181 mode_lib->ms.SwathWidthC,
9182 mode_lib->ms.NoOfDPP,
9183 mode_lib->ms.PSCL_FACTOR,
9184 mode_lib->ms.PSCL_FACTOR_CHROMA,
9185 mode_lib->ms.RequiredDPPCLK,
9186 mode_lib->ms.vactive_sw_bw_l,
9187 mode_lib->ms.vactive_sw_bw_c,
9188 mode_lib->soc.return_bus_width_bytes,
9189
9190 /* Output */
9191 &mode_lib->ms.dcfclk_deepsleep);
9192
9193 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9194 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
9195 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
9196 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
9197 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
9198 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
9199 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
9200 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
9201 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
9202 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
9203 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
9204 } else {
9205 mode_lib->ms.WritebackDelayTime[k] = 0.0;
9206 }
9207 }
9208
9209 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
9210 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9211 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
9212 s->MaximumVStartup[k] = CalculateMaxVStartup(
9213 mode_lib->ip.ptoi_supported,
9214 mode_lib->ip.vblank_nom_default_us,
9215 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
9216 mode_lib->ms.WritebackDelayTime[k]);
9217 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
9218 }
9219
9220 #ifdef __DML_VBA_DEBUG__
9221 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
9222 #endif
9223
9224 /* Immediate Flip and MALL parameters */
9225 s->ImmediateFlipRequired = false;
9226 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9227 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
9228 }
9229
9230 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
9231 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9232 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
9233 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
9234 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
9235 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
9236 }
9237
9238 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
9239 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9240 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
9241 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
9242 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
9243 }
9244
9245 s->FullFrameMALLPStateMethod = false;
9246 s->SubViewportMALLPStateMethod = false;
9247 s->PhantomPipeMALLPStateMethod = false;
9248 s->SubViewportMALLRefreshGreaterThan120Hz = false;
9249 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9250 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
9251 s->FullFrameMALLPStateMethod = true;
9252 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
9253 s->SubViewportMALLPStateMethod = true;
9254 if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
9255 // For dv, small frame tests will have very high refresh rate
9256 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
9257 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9258 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
9259 if (refresh_rate > 120)
9260 s->SubViewportMALLRefreshGreaterThan120Hz = true;
9261 }
9262 }
9263 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
9264 s->PhantomPipeMALLPStateMethod = true;
9265 }
9266 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
9267 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
9268
9269 #ifdef __DML_VBA_DEBUG__
9270 DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
9271 DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
9272 DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
9273 DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
9274 DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
9275 DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
9276 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
9277 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
9278 DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
9279 #endif
9280
9281 mode_lib->ms.support.OutstandingRequestsSupport = true;
9282 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
9283
9284 mode_lib->ms.support.avg_urgent_latency_us
9285 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
9286 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
9287 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
9288 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
9289
9290 mode_lib->ms.support.avg_non_urgent_latency_us
9291 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
9292 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
9293 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
9294 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
9295
9296 mode_lib->ms.support.max_non_urgent_latency_us
9297 = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
9298 / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
9299 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
9300 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
9301 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
9302
9303 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9304
9305 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9306 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
9307 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
9308
9309 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
9310 mode_lib->ms.support.OutstandingRequestsSupport = false;
9311 }
9312
9313 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
9314 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
9315 }
9316
9317 #ifdef __DML_VBA_DEBUG__
9318 DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
9319 DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
9320 DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
9321 DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
9322 #endif
9323 }
9324
9325 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
9326 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
9327 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
9328
9329 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
9330 mode_lib->ms.support.OutstandingRequestsSupport = false;
9331 }
9332
9333 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
9334 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
9335 }
9336 #ifdef __DML_VBA_DEBUG__
9337 DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
9338 DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
9339 #endif
9340 }
9341 }
9342
9343 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
9344 if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
9345 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9346 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
9347 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
9348 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
9349 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
9350 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
9351 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
9352 }
9353 } else {
9354 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9355 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
9356 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
9357 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
9358 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
9359 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
9360 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
9361 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
9362
9363 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
9364 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
9365 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
9366 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
9367 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
9368
9369 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
9370 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
9371 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
9372 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
9373 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
9374 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
9375 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
9376 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
9377 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
9378 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
9379
9380 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
9381 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
9382 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
9383 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
9384 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
9385 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
9386 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
9387 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
9388 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
9389 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
9390
9391 // output
9392 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
9393 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
9394 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
9395 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
9396
9397 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
9398 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
9399 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
9400 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
9401 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
9402
9403 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
9404 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
9405 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
9406 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
9407 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
9408
9409 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
9410 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
9411 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
9412
9413 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
9414 }
9415
9416 calculate_mall_bw_overhead_factor(
9417 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
9418 mode_lib->ms.mall_prefetch_dram_overhead_factor,
9419
9420 // input
9421 display_cfg,
9422 mode_lib->ms.num_active_planes);
9423 }
9424
9425 // Calculate all the bandwidth available
9426 // Need anothe bw for latency evaluation
9427 calculate_bandwidth_available(
9428 mode_lib->ms.support.avg_bandwidth_available_min, // not used
9429 mode_lib->ms.support.avg_bandwidth_available, // not used
9430 mode_lib->ms.support.urg_bandwidth_available_min_latency,
9431 mode_lib->ms.support.urg_bandwidth_available, // not used
9432 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
9433 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
9434
9435 &mode_lib->soc,
9436 display_cfg->hostvm_enable,
9437 mode_lib->ms.DCFCLK,
9438 mode_lib->ms.FabricClock,
9439 mode_lib->ms.dram_bw_mbps);
9440
9441 calculate_bandwidth_available(
9442 mode_lib->ms.support.avg_bandwidth_available_min,
9443 mode_lib->ms.support.avg_bandwidth_available,
9444 mode_lib->ms.support.urg_bandwidth_available_min,
9445 mode_lib->ms.support.urg_bandwidth_available,
9446 mode_lib->ms.support.urg_bandwidth_available_vm_only,
9447 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
9448
9449 &mode_lib->soc,
9450 display_cfg->hostvm_enable,
9451 mode_lib->ms.MaxDCFCLK,
9452 mode_lib->ms.MaxFabricClock,
9453 #ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
9454 mode_lib->ms.dram_bw_mbps);
9455 #else
9456 mode_lib->ms.max_dram_bw_mbps);
9457 #endif
9458
9459 // Average BW support check
9460 calculate_avg_bandwidth_required(
9461 mode_lib->ms.support.avg_bandwidth_required,
9462 // input
9463 display_cfg,
9464 mode_lib->ms.num_active_planes,
9465 mode_lib->ms.vactive_sw_bw_l,
9466 mode_lib->ms.vactive_sw_bw_c,
9467 mode_lib->ms.cursor_bw,
9468 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9469 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9470 mode_lib->ms.mall_prefetch_dram_overhead_factor,
9471 mode_lib->ms.mall_prefetch_sdp_overhead_factor);
9472
9473 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
9474 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
9475 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
9476 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
9477 }
9478
9479 mode_lib->ms.support.AvgBandwidthSupport = true;
9480 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
9481 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9482 if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
9483 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
9484 DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
9485
9486 }
9487 }
9488 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
9489 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
9490 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
9491 mode_lib->ms.support.AvgBandwidthSupport = false;
9492 #ifdef __DML_VBA_DEBUG__
9493 DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
9494 #endif
9495 }
9496 }
9497 }
9498
9499 dml_core_ms_prefetch_check(mode_lib, display_cfg);
9500
9501 mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
9502
9503 //Re-ordering Buffer Support Check
9504 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9505 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
9506 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
9507 mode_lib->ms.support.ROBSupport = true;
9508 } else {
9509 mode_lib->ms.support.ROBSupport = false;
9510 }
9511 } else {
9512 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
9513 mode_lib->ms.support.ROBSupport = true;
9514 } else {
9515 mode_lib->ms.support.ROBSupport = false;
9516 }
9517 }
9518
9519 /* VActive fill time calculations (informative) */
9520 calculate_vactive_det_fill_latency(
9521 display_cfg,
9522 mode_lib->ms.num_active_planes,
9523 s->pstate_bytes_required_l[dml2_pstate_type_uclk],
9524 s->pstate_bytes_required_c[dml2_pstate_type_uclk],
9525 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9526 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9527 mode_lib->ms.vactive_sw_bw_l,
9528 mode_lib->ms.vactive_sw_bw_c,
9529 mode_lib->ms.surface_avg_vactive_required_bw,
9530 mode_lib->ms.surface_peak_required_bw,
9531 /* outputs */
9532 mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk]);
9533
9534 #ifdef __DML_VBA_DEBUG__
9535 DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
9536 DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
9537 #endif
9538
9539 /*Mode Support, Voltage State and SOC Configuration*/
9540 {
9541 if (mode_lib->ms.support.ScaleRatioAndTapsSupport
9542 && mode_lib->ms.support.SourceFormatPixelAndScanSupport
9543 && mode_lib->ms.support.ViewportSizeSupport
9544 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
9545 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
9546 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
9547 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
9548 && !mode_lib->ms.support.ExceededMultistreamSlots
9549 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
9550 && !mode_lib->ms.support.NotEnoughLanesForMSO
9551 && !mode_lib->ms.support.P2IWith420
9552 && !mode_lib->ms.support.DSC422NativeNotSupported
9553 && mode_lib->ms.support.DSCSlicesODMModeSupported
9554 && !mode_lib->ms.support.NotEnoughDSCUnits
9555 && !mode_lib->ms.support.NotEnoughDSCSlices
9556 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
9557 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
9558 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
9559 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
9560 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
9561 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
9562 && mode_lib->ms.support.ROBSupport
9563 && mode_lib->ms.support.OutstandingRequestsSupport
9564 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
9565 && mode_lib->ms.support.DISPCLK_DPPCLK_Support
9566 && mode_lib->ms.support.TotalAvailablePipesSupport
9567 && mode_lib->ms.support.NumberOfOTGSupport
9568 && mode_lib->ms.support.NumberOfHDMIFRLSupport
9569 && mode_lib->ms.support.NumberOfDP2p0Support
9570 && mode_lib->ms.support.EnoughWritebackUnits
9571 && mode_lib->ms.support.WritebackLatencySupport
9572 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
9573 && mode_lib->ms.support.CursorSupport
9574 && mode_lib->ms.support.PitchSupport
9575 && !mode_lib->ms.support.ViewportExceedsSurface
9576 && mode_lib->ms.support.PrefetchSupported
9577 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
9578 && mode_lib->ms.support.AvgBandwidthSupport
9579 && mode_lib->ms.support.DynamicMetadataSupported
9580 && mode_lib->ms.support.VRatioInPrefetchSupported
9581 && mode_lib->ms.support.PTEBufferSizeNotExceeded
9582 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
9583 && !mode_lib->ms.support.ExceededMALLSize
9584 && mode_lib->ms.support.g6_temp_read_support
9585 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
9586 DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__);
9587 mode_lib->ms.support.ModeSupport = true;
9588 } else {
9589 DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__);
9590 mode_lib->ms.support.ModeSupport = false;
9591 }
9592 }
9593
9594 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
9595 DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
9596 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9597
9598 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9599 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
9600 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
9601 }
9602
9603 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9604 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
9605 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
9606 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
9607 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
9608 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
9609 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
9610
9611 #if defined(__DML_VBA_DEBUG__)
9612 DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
9613 DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
9614 #endif
9615 }
9616
9617 #if defined(__DML_VBA_DEBUG__)
9618 if (!mode_lib->ms.support.ModeSupport)
9619 dml2_print_mode_support_info(&mode_lib->ms.support, true);
9620
9621 DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__);
9622 #endif
9623
9624 return mode_lib->ms.support.ModeSupport;
9625 }
9626
dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex * in_out_params)9627 unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
9628 {
9629 unsigned int result;
9630
9631 DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
9632 result = dml_core_mode_support(in_out_params);
9633
9634 if (result)
9635 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
9636
9637 DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
9638
9639 for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
9640 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
9641
9642 DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
9643
9644 return result;
9645 }
9646
CalculatePixelDeliveryTimes(const struct dml2_display_cfg * display_cfg,const struct core_display_cfg_support_info * cfg_support_info,unsigned int NumberOfActiveSurfaces,double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])9647 static void CalculatePixelDeliveryTimes(
9648 const struct dml2_display_cfg *display_cfg,
9649 const struct core_display_cfg_support_info *cfg_support_info,
9650 unsigned int NumberOfActiveSurfaces,
9651 double VRatioPrefetchY[],
9652 double VRatioPrefetchC[],
9653 unsigned int swath_width_luma_ub[],
9654 unsigned int swath_width_chroma_ub[],
9655 double PSCL_THROUGHPUT[],
9656 double PSCL_THROUGHPUT_CHROMA[],
9657 double Dppclk[],
9658 unsigned int BytePerPixelC[],
9659 unsigned int req_per_swath_ub_l[],
9660 unsigned int req_per_swath_ub_c[],
9661
9662 // Output
9663 double DisplayPipeLineDeliveryTimeLuma[],
9664 double DisplayPipeLineDeliveryTimeChroma[],
9665 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
9666 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
9667 double DisplayPipeRequestDeliveryTimeLuma[],
9668 double DisplayPipeRequestDeliveryTimeChroma[],
9669 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
9670 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
9671 {
9672 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9673 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9674
9675 #ifdef __DML_VBA_DEBUG__
9676 DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9677 DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9678 DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
9679 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9680 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
9681 DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
9682 DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
9683 DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
9684 DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
9685 DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
9686 DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
9687 DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
9688 DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
9689 #endif
9690 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
9691 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9692 } else {
9693 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9694 }
9695
9696 if (BytePerPixelC[k] == 0) {
9697 DisplayPipeLineDeliveryTimeChroma[k] = 0;
9698 } else {
9699 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
9700 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9701 } else {
9702 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9703 }
9704 }
9705
9706 if (VRatioPrefetchY[k] <= 1) {
9707 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9708 } else {
9709 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9710 }
9711
9712 if (BytePerPixelC[k] == 0) {
9713 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
9714 } else {
9715 if (VRatioPrefetchC[k] <= 1) {
9716 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9717 } else {
9718 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9719 }
9720 }
9721 #ifdef __DML_VBA_DEBUG__
9722 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
9723 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
9724 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
9725 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
9726 #endif
9727 }
9728
9729 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9730
9731 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
9732 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
9733 if (BytePerPixelC[k] == 0) {
9734 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
9735 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
9736 } else {
9737 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
9738 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
9739 }
9740 #ifdef __DML_VBA_DEBUG__
9741 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
9742 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
9743 DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
9744 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
9745 DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
9746 DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
9747 #endif
9748 }
9749 }
9750
CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params * p)9751 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
9752 {
9753 unsigned int meta_chunk_width;
9754 unsigned int min_meta_chunk_width;
9755 unsigned int meta_chunk_per_row_int;
9756 unsigned int meta_row_remainder;
9757 unsigned int meta_chunk_threshold;
9758 unsigned int meta_chunks_per_row_ub;
9759 unsigned int meta_chunk_width_chroma;
9760 unsigned int min_meta_chunk_width_chroma;
9761 unsigned int meta_chunk_per_row_int_chroma;
9762 unsigned int meta_row_remainder_chroma;
9763 unsigned int meta_chunk_threshold_chroma;
9764 unsigned int meta_chunks_per_row_ub_chroma;
9765 unsigned int dpte_group_width_luma;
9766 unsigned int dpte_groups_per_row_luma_ub;
9767 unsigned int dpte_group_width_chroma;
9768 unsigned int dpte_groups_per_row_chroma_ub;
9769 double pixel_clock_mhz;
9770
9771 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9772 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9773 if (p->BytePerPixelC[k] == 0) {
9774 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9775 } else {
9776 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9777 }
9778 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9779 if (p->BytePerPixelC[k] == 0) {
9780 p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
9781 } else {
9782 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9783 }
9784 }
9785
9786 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9787 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
9788 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9789 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9790 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
9791 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
9792 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9793 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
9794 } else {
9795 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
9796 }
9797 if (meta_row_remainder <= meta_chunk_threshold) {
9798 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
9799 } else {
9800 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
9801 }
9802 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
9803 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9804 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9805 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9806 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9807 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9808 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9809 if (p->BytePerPixelC[k] == 0) {
9810 p->TimePerChromaMetaChunkNominal[k] = 0;
9811 p->TimePerChromaMetaChunkVBlank[k] = 0;
9812 p->TimePerChromaMetaChunkFlip[k] = 0;
9813 } else {
9814 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9815 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9816 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
9817 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
9818 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9819 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
9820 } else {
9821 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
9822 }
9823 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
9824 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
9825 } else {
9826 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
9827 }
9828 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9829 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9830 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9831 }
9832 } else {
9833 p->TimePerMetaChunkNominal[k] = 0;
9834 p->TimePerMetaChunkVBlank[k] = 0;
9835 p->TimePerMetaChunkFlip[k] = 0;
9836 p->TimePerChromaMetaChunkNominal[k] = 0;
9837 p->TimePerChromaMetaChunkVBlank[k] = 0;
9838 p->TimePerChromaMetaChunkFlip[k] = 0;
9839 }
9840
9841 #ifdef __DML_VBA_DEBUG__
9842 DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
9843 DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
9844 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
9845 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
9846 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
9847 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
9848 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
9849 DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
9850 #endif
9851 }
9852
9853 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9854 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9855 if (p->BytePerPixelC[k] == 0) {
9856 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9857 } else {
9858 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9859 }
9860 }
9861
9862 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9863 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9864
9865 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
9866 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
9867 else
9868 p->time_per_tdlut_group[k] = 0;
9869
9870 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
9871
9872 if (p->display_cfg->gpuvm_enable == true) {
9873 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9874 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
9875 } else {
9876 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
9877 }
9878 if (p->use_one_row_for_frame[k]) {
9879 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
9880 } else {
9881 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
9882 }
9883 if (dpte_groups_per_row_luma_ub <= 2) {
9884 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
9885 }
9886 DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
9887 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
9888 DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
9889 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
9890 DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
9891 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
9892 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
9893 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
9894
9895 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9896 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9897 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9898 if (p->BytePerPixelC[k] == 0) {
9899 p->time_per_pte_group_nom_chroma[k] = 0;
9900 p->time_per_pte_group_vblank_chroma[k] = 0;
9901 p->time_per_pte_group_flip_chroma[k] = 0;
9902 } else {
9903 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9904 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
9905 } else {
9906 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
9907 }
9908
9909 if (p->use_one_row_for_frame[k]) {
9910 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
9911 } else {
9912 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
9913 }
9914 if (dpte_groups_per_row_chroma_ub <= 2) {
9915 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
9916 }
9917 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
9918 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
9919 DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
9920
9921 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9922 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9923 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9924 }
9925 } else {
9926 p->time_per_pte_group_nom_luma[k] = 0;
9927 p->time_per_pte_group_vblank_luma[k] = 0;
9928 p->time_per_pte_group_flip_luma[k] = 0;
9929 p->time_per_pte_group_nom_chroma[k] = 0;
9930 p->time_per_pte_group_vblank_chroma[k] = 0;
9931 p->time_per_pte_group_flip_chroma[k] = 0;
9932 }
9933 #ifdef __DML_VBA_DEBUG__
9934 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
9935 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
9936
9937 DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
9938 DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
9939 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
9940 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
9941 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
9942 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
9943 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
9944 DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
9945 #endif
9946 }
9947 } // CalculateMetaAndPTETimes
9948
CalculateVMGroupAndRequestTimes(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelC[],double dst_y_per_vm_vblank[],double dst_y_per_vm_flip[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int tdlut_pte_bytes_per_frame[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],bool mrq_present,double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])9949 static void CalculateVMGroupAndRequestTimes(
9950 const struct dml2_display_cfg *display_cfg,
9951 unsigned int NumberOfActiveSurfaces,
9952 unsigned int BytePerPixelC[],
9953 double dst_y_per_vm_vblank[],
9954 double dst_y_per_vm_flip[],
9955 unsigned int dpte_row_width_luma_ub[],
9956 unsigned int dpte_row_width_chroma_ub[],
9957 unsigned int vm_group_bytes[],
9958 unsigned int dpde0_bytes_per_frame_ub_l[],
9959 unsigned int dpde0_bytes_per_frame_ub_c[],
9960 unsigned int tdlut_pte_bytes_per_frame[],
9961 unsigned int meta_pte_bytes_per_frame_ub_l[],
9962 unsigned int meta_pte_bytes_per_frame_ub_c[],
9963 bool mrq_present,
9964
9965 // Output
9966 double TimePerVMGroupVBlank[],
9967 double TimePerVMGroupFlip[],
9968 double TimePerVMRequestVBlank[],
9969 double TimePerVMRequestFlip[])
9970 {
9971 unsigned int num_group_per_lower_vm_stage = 0;
9972 unsigned int num_req_per_lower_vm_stage = 0;
9973 unsigned int num_group_per_lower_vm_stage_flip;
9974 unsigned int num_group_per_lower_vm_stage_pref;
9975 unsigned int num_req_per_lower_vm_stage_flip;
9976 unsigned int num_req_per_lower_vm_stage_pref;
9977 double line_time;
9978
9979 #ifdef __DML_VBA_DEBUG__
9980 DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
9981 #endif
9982 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9983 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9984 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
9985 #ifdef __DML_VBA_DEBUG__
9986 DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
9987 DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
9988 DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
9989 DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
9990 DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
9991 DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
9992 #endif
9993
9994 if (display_cfg->gpuvm_enable) {
9995 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
9996 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
9997
9998 if (BytePerPixelC[k] > 0)
9999 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
10000 }
10001
10002 if (dcc_mrq_enable) {
10003 if (BytePerPixelC[k] > 0) {
10004 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
10005 math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
10006 } else {
10007 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
10008 }
10009 }
10010
10011 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
10012 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
10013
10014 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10015 num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
10016 if (display_cfg->gpuvm_max_page_table_levels >= 2)
10017 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
10018 }
10019
10020 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
10021 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
10022 if (BytePerPixelC[k] > 0)
10023 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k];
10024 }
10025
10026 if (dcc_mrq_enable) {
10027 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
10028 if (BytePerPixelC[k] > 0)
10029 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
10030 }
10031
10032 num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
10033 num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
10034
10035 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10036 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
10037 }
10038
10039 line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
10040
10041 if (num_group_per_lower_vm_stage_pref > 0)
10042 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
10043 else
10044 TimePerVMGroupVBlank[k] = 0;
10045
10046 if (num_group_per_lower_vm_stage_flip > 0)
10047 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
10048 else
10049 TimePerVMGroupFlip[k] = 0;
10050
10051 if (num_req_per_lower_vm_stage_pref > 0)
10052 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
10053 else
10054 TimePerVMRequestVBlank[k] = 0.0;
10055 if (num_req_per_lower_vm_stage_flip > 0)
10056 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
10057 else
10058 TimePerVMRequestFlip[k] = 0.0;
10059
10060 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
10061 DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
10062 DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
10063 DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref);
10064 DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip);
10065 DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref);
10066 DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip);
10067
10068 if (display_cfg->gpuvm_max_page_table_levels > 2) {
10069 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
10070 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
10071 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
10072 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
10073 }
10074
10075 } else {
10076 TimePerVMGroupVBlank[k] = 0;
10077 TimePerVMGroupFlip[k] = 0;
10078 TimePerVMRequestVBlank[k] = 0;
10079 TimePerVMRequestFlip[k] = 0;
10080 }
10081
10082 #ifdef __DML_VBA_DEBUG__
10083 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
10084 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
10085 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
10086 DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
10087 #endif
10088 }
10089 }
10090
CalculateStutterEfficiency(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateStutterEfficiency_params * p)10091 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
10092 struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
10093 {
10094 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
10095
10096 unsigned int TotalNumberOfActiveOTG = 0;
10097 double SinglePixelClock = 0;
10098 unsigned int SingleHTotal = 0;
10099 unsigned int SingleVTotal = 0;
10100 bool SameTiming = true;
10101 bool FoundCriticalSurface = false;
10102
10103 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
10104
10105 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10106 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10107 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
10108 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
10109 l->MaximumEffectiveCompressionLuma = 2;
10110 } else {
10111 l->MaximumEffectiveCompressionLuma = 4;
10112 }
10113 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
10114 #ifdef __DML_VBA_DEBUG__
10115 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10116 DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
10117 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
10118 #endif
10119 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
10120 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
10121
10122 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
10123 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
10124 l->MaximumEffectiveCompressionChroma = 2;
10125 } else {
10126 l->MaximumEffectiveCompressionChroma = 4;
10127 }
10128 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
10129 #ifdef __DML_VBA_DEBUG__
10130 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
10131 DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
10132 DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
10133 #endif
10134 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
10135 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
10136 }
10137 } else {
10138 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
10139 }
10140 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
10141 }
10142 }
10143
10144 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
10145 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
10146
10147 #ifdef __DML_VBA_DEBUG__
10148 DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
10149 DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
10150 DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
10151 DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
10152 DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
10153 DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
10154 DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10155 DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
10156
10157 DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
10158 DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
10159 DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
10160 DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
10161 #endif
10162 if (l->AverageDCCZeroSizeFraction == 1) {
10163 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10164 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
10165
10166
10167 } else if (l->AverageDCCZeroSizeFraction > 0) {
10168 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10169 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10170 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
10171 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
10172 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
10173 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10174
10175
10176 #ifdef __DML_VBA_DEBUG__
10177 DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10178 DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
10179 DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10180 DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
10181 #endif
10182 } else {
10183 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10184 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
10185 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
10186
10187 #ifdef __DML_VBA_DEBUG__
10188 DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10189 DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
10190 #endif
10191 }
10192
10193 #ifdef __DML_VBA_DEBUG__
10194 DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
10195 DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
10196 DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
10197 DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10198 #endif
10199
10200 *p->StutterPeriod = 0;
10201
10202 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10203 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10204 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
10205 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
10206 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10207 #ifdef __DML_VBA_DEBUG__
10208 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
10209 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
10210 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
10211 DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10212 DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
10213 DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
10214 DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
10215 DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
10216 DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
10217 #endif
10218
10219 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
10220 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
10221
10222 FoundCriticalSurface = true;
10223 *p->StutterPeriod = l->DETBufferingTimeY;
10224 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10225 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10226 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
10227 l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
10228 l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
10229 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
10230 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
10231 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
10232 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
10233 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
10234
10235 #ifdef __DML_VBA_DEBUG__
10236 DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
10237 DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
10238 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
10239 DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
10240 DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
10241 DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
10242 DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
10243 DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
10244 DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
10245 DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
10246 DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
10247 #endif
10248 }
10249 }
10250 }
10251
10252 // for bounded req, the stutter period is calculated only based on DET size, but during burst there can be some return inside ROB/compressed buffer
10253 // stutter period is calculated only on the det sizing
10254 // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob which is before decompress
10255 // else
10256 // the cdb + rob part will be in compressed rate with urg bw (idea bw)
10257 // the det part will be return at uncompressed rate with 64B/dcfclk
10258 //
10259 // for unbounded req, the stutter period should be calculated as total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
10260 // should be == EffectiveCompressedBufferSize which will returned a compressed rate, the rest of stutter period is from the DET will be returned at uncompressed rate with 64B/dcfclk
10261
10262 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
10263 #ifdef __DML_VBA_DEBUG__
10264 DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10265 DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
10266 DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10267 DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
10268 DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
10269 DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
10270 DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
10271 DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
10272 #endif
10273
10274 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
10275 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10276 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
10277 / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10278 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
10279 #ifdef __DML_VBA_DEBUG__
10280 DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
10281 DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
10282 DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
10283 DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10284 #endif
10285 l->TotalActiveWriteback = 0;
10286 memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
10287
10288 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10289 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10290 if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
10291
10292 if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0)
10293 l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
10294
10295 if (TotalNumberOfActiveOTG == 0) { // first otg
10296 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10297 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10298 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
10299 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) ||
10300 SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total ||
10301 SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
10302 SameTiming = false;
10303 }
10304 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
10305 l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1;
10306 }
10307 }
10308 }
10309
10310 if (l->TotalActiveWriteback == 0) {
10311 #ifdef __DML_VBA_DEBUG__
10312 DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
10313 DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
10314 DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10315 #endif
10316 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10317 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10318 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10319 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10320 } else {
10321 *p->StutterEfficiencyNotIncludingVBlank = 0.;
10322 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
10323 *p->NumberOfStutterBurstsPerFrame = 0;
10324 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10325 }
10326 #ifdef __DML_VBA_DEBUG__
10327 DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
10328 DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10329 DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
10330 DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
10331 DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10332 #endif
10333
10334 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
10335 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10336 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
10337 } else {
10338 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10339 }
10340 } else {
10341 *p->StutterEfficiency = 0;
10342 *p->NumberOfStutterBurstsPerFrame = 0;
10343 }
10344
10345 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
10346 //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
10347 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10348 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
10349 } else {
10350 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10351 }
10352 } else {
10353 *p->Z8StutterEfficiency = 0.;
10354 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10355 }
10356
10357 #ifdef __DML_VBA_DEBUG__
10358 DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
10359 DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming);
10360 DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
10361 DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0);
10362 DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
10363 DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10364 DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10365 DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
10366 DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
10367 DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10368 DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10369 #endif
10370
10371 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
10372
10373 #ifdef __DML_VBA_DEBUG__
10374 DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
10375 DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
10376 DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
10377 #endif
10378 }
10379
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex * in_out_params)10380 static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
10381 {
10382 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
10383 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
10384 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
10385 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
10386 struct dml2_display_cfg_programming *programming = in_out_params->programming;
10387
10388 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
10389 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
10390 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
10391 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
10392 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
10393 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
10394 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
10395 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
10396 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
10397 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
10398 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
10399 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
10400
10401 unsigned int k;
10402 bool must_support_iflip;
10403 const long min_return_uclk_cycles = 83;
10404 const long min_return_fclk_cycles = 75;
10405 const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
10406 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
10407 double max_uclk_mhz = 0;
10408 double min_return_latency_in_DCFCLK_cycles = 0;
10409
10410 DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
10411
10412 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
10413 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
10414
10415 s->num_active_planes = display_cfg->num_planes;
10416 get_stream_output_bpp(s->OutputBpp, display_cfg);
10417
10418 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
10419 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
10420
10421 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
10422 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
10423 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config, &min_clk_table->dram_bw_table);
10424 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
10425 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
10426 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
10427 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
10428 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
10429
10430 for (k = 0; k < s->num_active_planes; ++k) {
10431 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10432 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
10433 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
10434 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
10435 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10436
10437 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
10438 DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10439
10440 switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
10441 case (4):
10442 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
10443 break;
10444 case (3):
10445 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
10446 break;
10447 case (2):
10448 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
10449 break;
10450 default:
10451 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
10452 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
10453 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
10454 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
10455 else
10456 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
10457 break;
10458 }
10459 }
10460
10461 for (k = 0; k < s->num_active_planes; ++k) {
10462 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
10463 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
10464 DML_ASSERT(mode_lib->mp.Dppclk[k] > 0);
10465 }
10466
10467 for (k = 0; k < s->num_active_planes; ++k) {
10468 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10469 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
10470 DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
10471 }
10472
10473 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
10474 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
10475
10476 DML_ASSERT(mode_lib->mp.Dcfclk > 0);
10477 DML_ASSERT(mode_lib->mp.FabricClock > 0);
10478 DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0);
10479 DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0);
10480 DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0);
10481 DML_ASSERT(mode_lib->mp.Dispclk > 0);
10482 DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0);
10483 DML_ASSERT(s->SOCCLK > 0);
10484
10485 #ifdef __DML_VBA_DEBUG__
10486 DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
10487 DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
10488 DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
10489 DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
10490 DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
10491 DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
10492 DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
10493 for (k = 0; k < s->num_active_planes; ++k) {
10494 DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
10495 }
10496 DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
10497 DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
10498 DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
10499 DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
10500 DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
10501 if (min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz)
10502 DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_uclk_khz / 1000.0);
10503 else
10504 DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
10505 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
10506 DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
10507 DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10508 }
10509
10510 for (k = 0; k < s->num_active_planes; k++)
10511 DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
10512 #endif
10513
10514 CalculateMaxDETAndMinCompressedBufferSize(
10515 mode_lib->ip.config_return_buffer_size_in_kbytes,
10516 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
10517 mode_lib->ip.rob_buffer_size_kbytes,
10518 mode_lib->ip.max_num_dpp,
10519 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
10520 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
10521 mode_lib->ip.dcn_mrq_present,
10522
10523 /* Output */
10524 &s->MaxTotalDETInKByte,
10525 &s->NomDETInKByte,
10526 &s->MinCompressedBufferSizeInKByte);
10527
10528
10529 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
10530
10531 for (k = 0; k < s->num_active_planes; ++k) {
10532 CalculateSinglePipeDPPCLKAndSCLThroughput(
10533 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
10534 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
10535 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10536 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10537 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
10538 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
10539 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10540 display_cfg->plane_descriptors[k].pixel_format,
10541 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
10542 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
10543 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
10544 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
10545
10546 /* Output */
10547 &mode_lib->mp.PSCL_THROUGHPUT[k],
10548 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
10549 &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
10550 }
10551
10552 for (k = 0; k < s->num_active_planes; ++k) {
10553 CalculateBytePerPixelAndBlockSizes(
10554 display_cfg->plane_descriptors[k].pixel_format,
10555 display_cfg->plane_descriptors[k].surface.tiling,
10556 display_cfg->plane_descriptors[k].surface.plane0.pitch,
10557 display_cfg->plane_descriptors[k].surface.plane1.pitch,
10558
10559 // Output
10560 &mode_lib->mp.BytePerPixelY[k],
10561 &mode_lib->mp.BytePerPixelC[k],
10562 &mode_lib->mp.BytePerPixelInDETY[k],
10563 &mode_lib->mp.BytePerPixelInDETC[k],
10564 &mode_lib->mp.Read256BlockHeightY[k],
10565 &mode_lib->mp.Read256BlockHeightC[k],
10566 &mode_lib->mp.Read256BlockWidthY[k],
10567 &mode_lib->mp.Read256BlockWidthC[k],
10568 &mode_lib->mp.MacroTileHeightY[k],
10569 &mode_lib->mp.MacroTileHeightC[k],
10570 &mode_lib->mp.MacroTileWidthY[k],
10571 &mode_lib->mp.MacroTileWidthC[k],
10572 &mode_lib->mp.surf_linear128_l[k],
10573 &mode_lib->mp.surf_linear128_c[k]);
10574 }
10575
10576 CalculateSwathWidth(
10577 display_cfg,
10578 false, // ForceSingleDPP
10579 s->num_active_planes,
10580 mode_lib->mp.ODMMode,
10581 mode_lib->mp.BytePerPixelY,
10582 mode_lib->mp.BytePerPixelC,
10583 mode_lib->mp.Read256BlockHeightY,
10584 mode_lib->mp.Read256BlockHeightC,
10585 mode_lib->mp.Read256BlockWidthY,
10586 mode_lib->mp.Read256BlockWidthC,
10587 mode_lib->mp.surf_linear128_l,
10588 mode_lib->mp.surf_linear128_c,
10589 mode_lib->mp.NoOfDPP,
10590
10591 /* Output */
10592 mode_lib->mp.req_per_swath_ub_l,
10593 mode_lib->mp.req_per_swath_ub_c,
10594 mode_lib->mp.SwathWidthSingleDPPY,
10595 mode_lib->mp.SwathWidthSingleDPPC,
10596 mode_lib->mp.SwathWidthY,
10597 mode_lib->mp.SwathWidthC,
10598 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
10599 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
10600 mode_lib->mp.swath_width_luma_ub,
10601 mode_lib->mp.swath_width_chroma_ub);
10602
10603 for (k = 0; k < s->num_active_planes; ++k) {
10604 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
10605 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
10606 mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10607 mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10608 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
10609 DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
10610 }
10611
10612 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
10613 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
10614 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
10615 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
10616 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10617 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10618 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10619 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10620 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
10621 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
10622 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
10623 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
10624 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
10625 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
10626 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
10627 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
10628 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
10629 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
10630 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
10631 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
10632 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
10633 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
10634 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
10635 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
10636 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
10637 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
10638 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
10639 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
10640 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
10641 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10642
10643 // output
10644 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
10645 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
10646 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
10647 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
10648 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
10649 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
10650 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
10651 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
10652 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
10653 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
10654 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
10655 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
10656 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
10657 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
10658 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
10659 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
10660 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
10661 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
10662 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
10663 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
10664 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
10665
10666 // Calculate DET size, swath height here.
10667 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
10668
10669 // DSC Delay
10670 for (k = 0; k < s->num_active_planes; ++k) {
10671 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
10672 mode_lib->mp.ODMMode[k],
10673 mode_lib->ip.maximum_dsc_bits_per_component,
10674 s->OutputBpp[k],
10675 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
10676 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
10677 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
10678 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
10679 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
10680 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10681 s->PixelClockBackEnd[k]);
10682 }
10683
10684 // Prefetch
10685 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
10686 for (k = 0; k < s->num_active_planes; ++k)
10687 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
10688 } else {
10689 CalculateSurfaceSizeInMall(
10690 display_cfg,
10691 s->num_active_planes,
10692 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10693 mode_lib->mp.BytePerPixelY,
10694 mode_lib->mp.BytePerPixelC,
10695 mode_lib->mp.Read256BlockWidthY,
10696 mode_lib->mp.Read256BlockWidthC,
10697 mode_lib->mp.Read256BlockHeightY,
10698 mode_lib->mp.Read256BlockHeightC,
10699 mode_lib->mp.MacroTileWidthY,
10700 mode_lib->mp.MacroTileWidthC,
10701 mode_lib->mp.MacroTileHeightY,
10702 mode_lib->mp.MacroTileHeightC,
10703
10704 /* Output */
10705 mode_lib->mp.SurfaceSizeInTheMALL,
10706 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
10707 }
10708
10709 for (k = 0; k < s->num_active_planes; ++k) {
10710 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10711 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10712 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10713 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10714 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10715 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10716 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10717 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10718 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10719 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
10720 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
10721 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
10722 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
10723 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10724 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10725 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10726 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10727 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
10728 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10729 s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10730 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10731 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10732 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10733 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10734 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10735 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
10736 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
10737 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10738 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10739 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10740 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10741 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10742 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
10743 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
10744 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
10745 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
10746 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
10747 }
10748
10749 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
10750 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
10751 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
10752 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
10753 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
10754 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
10755 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
10756 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
10757 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
10758 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10759 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
10760 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10761
10762 // output
10763 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
10764 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
10765 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
10766 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
10767 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
10768 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
10769 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
10770 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
10771 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
10772 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
10773 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
10774 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
10775 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
10776 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
10777 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
10778 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
10779 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
10780 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
10781 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
10782 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
10783 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
10784 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
10785 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
10786 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
10787 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
10788 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
10789 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
10790 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10791 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
10792 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10793 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10794 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
10795 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
10796 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
10797 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
10798 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
10799 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
10800 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
10801 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10802 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
10803 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10804 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10805 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
10806 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
10807 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
10808 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
10809 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
10810 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
10811 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
10812 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
10813 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
10814 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
10815
10816 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
10817
10818 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
10819 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
10820 for (k = 0; k < s->num_active_planes; k++) {
10821 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
10822 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
10823 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
10824 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
10825 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
10826 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
10827 }
10828 } else {
10829 for (k = 0; k < s->num_active_planes; k++) {
10830 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10831 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
10832 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
10833 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
10834 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
10835 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10836 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10837
10838 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
10839 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
10840 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10841 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
10842 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
10843
10844 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10845 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10846 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
10847 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10848 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
10849 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
10850 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
10851 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
10852 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
10853 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
10854
10855 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10856 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10857 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
10858 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10859 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
10860 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
10861 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
10862 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
10863 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
10864 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
10865
10866 // output
10867 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
10868 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
10869 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
10870 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
10871
10872 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
10873 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
10874 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
10875 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
10876 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
10877
10878 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
10879 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
10880 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
10881 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
10882 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
10883
10884 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
10885 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
10886 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
10887 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
10888 }
10889
10890 calculate_mall_bw_overhead_factor(
10891 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10892 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10893
10894 // input
10895 display_cfg,
10896 s->num_active_planes);
10897 }
10898
10899 // Calculate all the bandwidth available
10900 calculate_bandwidth_available(
10901 mode_lib->mp.avg_bandwidth_available_min,
10902 mode_lib->mp.avg_bandwidth_available,
10903 mode_lib->mp.urg_bandwidth_available_min,
10904 mode_lib->mp.urg_bandwidth_available,
10905 mode_lib->mp.urg_bandwidth_available_vm_only,
10906 mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
10907
10908 &mode_lib->soc,
10909 display_cfg->hostvm_enable,
10910 mode_lib->mp.Dcfclk,
10911 mode_lib->mp.FabricClock,
10912 mode_lib->mp.dram_bw_mbps);
10913
10914
10915 calculate_hostvm_inefficiency_factor(
10916 &s->HostVMInefficiencyFactor,
10917 &s->HostVMInefficiencyFactorPrefetch,
10918
10919 display_cfg->gpuvm_enable,
10920 display_cfg->hostvm_enable,
10921 mode_lib->ip.remote_iommu_outstanding_translations,
10922 mode_lib->soc.max_outstanding_reqs,
10923 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
10924 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
10925
10926 s->TotalDCCActiveDPP = 0;
10927 s->TotalActiveDPP = 0;
10928 for (k = 0; k < s->num_active_planes; ++k) {
10929 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
10930 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
10931 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
10932 }
10933 // Calculate tdlut schedule related terms
10934 for (k = 0; k <= s->num_active_planes - 1; k++) {
10935 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
10936 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10937 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
10938 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
10939 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
10940 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10941 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10942
10943 // output
10944 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
10945 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
10946 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
10947 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
10948 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
10949 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
10950 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
10951 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
10952 }
10953
10954 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
10955 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
10956 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
10957 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
10958
10959 CalculateExtraLatency(
10960 display_cfg,
10961 mode_lib->ip.rob_buffer_size_kbytes,
10962 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
10963 s->ReorderingBytes,
10964 mode_lib->mp.Dcfclk,
10965 mode_lib->mp.FabricClock,
10966 mode_lib->ip.pixel_chunk_size_kbytes,
10967 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
10968 s->num_active_planes,
10969 mode_lib->mp.NoOfDPP,
10970 mode_lib->mp.dpte_group_bytes,
10971 s->tdlut_bytes_per_group,
10972 s->HostVMInefficiencyFactor,
10973 s->HostVMInefficiencyFactorPrefetch,
10974 mode_lib->soc.hostvm_min_page_size_kbytes,
10975 mode_lib->soc.qos_parameters.qos_type,
10976 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
10977 mode_lib->soc.max_outstanding_reqs,
10978 mode_lib->mp.request_size_bytes_luma,
10979 mode_lib->mp.request_size_bytes_chroma,
10980 mode_lib->ip.meta_chunk_size_kbytes,
10981 mode_lib->ip.dchub_arb_to_ret_delay,
10982 mode_lib->mp.TripToMemory,
10983 mode_lib->ip.hostvm_mode,
10984
10985 // output
10986 &mode_lib->mp.ExtraLatency,
10987 &mode_lib->mp.ExtraLatency_sr,
10988 &mode_lib->mp.ExtraLatencyPrefetch);
10989
10990 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
10991
10992 for (k = 0; k < s->num_active_planes; ++k) {
10993 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
10994 mode_lib->mp.WritebackDelay[k] =
10995 mode_lib->soc.qos_parameters.writeback.base_latency_us
10996 + CalculateWriteBackDelay(
10997 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
10998 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
10999 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
11000 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
11001 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
11002 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
11003 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
11004 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
11005 } else
11006 mode_lib->mp.WritebackDelay[k] = 0;
11007 }
11008
11009 /* VActive bytes to fetch for UCLK P-State */
11010 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
11011 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11012
11013 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
11014 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11015 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11016 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11017 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
11018 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
11019 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
11020 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
11021 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
11022 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
11023 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
11024 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
11025 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
11026 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
11027 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
11028 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
11029 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us[0] = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11030
11031 /* outputs */
11032 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l[dml2_pstate_type_uclk];
11033 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c[dml2_pstate_type_uclk];
11034
11035 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
11036
11037 /* Excess VActive bandwidth required to fill DET */
11038 calculate_excess_vactive_bandwidth_required(
11039 display_cfg,
11040 s->num_active_planes,
11041 s->pstate_bytes_required_l[dml2_pstate_type_uclk],
11042 s->pstate_bytes_required_c[dml2_pstate_type_uclk],
11043 /* outputs */
11044 mode_lib->mp.excess_vactive_fill_bw_l,
11045 mode_lib->mp.excess_vactive_fill_bw_c);
11046
11047 mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
11048 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
11049 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
11050 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
11051 mode_lib->soc.do_urgent_latency_adjustment,
11052 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
11053 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
11054 mode_lib->mp.FabricClock,
11055 mode_lib->mp.uclk_freq_mhz,
11056 mode_lib->soc.qos_parameters.qos_type,
11057 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
11058 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
11059 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11060 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11061 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
11062 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11063
11064 mode_lib->mp.TripToMemory = CalculateTripToMemory(
11065 mode_lib->mp.UrgentLatency,
11066 mode_lib->mp.FabricClock,
11067 mode_lib->mp.uclk_freq_mhz,
11068 mode_lib->soc.qos_parameters.qos_type,
11069 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
11070 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11071 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11072 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11073 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11074
11075 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
11076
11077 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
11078 mode_lib->mp.UrgentLatency,
11079 mode_lib->mp.FabricClock,
11080 mode_lib->mp.uclk_freq_mhz,
11081 mode_lib->soc.qos_parameters.qos_type,
11082 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
11083 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
11084 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11085 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11086
11087 for (k = 0; k < s->num_active_planes; ++k) {
11088 bool cursor_not_enough_urgent_latency_hiding = false;
11089 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11090 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11091
11092 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
11093
11094 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11095 mode_lib->mp.NoOfDPP[k],
11096 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
11097 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
11098 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
11099 display_cfg->plane_descriptors[k].composition.rotation_angle);
11100
11101 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11102 mode_lib->mp.NoOfDPP[k],
11103 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
11104 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
11105 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
11106 display_cfg->plane_descriptors[k].composition.rotation_angle);
11107
11108 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
11109 calculate_cursor_req_attributes(
11110 display_cfg->plane_descriptors[k].cursor.cursor_width,
11111 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
11112
11113 // output
11114 &s->cursor_lines_per_chunk[k],
11115 &s->cursor_bytes_per_line[k],
11116 &s->cursor_bytes_per_chunk[k],
11117 &s->cursor_bytes[k]);
11118
11119 calculate_cursor_urgent_burst_factor(
11120 mode_lib->ip.cursor_buffer_size,
11121 display_cfg->plane_descriptors[k].cursor.cursor_width,
11122 s->cursor_bytes_per_chunk[k],
11123 s->cursor_lines_per_chunk[k],
11124 s->line_times[k],
11125 mode_lib->mp.UrgentLatency,
11126
11127 // output
11128 &mode_lib->mp.UrgentBurstFactorCursor[k],
11129 &cursor_not_enough_urgent_latency_hiding);
11130 }
11131 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
11132
11133 CalculateUrgentBurstFactor(
11134 &display_cfg->plane_descriptors[k],
11135 mode_lib->mp.swath_width_luma_ub[k],
11136 mode_lib->mp.swath_width_chroma_ub[k],
11137 mode_lib->mp.SwathHeightY[k],
11138 mode_lib->mp.SwathHeightC[k],
11139 s->line_times[k],
11140 mode_lib->mp.UrgentLatency,
11141 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11142 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11143 mode_lib->mp.BytePerPixelInDETY[k],
11144 mode_lib->mp.BytePerPixelInDETC[k],
11145 mode_lib->mp.DETBufferSizeY[k],
11146 mode_lib->mp.DETBufferSizeC[k],
11147
11148 /* output */
11149 &mode_lib->mp.UrgentBurstFactorLuma[k],
11150 &mode_lib->mp.UrgentBurstFactorChroma[k],
11151 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11152
11153 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
11154 }
11155
11156 for (k = 0; k < s->num_active_planes; ++k) {
11157 s->MaxVStartupLines[k] = CalculateMaxVStartup(
11158 mode_lib->ip.ptoi_supported,
11159 mode_lib->ip.vblank_nom_default_us,
11160 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
11161 mode_lib->mp.WritebackDelay[k]);
11162
11163 #ifdef __DML_VBA_DEBUG__
11164 DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11165 DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
11166 #endif
11167 }
11168
11169 s->immediate_flip_required = false;
11170 for (k = 0; k < s->num_active_planes; ++k) {
11171 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
11172 }
11173 #ifdef __DML_VBA_DEBUG__
11174 DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
11175 #endif
11176
11177 if (s->num_active_planes > 1) {
11178 CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
11179 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
11180 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11181 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11182 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
11183 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
11184 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
11185 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
11186 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
11187 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
11188 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
11189 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
11190 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
11191 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
11192 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
11193 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
11194 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
11195 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
11196 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
11197 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
11198 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
11199
11200 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
11201 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
11202 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
11203 CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming
11204 }
11205
11206 {
11207 s->DestinationLineTimesForPrefetchLessThan2 = false;
11208 s->VRatioPrefetchMoreThanMax = false;
11209
11210 DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
11211
11212 for (k = 0; k < s->num_active_planes; ++k) {
11213 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
11214
11215 DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11216 mode_lib->mp.TWait[k] = CalculateTWait(
11217 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
11218 mode_lib->mp.UrgentLatency,
11219 mode_lib->mp.TripToMemory,
11220 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
11221 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
11222
11223 myPipe->Dppclk = mode_lib->mp.Dppclk[k];
11224 myPipe->Dispclk = mode_lib->mp.Dispclk;
11225 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11226 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11227 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
11228 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
11229 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
11230 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
11231 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
11232 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
11233 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
11234 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
11235 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
11236 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
11237 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
11238 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
11239 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
11240 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
11241 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
11242 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
11243 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
11244 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11245 myPipe->ODMMode = mode_lib->mp.ODMMode[k];
11246 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
11247 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
11248 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
11249 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11250
11251 #ifdef __DML_VBA_DEBUG__
11252 DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
11253 #endif
11254 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
11255 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
11256 CalculatePrefetchSchedule_params->myPipe = myPipe;
11257 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
11258 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
11259 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
11260 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
11261 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
11262 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
11263 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
11264 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
11265 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
11266 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
11267 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
11268 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
11269 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
11270 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
11271 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
11272 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
11273 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
11274 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
11275 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
11276 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
11277 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
11278 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
11279 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
11280 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
11281 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
11282 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
11283 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
11284 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
11285 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
11286 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
11287 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
11288 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
11289 CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
11290 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
11291 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
11292 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
11293 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
11294 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
11295 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
11296 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
11297 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
11298 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11299 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11300 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
11301 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
11302 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
11303 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
11304 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
11305
11306 // output
11307 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
11308 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
11309 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
11310 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
11311 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
11312 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
11313 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
11314 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
11315 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
11316 CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k];
11317 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
11318 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
11319 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
11320 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
11321 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
11322 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
11323 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
11324 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
11325 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
11326 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
11327 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
11328 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
11329 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
11330 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
11331 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
11332 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
11333 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
11334 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
11335 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
11336 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
11337 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
11338
11339 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
11340
11341 if (s->impacted_dst_y_pre[k] > 0)
11342 mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
11343 else
11344 mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
11345
11346 #ifdef __DML_VBA_DEBUG__
11347 DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11348 #endif
11349 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
11350 } // for k
11351
11352 mode_lib->mp.PrefetchModeSupported = true;
11353 for (k = 0; k < s->num_active_planes; ++k) {
11354 if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
11355 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
11356 mode_lib->mp.DSTYAfterScaler[k] > 8) {
11357 DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11358 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
11359 DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
11360 mode_lib->mp.PrefetchModeSupported = false;
11361 }
11362 if (mode_lib->mp.dst_y_prefetch[k] < 2)
11363 s->DestinationLineTimesForPrefetchLessThan2 = true;
11364
11365 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
11366 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
11367 s->VRatioPrefetchMoreThanMax = true;
11368 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11369 DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11370 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11371 }
11372
11373 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
11374 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11375 mode_lib->mp.PrefetchModeSupported = false;
11376 }
11377 }
11378
11379 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
11380 DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11381 DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
11382 mode_lib->mp.PrefetchModeSupported = false;
11383 }
11384
11385 DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
11386 mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
11387
11388 // Prefetch schedule OK, now check prefetch bw
11389 if (mode_lib->mp.PrefetchModeSupported == true) {
11390 for (k = 0; k < s->num_active_planes; ++k) {
11391 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11392 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11393 CalculateUrgentBurstFactor(
11394 &display_cfg->plane_descriptors[k],
11395 mode_lib->mp.swath_width_luma_ub[k],
11396 mode_lib->mp.swath_width_chroma_ub[k],
11397 mode_lib->mp.SwathHeightY[k],
11398 mode_lib->mp.SwathHeightC[k],
11399 line_time_us,
11400 mode_lib->mp.UrgentLatency,
11401 mode_lib->mp.VRatioPrefetchY[k],
11402 mode_lib->mp.VRatioPrefetchC[k],
11403 mode_lib->mp.BytePerPixelInDETY[k],
11404 mode_lib->mp.BytePerPixelInDETC[k],
11405 mode_lib->mp.DETBufferSizeY[k],
11406 mode_lib->mp.DETBufferSizeC[k],
11407 /* Output */
11408 &mode_lib->mp.UrgentBurstFactorLumaPre[k],
11409 &mode_lib->mp.UrgentBurstFactorChromaPre[k],
11410 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11411
11412 #ifdef __DML_VBA_DEBUG__
11413 DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
11414 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
11415 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
11416 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
11417 DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
11418
11419 DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
11420 DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
11421
11422 DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
11423 DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11424 DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11425 DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
11426 DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
11427 DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
11428 DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
11429 DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
11430 DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
11431 #endif
11432 }
11433
11434 for (k = 0; k <= s->num_active_planes - 1; k++)
11435 mode_lib->mp.final_flip_bw[k] = 0;
11436
11437 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
11438 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
11439 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
11440 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
11441 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11442 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11443
11444 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11445 calculate_peak_bandwidth_params->inc_flip_bw = 0;
11446 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11447 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11448 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11449 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11450 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11451 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11452 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11453 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11454
11455 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11456 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11457 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11458 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11459 calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
11460 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11461 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11462 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11463 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11464 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11465 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11466 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11467 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11468 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11469 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11470 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11471 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11472 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11473 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11474
11475 calculate_peak_bandwidth_required(
11476 &mode_lib->scratch,
11477 calculate_peak_bandwidth_params);
11478
11479 // Check urg peak bandwidth against available urg bw
11480 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
11481 check_urgent_bandwidth_support(
11482 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
11483 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
11484 &s->dummy_boolean[1], // vactive bw ok
11485 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
11486
11487 mode_lib->soc.mall_allocated_for_dcn_mbytes,
11488 mode_lib->mp.non_urg_bandwidth_required,
11489 mode_lib->mp.urg_vactive_bandwidth_required,
11490 mode_lib->mp.urg_bandwidth_required,
11491 mode_lib->mp.urg_bandwidth_available);
11492
11493 if (!mode_lib->mp.PrefetchModeSupported)
11494 DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
11495
11496 for (k = 0; k < s->num_active_planes; ++k) {
11497 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
11498 DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11499 mode_lib->mp.PrefetchModeSupported = false;
11500 }
11501 }
11502 } // prefetch schedule ok
11503
11504 // Prefetch schedule and prefetch bw ok, now check flip bw
11505 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
11506
11507 mode_lib->mp.BandwidthAvailableForImmediateFlip =
11508 get_bandwidth_available_for_immediate_flip(
11509 dml2_core_internal_soc_state_sys_active,
11510 mode_lib->mp.urg_bandwidth_required_qual, // no flip
11511 mode_lib->mp.urg_bandwidth_available);
11512 mode_lib->mp.TotImmediateFlipBytes = 0;
11513 for (k = 0; k < s->num_active_planes; ++k) {
11514 if (display_cfg->plane_descriptors[k].immediate_flip) {
11515 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
11516 mode_lib->mp.vm_bytes[k],
11517 mode_lib->mp.PixelPTEBytesPerRow[k],
11518 mode_lib->mp.meta_row_bytes[k]);
11519 } else {
11520 s->per_pipe_flip_bytes[k] = 0;
11521 }
11522 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
11523 #ifdef __DML_VBA_DEBUG__
11524 DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k);
11525 DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
11526 DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
11527 DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
11528 DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
11529 DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
11530 #endif
11531 }
11532 for (k = 0; k < s->num_active_planes; ++k) {
11533 CalculateFlipSchedule(
11534 &mode_lib->scratch,
11535 display_cfg->plane_descriptors[k].immediate_flip,
11536 0, // use_lb_flip_bw
11537 s->HostVMInefficiencyFactor,
11538 s->Tvm_trips_flip[k],
11539 s->Tr0_trips_flip[k],
11540 s->Tvm_trips_flip_rounded[k],
11541 s->Tr0_trips_flip_rounded[k],
11542 display_cfg->gpuvm_enable,
11543 mode_lib->mp.vm_bytes[k],
11544 mode_lib->mp.PixelPTEBytesPerRow[k],
11545 mode_lib->mp.BandwidthAvailableForImmediateFlip,
11546 mode_lib->mp.TotImmediateFlipBytes,
11547 display_cfg->plane_descriptors[k].pixel_format,
11548 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
11549 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11550 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11551 mode_lib->mp.Tno_bw[k],
11552 mode_lib->mp.dpte_row_height[k],
11553 mode_lib->mp.dpte_row_height_chroma[k],
11554 mode_lib->mp.use_one_row_for_frame_flip[k],
11555 mode_lib->ip.max_flip_time_us,
11556 mode_lib->ip.max_flip_time_lines,
11557 s->per_pipe_flip_bytes[k],
11558 mode_lib->mp.meta_row_bytes[k],
11559 mode_lib->mp.meta_row_height[k],
11560 mode_lib->mp.meta_row_height_chroma[k],
11561 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
11562
11563 // Output
11564 &mode_lib->mp.dst_y_per_vm_flip[k],
11565 &mode_lib->mp.dst_y_per_row_flip[k],
11566 &mode_lib->mp.final_flip_bw[k],
11567 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
11568 }
11569
11570 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
11571 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip;
11572 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
11573 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip;
11574 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11575 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11576
11577 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11578 calculate_peak_bandwidth_params->inc_flip_bw = 1;
11579 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11580 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11581 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11582 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11583 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11584 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11585 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11586 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11587
11588 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11589 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11590 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11591 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11592 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11593 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11594 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11595 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11596 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11597 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11598 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11599 calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
11600 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11601 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11602 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11603 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11604 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11605 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11606 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11607
11608 calculate_peak_bandwidth_required(
11609 &mode_lib->scratch,
11610 calculate_peak_bandwidth_params);
11611
11612 calculate_immediate_flip_bandwidth_support(
11613 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
11614 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
11615
11616 dml2_core_internal_soc_state_sys_active,
11617 mode_lib->mp.urg_bandwidth_required_flip,
11618 mode_lib->mp.non_urg_bandwidth_required_flip,
11619 mode_lib->mp.urg_bandwidth_available);
11620
11621 if (!mode_lib->mp.ImmediateFlipSupported)
11622 DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__);
11623
11624 for (k = 0; k < s->num_active_planes; ++k) {
11625 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
11626 mode_lib->mp.ImmediateFlipSupported = false;
11627 #ifdef __DML_VBA_DEBUG__
11628 DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
11629 #endif
11630 }
11631 }
11632 } else { // flip or prefetch not support
11633 mode_lib->mp.ImmediateFlipSupported = false;
11634 }
11635
11636 // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
11637 must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
11638 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
11639
11640 #ifdef __DML_VBA_DEBUG__
11641 DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
11642 for (k = 0; k < s->num_active_planes; ++k)
11643 DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
11644 DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
11645 DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
11646 DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
11647 #endif
11648 DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
11649 }
11650
11651 for (k = 0; k < s->num_active_planes; ++k)
11652 DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11653
11654 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
11655 DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
11656 } else {
11657 DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
11658
11659 // DCC Configuration
11660 for (k = 0; k < s->num_active_planes; ++k) {
11661 #ifdef __DML_VBA_DEBUG__
11662 DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
11663 #endif
11664 CalculateDCCConfiguration(
11665 display_cfg->plane_descriptors[k].surface.dcc.enable,
11666 display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
11667 display_cfg->plane_descriptors[k].pixel_format,
11668 display_cfg->plane_descriptors[k].surface.plane0.width,
11669 display_cfg->plane_descriptors[k].surface.plane1.width,
11670 display_cfg->plane_descriptors[k].surface.plane0.height,
11671 display_cfg->plane_descriptors[k].surface.plane1.height,
11672 s->NomDETInKByte,
11673 mode_lib->mp.Read256BlockHeightY[k],
11674 mode_lib->mp.Read256BlockHeightC[k],
11675 display_cfg->plane_descriptors[k].surface.tiling,
11676 mode_lib->mp.BytePerPixelY[k],
11677 mode_lib->mp.BytePerPixelC[k],
11678 mode_lib->mp.BytePerPixelInDETY[k],
11679 mode_lib->mp.BytePerPixelInDETC[k],
11680 display_cfg->plane_descriptors[k].composition.rotation_angle,
11681
11682 /* Output */
11683 &mode_lib->mp.RequestLuma[k],
11684 &mode_lib->mp.RequestChroma[k],
11685 &mode_lib->mp.DCCYMaxUncompressedBlock[k],
11686 &mode_lib->mp.DCCCMaxUncompressedBlock[k],
11687 &mode_lib->mp.DCCYMaxCompressedBlock[k],
11688 &mode_lib->mp.DCCCMaxCompressedBlock[k],
11689 &mode_lib->mp.DCCYIndependentBlock[k],
11690 &mode_lib->mp.DCCCIndependentBlock[k]);
11691 }
11692
11693 //Watermarks and NB P-State/DRAM Clock Change Support
11694 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
11695 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
11696 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
11697 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
11698 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11699 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
11700 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11701 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
11702 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11703 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
11704 s->mmSOCParameters.USRRetrainingLatency = 0;
11705 s->mmSOCParameters.SMNLatency = 0;
11706 s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
11707 s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index);
11708 s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock;
11709 s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
11710
11711 CalculateWatermarks_params->display_cfg = display_cfg;
11712 CalculateWatermarks_params->USRRetrainingRequired = false;
11713 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
11714 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
11715 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
11716 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
11717 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
11718 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11719 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
11720 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11721 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
11722 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
11723 CalculateWatermarks_params->SOCCLK = s->SOCCLK;
11724 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11725 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11726 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
11727 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11728 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11729 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11730 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
11731 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11732 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
11733 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
11734 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
11735 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11736 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11737 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11738 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11739 CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11740
11741 // Output
11742 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
11743 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
11744 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
11745 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
11746 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
11747 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
11748 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
11749 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
11750 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
11751 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support;
11752 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
11753 CalculateWatermarks_params->VActiveLatencyHidingUs = 0;
11754
11755 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
11756
11757 for (k = 0; k < s->num_active_planes; ++k) {
11758 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
11759 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11760 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
11761 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11762 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
11763 } else {
11764 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
11765 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
11766 }
11767 }
11768
11769 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines);
11770
11771 DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
11772 DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
11773
11774 //Display Pipeline Delivery Time in Prefetch, Groups
11775 CalculatePixelDeliveryTimes(
11776 display_cfg,
11777 cfg_support_info,
11778 s->num_active_planes,
11779 mode_lib->mp.VRatioPrefetchY,
11780 mode_lib->mp.VRatioPrefetchC,
11781 mode_lib->mp.swath_width_luma_ub,
11782 mode_lib->mp.swath_width_chroma_ub,
11783 mode_lib->mp.PSCL_THROUGHPUT,
11784 mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
11785 mode_lib->mp.Dppclk,
11786 mode_lib->mp.BytePerPixelC,
11787 mode_lib->mp.req_per_swath_ub_l,
11788 mode_lib->mp.req_per_swath_ub_c,
11789
11790 /* Output */
11791 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
11792 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
11793 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
11794 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
11795 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
11796 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
11797 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
11798 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
11799
11800 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
11801 CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
11802 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
11803 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
11804 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
11805 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
11806 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11807 CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
11808 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
11809 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
11810 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11811 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
11812 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
11813 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
11814 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
11815 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
11816 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
11817 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
11818 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
11819 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
11820 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11821
11822 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
11823 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
11824 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
11825 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
11826 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
11827 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
11828 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
11829 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
11830 CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
11831 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
11832
11833 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
11834 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
11835 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
11836 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
11837 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
11838 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
11839 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
11840 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
11841 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
11842 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
11843 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
11844 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
11845 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
11846 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
11847 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
11848 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
11849 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
11850
11851 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
11852
11853 CalculateVMGroupAndRequestTimes(
11854 display_cfg,
11855 s->num_active_planes,
11856 mode_lib->mp.BytePerPixelC,
11857 mode_lib->mp.dst_y_per_vm_vblank,
11858 mode_lib->mp.dst_y_per_vm_flip,
11859 mode_lib->mp.dpte_row_width_luma_ub,
11860 mode_lib->mp.dpte_row_width_chroma_ub,
11861 mode_lib->mp.vm_group_bytes,
11862 mode_lib->mp.dpde0_bytes_per_frame_ub_l,
11863 mode_lib->mp.dpde0_bytes_per_frame_ub_c,
11864 s->tdlut_pte_bytes_per_frame,
11865 mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
11866 mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
11867 mode_lib->ip.dcn_mrq_present,
11868
11869 /* Output */
11870 mode_lib->mp.TimePerVMGroupVBlank,
11871 mode_lib->mp.TimePerVMGroupFlip,
11872 mode_lib->mp.TimePerVMRequestVBlank,
11873 mode_lib->mp.TimePerVMRequestFlip);
11874
11875 // VStartup Adjustment
11876 for (k = 0; k < s->num_active_planes; ++k) {
11877 bool isInterlaceTiming;
11878
11879 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
11880 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
11881 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
11882
11883 #ifdef __DML_VBA_DEBUG__
11884 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11885 #endif
11886 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11887 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
11888
11889 #ifdef __DML_VBA_DEBUG__
11890 DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
11891 DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11892 DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11893 #endif
11894
11895 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
11896 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
11897 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
11898 }
11899
11900 isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
11901
11902 // The actual positioning of the vstartup
11903 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
11904
11905 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
11906 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11907 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
11908 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
11909
11910 if (s->blank_lines_remaining < 0) {
11911 DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n");
11912 s->blank_lines_remaining = 0;
11913 DML_ASSERT(0);
11914 }
11915 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
11916
11917 // debug only
11918 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
11919 (isInterlaceTiming ?
11920 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
11921 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
11922 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
11923 } else {
11924 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
11925 }
11926 #ifdef __DML_VBA_DEBUG__
11927 DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
11928 DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
11929 DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
11930 DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
11931 DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
11932 DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
11933 DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
11934 DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
11935 DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11936 DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
11937 DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
11938 DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
11939 #endif
11940 }
11941
11942 //Maximum Bandwidth Used
11943 mode_lib->mp.TotalWRBandwidth = 0;
11944 for (k = 0; k < display_cfg->num_streams; ++k) {
11945 s->WRBandwidth = 0;
11946 if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) {
11947 s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height
11948 * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width /
11949 (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height
11950 / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000))
11951 * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0);
11952 mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth;
11953 }
11954 }
11955
11956 mode_lib->mp.TotalDataReadBandwidth = 0;
11957 for (k = 0; k < s->num_active_planes; ++k) {
11958 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k];
11959 #ifdef __DML_VBA_DEBUG__
11960 DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
11961 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11962 DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11963 #endif
11964 }
11965
11966 CalculateStutterEfficiency_params->display_cfg = display_cfg;
11967 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11968 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11969 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
11970 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
11971 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
11972 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
11973 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
11974 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
11975 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
11976 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
11977 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
11978 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
11979 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11980 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11981 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11982 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
11983 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
11984 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11985 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
11986 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11987 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11988 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11989 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11990 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11991 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11992 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11993 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
11994 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
11995 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
11996 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
11997 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
11998 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
11999 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l;
12000 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c;
12001 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
12002 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
12003 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
12004
12005 // output
12006 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
12007 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
12008 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
12009 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12010 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
12011 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12012 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
12013 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
12014
12015 // Stutter Efficiency
12016 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12017
12018 #ifdef __DML_VBA_ALLOW_DELTA__
12019 // Calculate z8 stutter eff assuming 0 reserved space
12020 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
12021 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
12022
12023 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
12024 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
12025 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
12026 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
12027
12028 // Stutter Efficiency
12029 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12030 #else
12031 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12032 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
12033 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12034 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
12035 #endif
12036 } // PrefetchAndImmediateFlipSupported
12037
12038 max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
12039 min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
12040 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
12041 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
12042 DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
12043
12044 #ifdef __DML_VBA_DEBUG__
12045 DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
12046 DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
12047 DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
12048 DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles);
12049 DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles);
12050 DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
12051 DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
12052 DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__);
12053 #endif
12054 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
12055 }
12056
/*
 * Logging wrapper around dml_core_mode_programming().
 *
 * Forwards in_out_params unchanged, logs the start of the call, the value
 * returned and the end of the call, and hands that value back to the caller.
 */
bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params)
{
	bool programming_ok;

	DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
	programming_ok = dml_core_mode_programming(in_out_params);

	DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, programming_ok);
	DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);

	return programming_ok;
}
12066
dml2_core_calcs_get_dpte_row_height(unsigned int * dpte_row_height,struct dml2_core_internal_display_mode_lib * mode_lib,bool is_plane1,enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,enum dml2_rotation_angle ScanDirection,unsigned int pitch,unsigned int GPUVMMinPageSizeKBytes)12067 void dml2_core_calcs_get_dpte_row_height(
12068 unsigned int *dpte_row_height,
12069 struct dml2_core_internal_display_mode_lib *mode_lib,
12070 bool is_plane1,
12071 enum dml2_source_format_class SourcePixelFormat,
12072 enum dml2_swizzle_mode SurfaceTiling,
12073 enum dml2_rotation_angle ScanDirection,
12074 unsigned int pitch,
12075 unsigned int GPUVMMinPageSizeKBytes)
12076 {
12077 unsigned int BytePerPixelY;
12078 unsigned int BytePerPixelC;
12079 double BytePerPixelInDETY;
12080 double BytePerPixelInDETC;
12081 unsigned int BlockHeight256BytesY;
12082 unsigned int BlockHeight256BytesC;
12083 unsigned int BlockWidth256BytesY;
12084 unsigned int BlockWidth256BytesC;
12085 unsigned int MacroTileWidthY;
12086 unsigned int MacroTileWidthC;
12087 unsigned int MacroTileHeightY;
12088 unsigned int MacroTileHeightC;
12089 bool surf_linear_128_l = false;
12090 bool surf_linear_128_c = false;
12091
12092 CalculateBytePerPixelAndBlockSizes(
12093 SourcePixelFormat,
12094 SurfaceTiling,
12095 pitch,
12096 pitch,
12097
12098 /* Output */
12099 &BytePerPixelY,
12100 &BytePerPixelC,
12101 &BytePerPixelInDETY,
12102 &BytePerPixelInDETC,
12103 &BlockHeight256BytesY,
12104 &BlockHeight256BytesC,
12105 &BlockWidth256BytesY,
12106 &BlockWidth256BytesC,
12107 &MacroTileHeightY,
12108 &MacroTileHeightC,
12109 &MacroTileWidthY,
12110 &MacroTileWidthC,
12111 &surf_linear_128_l,
12112 &surf_linear_128_c);
12113
12114 unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
12115 unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
12116 unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
12117 unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
12118 unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
12119 unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
12120 #ifdef __DML_VBA_DEBUG__
12121 DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1);
12122 DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
12123 DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
12124 DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
12125 DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
12126 DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
12127 DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
12128 DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
12129 DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
12130 DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
12131 #endif
12132 unsigned int dummy_integer[21];
12133
12134 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0;
12135 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0;
12136 mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1;
12137 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes;
12138 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes;
12139 mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat;
12140 mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling;
12141 mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel;
12142 mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection;
12143 mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0;
12144 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0;
12145 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0;
12146 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0;
12147 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1;
12148 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4;
12149 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes;
12150 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests;
12151 mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch;
12152 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth;
12153 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight;
12154 mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0;
12155 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0;
12156 mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0;
12157
12158 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1];
12159 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2];
12160 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3];
12161 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height;
12162 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4];
12163 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5];
12164 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6];
12165 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7];
12166 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8];
12167 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9];
12168 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11];
12169 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12];
12170 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13];
12171 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14];
12172
12173 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15];
12174 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16];
12175 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17];
12176 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18];
12177 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19];
12178 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20];
12179
12180 // just supply with enough parameters to calculate dpte
12181 CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params);
12182
12183 #ifdef __DML_VBA_DEBUG__
12184 DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
12185 #endif
12186 }
12187
is_dual_plane(enum dml2_source_format_class source_format)12188 static bool is_dual_plane(enum dml2_source_format_class source_format)
12189 {
12190 bool ret_val = false;
12191
12192 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
12193 ret_val = true;
12194
12195 return ret_val;
12196 }
12197
dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12198 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
12199 {
12200 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
12201 return plane_idx;
12202 }
12203
rq_dlg_get_wm_regs(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_watermark_regs * wm_regs)12204 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
12205 {
12206 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12207
12208 wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
12209 wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
12210 wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
12211 wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
12212 wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz);
12213 wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
12214 wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
12215 wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
12216 wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
12217 wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz);
12218 wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz);
12219 wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
12220 wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
12221 wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
12222 }
12223
/*
 * Return math_log2_approx(a) - subtrahend, or 0 when a is 0 (in which case
 * math_log2_approx() is not called at all).
 *
 * NOTE(review): nothing here guards against the log result being smaller than
 * subtrahend; callers presumably only pass values large enough - confirm.
 */
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	unsigned int encoded = 0;

	if (a != 0)
		encoded = math_log2_approx(a) - subtrahend;

	return encoded;
}
12231
dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs * cursor_dlg_regs,const struct dml2_get_cursor_dlg_reg * p)12232 void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
12233 {
12234 int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) -
12235 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
12236 cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0);
12237
12238 #ifdef __DML_VBA_DEBUG__
12239 DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
12240 DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
12241 DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
12242 DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset);
12243 DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
12244 #endif
12245
12246 cursor_dlg_regs->chunk_hdl_adjust = 3;
12247 cursor_dlg_regs->dst_y_offset = 0;
12248
12249 cursor_dlg_regs->qos_level_fixed = 8;
12250 cursor_dlg_regs->qos_ramp_disable = 0;
12251 }
12252
rq_dlg_get_rq_reg(struct dml2_display_rq_regs * rq_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12253 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
12254 const struct dml2_display_cfg *display_cfg,
12255 const struct dml2_core_internal_display_mode_lib *mode_lib,
12256 unsigned int pipe_idx)
12257 {
12258 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12259 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
12260 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
12261 bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format));
12262
12263 unsigned int pixel_chunk_bytes = 0;
12264 unsigned int min_pixel_chunk_bytes = 0;
12265 unsigned int dpte_group_bytes = 0;
12266 unsigned int mpte_group_bytes = 0;
12267
12268 unsigned int p1_pixel_chunk_bytes = 0;
12269 unsigned int p1_min_pixel_chunk_bytes = 0;
12270 unsigned int p1_dpte_group_bytes = 0;
12271 unsigned int p1_mpte_group_bytes = 0;
12272
12273 unsigned int detile_buf_plane1_addr = 0;
12274 unsigned int detile_buf_size_in_bytes;
12275 double stored_swath_l_bytes;
12276 double stored_swath_c_bytes;
12277 bool is_phantom_pipe;
12278
12279 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
12280
12281 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
12282 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
12283
12284 if (pixel_chunk_bytes == 64 * 1024)
12285 min_pixel_chunk_bytes = 0;
12286
12287 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
12288 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
12289
12290 p1_pixel_chunk_bytes = pixel_chunk_bytes;
12291 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
12292 p1_dpte_group_bytes = dpte_group_bytes;
12293 p1_mpte_group_bytes = mpte_group_bytes;
12294
12295 if (source_format == dml2_rgbe_alpha)
12296 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
12297
12298 rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib);
12299 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
12300 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
12301
12302 if (min_pixel_chunk_bytes == 0)
12303 rq_regs->rq_regs_l.min_chunk_size = 0;
12304 else
12305 rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
12306
12307 if (p1_min_pixel_chunk_bytes == 0)
12308 rq_regs->rq_regs_c.min_chunk_size = 0;
12309 else
12310 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
12311
12312 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
12313 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
12314 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
12315 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
12316
12317 detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
12318
12319 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
12320 unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
12321 #ifdef __DML_VBA_DEBUG__
12322 DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
12323 #endif
12324 DML_ASSERT(p0_pte_row_height_linear >= 8);
12325
12326 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
12327 if (dual_plane) {
12328 unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
12329
12330 #ifdef __DML_VBA_DEBUG__
12331 DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
12332 #endif
12333 if (sw_mode == dml2_sw_linear) {
12334 DML_ASSERT(p1_pte_row_height_linear >= 8);
12335 }
12336 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
12337 }
12338 } else {
12339 rq_regs->rq_regs_l.pte_row_height_linear = 0;
12340 rq_regs->rq_regs_c.pte_row_height_linear = 0;
12341 }
12342
12343 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0);
12344 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0);
12345
12346 // FIXME_DCN4, programming guide has dGPU condition
12347 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
12348 rq_regs->drq_expansion_mode = 0;
12349 } else {
12350 rq_regs->drq_expansion_mode = 2;
12351 }
12352 rq_regs->prq_expansion_mode = 1;
12353 rq_regs->crq_expansion_mode = 1;
12354 rq_regs->mrq_expansion_mode = 1;
12355
12356 stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
12357 stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
12358 is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
12359
12360 // Note: detile_buf_plane1_addr is in unit of 1KB
12361 if (dual_plane) {
12362 if (is_phantom_pipe) {
12363 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
12364 } else {
12365 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
12366 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
12367 #ifdef __DML_VBA_DEBUG__
12368 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
12369 #endif
12370 } else {
12371 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
12372 #ifdef __DML_VBA_DEBUG__
12373 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
12374 #endif
12375 }
12376 }
12377 }
12378 rq_regs->plane1_base_address = detile_buf_plane1_addr;
12379
12380 #ifdef __DML_VBA_DEBUG__
12381 DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
12382 DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
12383 DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
12384 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
12385 DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
12386 DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
12387 #endif
12388 //DML_LOG_VERBOSE_rq_regs_st(rq_regs);
12389 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12390 }
12391
rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch * s,struct dml2_display_dlg_regs * disp_dlg_regs,struct dml2_display_ttu_regs * disp_ttu_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,const unsigned int pipe_idx)12392 static void rq_dlg_get_dlg_reg(
12393 struct dml2_core_internal_scratch *s,
12394 struct dml2_display_dlg_regs *disp_dlg_regs,
12395 struct dml2_display_ttu_regs *disp_ttu_regs,
12396 const struct dml2_display_cfg *display_cfg,
12397 const struct dml2_core_internal_display_mode_lib *mode_lib,
12398 const unsigned int pipe_idx)
12399 {
12400 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
12401
12402 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
12403
12404 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
12405
12406 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12407 DML_ASSERT(l->plane_idx < DML2_MAX_PLANES);
12408
12409 l->source_format = dml2_444_8;
12410 l->odm_mode = dml2_odm_mode_bypass;
12411 l->dual_plane = false;
12412 l->htotal = 0;
12413 l->hactive = 0;
12414 l->hblank_end = 0;
12415 l->vblank_end = 0;
12416 l->interlaced = false;
12417 l->pclk_freq_in_mhz = 0.0;
12418 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12419 l->ref_freq_to_pix_freq = 0.0;
12420
12421 if (l->plane_idx < DML2_MAX_PLANES) {
12422
12423 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
12424 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
12425 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
12426
12427 l->dual_plane = is_dual_plane(l->source_format);
12428
12429 l->htotal = l->timing->h_total;
12430 l->hactive = l->timing->h_active;
12431 l->hblank_end = l->timing->h_blank_end;
12432 l->vblank_end = l->timing->v_blank_end;
12433 l->interlaced = l->timing->interlaced;
12434 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
12435 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
12436
12437 DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
12438 DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
12439 DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
12440 DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
12441 DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz);
12442 DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
12443 DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12444 DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
12445
12446 DML_ASSERT(l->refclk_freq_in_mhz != 0);
12447 DML_ASSERT(l->pclk_freq_in_mhz != 0);
12448 DML_ASSERT(l->ref_freq_to_pix_freq < 4.0);
12449
12450 // Need to figure out which side of odm combine we're in
12451 // Assume the pipe instance under the same plane is in order
12452
12453 if (l->odm_mode == dml2_odm_mode_bypass) {
12454 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
12455 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
12456 // find out how many pipe are in this plane
12457 l->num_active_pipes = mode_lib->mp.num_active_pipes;
12458 l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
12459 l->pipe_idx_in_combine = 0; // pipe index within the plane
12460 l->odm_combine_factor = 2;
12461
12462 if (l->odm_mode == dml2_odm_mode_combine_3to1)
12463 l->odm_combine_factor = 3;
12464 else if (l->odm_mode == dml2_odm_mode_combine_4to1)
12465 l->odm_combine_factor = 4;
12466
12467 for (unsigned int i = 0; i < l->num_active_pipes; i++) {
12468 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
12469 if (i < l->first_pipe_idx_in_plane) {
12470 l->first_pipe_idx_in_plane = i;
12471 }
12472 }
12473 }
12474 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
12475
12476 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
12477 DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
12478 DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
12479 DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
12480 DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
12481 }
12482 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
12483
12484 DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
12485
12486 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
12487 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
12488 disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
12489
12490 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
12491 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
12492
12493 DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
12494 DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
12495 DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12496
12497 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
12498 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
12499
12500 DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
12501
12502 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12503 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12504
12505 DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
12506 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
12507
12508 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
12509 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12510 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12511 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12512 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12513
12514 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
12515 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
12516 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
12517 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
12518 DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
12519
12520 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
12521 DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
12522 }
12523
12524 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
12525 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
12526
12527 DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
12528 DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
12529
12530 // Active
12531 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12532 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12533
12534 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
12535 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
12536
12537 l->refcyc_per_line_delivery_pre_c = 0.0;
12538 l->refcyc_per_line_delivery_c = 0.0;
12539
12540 if (l->dual_plane) {
12541 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12542 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12543
12544 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
12545 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
12546 }
12547
12548 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12549 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12550
12551 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12552 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12553
12554 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
12555 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
12556
12557 l->refcyc_per_req_delivery_pre_c = 0.0;
12558 l->refcyc_per_req_delivery_c = 0.0;
12559 if (l->dual_plane) {
12560 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12561 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12562
12563 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
12564 DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
12565 }
12566
12567 // TTU - Cursor
12568 DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
12569
12570 // Assign to register structures
12571 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
12572 DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
12573
12574 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
12575 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
12576 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
12577 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
12578 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
12579 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
12580 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
12581
12582 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
12583 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
12584
12585 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
12586 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
12587 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
12588 DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
12589
12590 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12591 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12592 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12593 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12594
12595 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12596 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12597 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12598 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12599 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12600 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12601 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12602 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12603 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12604
12605 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
12606 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
12607
12608 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
12609 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
12610 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
12611 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
12612 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
12613 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
12614 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
12615 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
12616 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
12617 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
12618
12619 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12620 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12621 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12622 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12623 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12624 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12625 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12626 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12627
12628 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
12629 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
12630 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
12631 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
12632 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
12633 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
12634 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
12635 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
12636
12637 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
12638 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
12639
12640 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
12641 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
12642 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
12643 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
12644 disp_ttu_regs->qos_level_low_wm = 0;
12645
12646 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
12647
12648 disp_ttu_regs->qos_level_flip = 14;
12649 disp_ttu_regs->qos_level_fixed_l = 8;
12650 disp_ttu_regs->qos_level_fixed_c = 8;
12651 disp_ttu_regs->qos_ramp_disable_l = 0;
12652 disp_ttu_regs->qos_ramp_disable_c = 0;
12653 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
12654
12655 // CHECK for HW registers' range, DML_ASSERT or clamp
12656 DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
12657 DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
12658 DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
12659 DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
12660 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
12661 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
12662
12663 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
12664 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
12665
12666 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
12667 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
12668
12669 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
12670 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
12671
12672
12673 DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8);
12674 DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
12675
12676 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
12677 DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
12678 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
12679 }
12680 if (l->dual_plane) {
12681 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
12682 DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
12683 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
12684 }
12685 }
12686
12687 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
12688 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
12689 if (l->dual_plane) {
12690 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
12691 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
12692 }
12693 DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
12694 if (l->dual_plane) {
12695 DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
12696 }
12697
12698 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
12699 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
12700 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
12701 DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
12702 DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
12703 DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
12704 DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
12705
12706 DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12707
12708 }
12709 }
12710
rq_dlg_get_arb_params(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_arb_regs * arb_param)12711 static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param)
12712 {
12713 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12714
12715 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
12716 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max
12717 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
12718 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
12719 arb_param->sat_level_us = 60;
12720 arb_param->hvm_max_qos_commit_threshold = 0xf;
12721 arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
12722 arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
12723 arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
12724 arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
12725 arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
12726 arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz);
12727
12728 #ifdef __DML_VBA_DEBUG__
12729 DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
12730 DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
12731 DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
12732 DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
12733 DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
12734 #endif
12735
12736 }
12737
/*
 * dml2_core_calcs_get_watermarks() - public entry point for the DCHUB
 * watermark registers; forwards all arguments to rq_dlg_get_wm_regs().
 */
void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg,
		const struct dml2_core_internal_display_mode_lib *mode_lib,
		struct dml2_dchub_watermark_regs *out)
{
	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
}
12742
/*
 * dml2_core_calcs_get_arb_params() - public entry point for the arbiter
 * registers; forwards all arguments to rq_dlg_get_arb_params().
 */
void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg,
		const struct dml2_core_internal_display_mode_lib *mode_lib,
		struct dml2_display_arb_regs *out)
{
	rq_dlg_get_arb_params(display_cfg, mode_lib, out);
}
12747
dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg * display_cfg,struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_per_pipe_register_set * out,int pipe_index)12748 void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
12749 struct dml2_core_internal_display_mode_lib *mode_lib,
12750 struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
12751 {
12752 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
12753 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
12754 out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
12755 }
12756
dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,union dml2_global_sync_programming * out,int pipe_index)12757 void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index)
12758 {
12759 out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
12760 out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
12761 out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
12762 out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
12763 out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
12764 }
12765
dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_per_stream_programming * out,int pipe_index)12766 void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index)
12767 {
12768 dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index);
12769 }
12770
dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,struct dmub_cmd_fams2_global_config * fams2_global_config)12771 void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12772 const struct display_configuation_with_meta *display_cfg,
12773 struct dmub_cmd_fams2_global_config *fams2_global_config)
12774 {
12775 fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required;
12776
12777 if (fams2_global_config->features.bits.enable) {
12778 fams2_global_config->features.bits.enable_stall_recovery = true;
12779 fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START;
12780
12781 fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us;
12782 fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us;
12783 fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us;
12784 fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us;
12785
12786 fams2_global_config->num_streams = display_cfg->display_config.num_streams;
12787 }
12788 }
12789
dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,union dmub_cmd_fams2_config * fams2_base_programming,union dmub_cmd_fams2_config * fams2_sub_programming,enum dml2_pstate_method pstate_method,int plane_index)12790 void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12791 const struct display_configuation_with_meta *display_cfg,
12792 union dmub_cmd_fams2_config *fams2_base_programming,
12793 union dmub_cmd_fams2_config *fams2_sub_programming,
12794 enum dml2_pstate_method pstate_method,
12795 int plane_index)
12796 {
12797 const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
12798 const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
12799 const struct dml2_pstate_meta *stream_pstate_meta = &display_cfg->stage3.stream_pstate_meta[plane_descriptor->stream_index];
12800
12801 struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base;
12802 union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state;
12803
12804 unsigned int i;
12805
12806 if (display_cfg->display_config.overrides.all_streams_blanked) {
12807 /* stream is blanked, so do nothing */
12808 return;
12809 }
12810
12811 /* from display configuration */
12812 base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
12813 base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
12814 base_programming->vblank_start = (uint16_t)(stream_pstate_meta->nom_vtotal -
12815 stream_descriptor->timing.v_front_porch);
12816 base_programming->vblank_end = (uint16_t)(stream_pstate_meta->nom_vtotal -
12817 stream_descriptor->timing.v_front_porch -
12818 stream_descriptor->timing.v_active);
12819 base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
12820
12821 /* from meta */
12822 base_programming->otg_vline_time_ns =
12823 (unsigned int)(stream_pstate_meta->otg_vline_time_us * 1000.0);
12824 base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_pstate_meta->scheduling_delay_otg_vlines;
12825 base_programming->contention_delay_otg_vlines = (uint8_t)stream_pstate_meta->contention_delay_otg_vlines;
12826 base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_pstate_meta->vertical_interrupt_ack_delay_otg_vlines;
12827 base_programming->drr_keepout_otg_vline = (uint16_t)(stream_pstate_meta->nom_vtotal -
12828 stream_descriptor->timing.v_front_porch -
12829 stream_pstate_meta->method_drr.programming_delay_otg_vlines);
12830 base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_pstate_meta->allow_to_target_delay_otg_vlines;
12831 base_programming->max_vtotal = (uint16_t)stream_pstate_meta->max_vtotal;
12832
12833 /* from core */
12834 base_programming->config.bits.min_ttu_vblank_usable = true;
12835 for (i = 0; i < display_cfg->display_config.num_planes; i++) {
12836 /* check if all planes support p-state in blank */
12837 if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
12838 mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
12839 base_programming->config.bits.min_ttu_vblank_usable = false;
12840 break;
12841 }
12842 }
12843
12844 switch (pstate_method) {
12845 case dml2_pstate_method_vactive:
12846 case dml2_pstate_method_fw_vactive_drr:
12847 /* legacy vactive */
12848 base_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
12849 sub_programming->legacy.vactive_det_fill_delay_otg_vlines =
12850 (uint8_t)stream_pstate_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
12851 base_programming->allow_start_otg_vline =
12852 (uint16_t)stream_pstate_meta->method_vactive.common.allow_start_otg_vline;
12853 base_programming->allow_end_otg_vline =
12854 (uint16_t)stream_pstate_meta->method_vactive.common.allow_end_otg_vline;
12855 base_programming->config.bits.clamp_vtotal_min = true;
12856 break;
12857 case dml2_pstate_method_vblank:
12858 case dml2_pstate_method_fw_vblank_drr:
12859 /* legacy vblank */
12860 base_programming->type = FAMS2_STREAM_TYPE_VBLANK;
12861 base_programming->allow_start_otg_vline =
12862 (uint16_t)stream_pstate_meta->method_vblank.common.allow_start_otg_vline;
12863 base_programming->allow_end_otg_vline =
12864 (uint16_t)stream_pstate_meta->method_vblank.common.allow_end_otg_vline;
12865 base_programming->config.bits.clamp_vtotal_min = true;
12866 break;
12867 case dml2_pstate_method_fw_drr:
12868 /* drr */
12869 base_programming->type = FAMS2_STREAM_TYPE_DRR;
12870 sub_programming->drr.programming_delay_otg_vlines =
12871 (uint8_t)stream_pstate_meta->method_drr.programming_delay_otg_vlines;
12872 sub_programming->drr.nom_stretched_vtotal =
12873 (uint16_t)stream_pstate_meta->method_drr.stretched_vtotal;
12874 base_programming->allow_start_otg_vline =
12875 (uint16_t)stream_pstate_meta->method_drr.common.allow_start_otg_vline;
12876 base_programming->allow_end_otg_vline =
12877 (uint16_t)stream_pstate_meta->method_drr.common.allow_end_otg_vline;
12878 /* drr only clamps to vtotal min for single display */
12879 base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
12880 sub_programming->drr.only_stretch_if_required = true;
12881 break;
12882 case dml2_pstate_method_fw_svp:
12883 case dml2_pstate_method_fw_svp_drr:
12884 /* subvp */
12885 base_programming->type = FAMS2_STREAM_TYPE_SUBVP;
12886 sub_programming->subvp.vratio_numerator =
12887 (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
12888 sub_programming->subvp.vratio_denominator = 1000;
12889 sub_programming->subvp.programming_delay_otg_vlines =
12890 (uint8_t)stream_pstate_meta->method_subvp.programming_delay_otg_vlines;
12891 sub_programming->subvp.prefetch_to_mall_otg_vlines =
12892 (uint8_t)stream_pstate_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
12893 sub_programming->subvp.phantom_vtotal =
12894 (uint16_t)stream_pstate_meta->method_subvp.phantom_vtotal;
12895 sub_programming->subvp.phantom_vactive =
12896 (uint16_t)stream_pstate_meta->method_subvp.phantom_vactive;
12897 sub_programming->subvp.config.bits.is_multi_planar =
12898 plane_descriptor->surface.plane1.height > 0;
12899 sub_programming->subvp.config.bits.is_yuv420 =
12900 plane_descriptor->pixel_format == dml2_420_8 ||
12901 plane_descriptor->pixel_format == dml2_420_10 ||
12902 plane_descriptor->pixel_format == dml2_420_12;
12903
12904 base_programming->allow_start_otg_vline =
12905 (uint16_t)stream_pstate_meta->method_subvp.common.allow_start_otg_vline;
12906 base_programming->allow_end_otg_vline =
12907 (uint16_t)stream_pstate_meta->method_subvp.common.allow_end_otg_vline;
12908 base_programming->config.bits.clamp_vtotal_min = true;
12909 break;
12910 case dml2_pstate_method_reserved_hw:
12911 case dml2_pstate_method_reserved_fw:
12912 case dml2_pstate_method_reserved_fw_drr_clamped:
12913 case dml2_pstate_method_reserved_fw_drr_var:
12914 case dml2_pstate_method_na:
12915 case dml2_pstate_method_count:
12916 default:
12917 /* this should never happen */
12918 break;
12919 }
12920 }
12921
dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_mcache_surface_allocation * out,int plane_idx)12922 void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
12923 {
12924 unsigned int n;
12925
12926 out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx);
12927 out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx);
12928 out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx);
12929 out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx);
12930
12931 for (n = 0; n < out->num_mcaches_plane0; n++)
12932 out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n);
12933
12934 for (n = 0; n < out->num_mcaches_plane1; n++)
12935 out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n);
12936
12937 out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx);
12938 out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx);
12939 out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx);
12940 out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx);
12941 out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx);
12942
12943 out->valid = true;
12944 }
12945
/*
 * Store in *out the value dml_get_surface_size_in_mall_bytes() reports for
 * pipe_index.
 */
void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index)
{
	unsigned int mall_bytes = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index);

	*out = mall_bytes;
}
12950
dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_plane_support_info * out,int plane_idx)12951 void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
12952 {
12953 out->mall_svp_size_requirement_ways = 0;
12954
12955 out->nominal_vblank_pstate_latency_hiding_us =
12956 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total /
12957 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
12958
12959 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
12960
12961 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
12962
12963 out->vactive_det_fill_delay_us[dml2_pstate_type_uclk] =
12964 (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk][plane_idx]);
12965 }
12966
dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_stream_support_info * out,int plane_index)12967 void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
12968 {
12969 double phantom_processing_delay_pix;
12970 unsigned int phantom_processing_delay_lines;
12971 unsigned int phantom_min_v_active_lines;
12972 unsigned int phantom_v_active_lines;
12973 unsigned int phantom_v_startup_lines;
12974 unsigned int phantom_v_blank_lines;
12975 unsigned int main_v_blank_lines;
12976 unsigned int rem;
12977
12978 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
12979 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
12980 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
12981 dml2_core_div_rem(phantom_processing_delay_pix,
12982 display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total,
12983 &rem);
12984 if (rem)
12985 phantom_processing_delay_lines++;
12986
12987 phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
12988 phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
12989 display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
12990 phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
12991
12992 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank)
12993 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
12994 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
12995 if (phantom_v_blank_lines > main_v_blank_lines)
12996 phantom_v_blank_lines = main_v_blank_lines;
12997
12998 out->phantom_v_active = phantom_v_active_lines;
12999 // phantom_vtotal = vactive + vblank
13000 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
13001
13002 out->phantom_min_v_active = phantom_min_v_active_lines;
13003 out->phantom_v_startup = phantom_v_startup_lines;
13004
13005 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
13006 #if defined(__DML_VBA_DEBUG__)
13007 DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
13008 DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
13009 DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
13010 DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us);
13011 #endif
13012 }
13013
dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_cfg_programming * out)13014 void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
13015 {
13016 unsigned int k, n;
13017
13018 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
13019 out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
13020 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
13021 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
13022 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
13023 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
13024 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
13025 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
13026 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
13027 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
13028 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
13029 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
13030 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
13031 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
13032 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
13033 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
13034 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
13035 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
13036 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
13037 out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
13038 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
13039 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
13040 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
13041 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
13042 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
13043 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
13044 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
13045 out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.global_temp_read_or_ppt_supported;
13046 out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support;
13047
13048 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
13049 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
13050 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
13051 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
13052 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
13053 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
13054 out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
13055
13056 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
13057 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
13058 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
13059 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
13060 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
13061
13062 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
13063 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
13064 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
13065
13066 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
13067 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
13068 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
13069 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
13070 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
13071 out->informative.mode_support_info.NumberOfTDLUT33cubeSupport = mode_lib->ms.support.NumberOfTDLUT33cubeSupport;
13072 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
13073 out->informative.mode_support_info.qos_bandwidth_support = mode_lib->ms.support.qos_bandwidth_support;
13074 out->informative.mode_support_info.dcfclk_support = mode_lib->ms.support.dcfclk_support;
13075
13076 for (k = 0; k < out->display_config.num_planes; k++) {
13077
13078 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
13079 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
13080 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
13081 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
13082 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
13083 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
13084 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
13085 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
13086
13087 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
13088 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
13089 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
13090 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
13091 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
13092 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
13093 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
13094 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
13095 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
13096 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
13097 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
13098 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
13099
13100 if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
13101 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
13102 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
13103 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
13104 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
13105 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
13106 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
13107 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
13108 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
13109 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
13110 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
13111 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
13112 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
13113 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
13114 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
13115 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
13116 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
13117 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
13118 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
13119 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
13120 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
13121 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
13122 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
13123 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
13124 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
13125 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
13126 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4)
13127 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4;
13128 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4)
13129 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4;
13130
13131 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
13132 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
13133 }
13134
13135 out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib);
13136 out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib);
13137 out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib);
13138 out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib);
13139
13140 out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib);
13141 out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib);
13142 out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib);
13143 out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib);
13144 out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
13145 out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
13146 out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
13147 out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib);
13148
13149 out->informative.mall.total_surface_size_in_mall_bytes = 0;
13150 out->informative.dpp.total_num_dpps_required = 0;
13151 for (k = 0; k < out->display_config.num_planes; ++k) {
13152 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
13153 out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k];
13154 }
13155
13156 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
13157 out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib);
13158
13159 out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib);
13160 out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib);
13161 out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib);
13162
13163 out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib);
13164 out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib);
13165 out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib);
13166 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib);
13167 out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib);
13168
13169 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib);
13170 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib);
13171 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib);
13172 out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib);
13173
13174 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib);
13175 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib);
13176 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib);
13177 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib);
13178
13179 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib);
13180 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib);
13181 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib);
13182
13183 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib);
13184 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib);
13185 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib);
13186
13187 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib);
13188 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib);
13189 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib);
13190 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib);
13191
13192 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib);
13193 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib);
13194 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib);
13195 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib);
13196
13197 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib);
13198 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib);
13199 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib);
13200 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib);
13201
13202 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib);
13203 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib);
13204 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib);
13205 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib);
13206
13207 out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib);
13208 out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib);
13209
13210 out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib);
13211 out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib);
13212 out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
13213
13214 out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib);
13215 out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
13216 out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib);
13217
13218 out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_no_vblank_z8(mode_lib);
13219 out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency_z8(mode_lib);
13220 out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib);
13221 out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib);
13222
13223 out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib);
13224 out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib);
13225 out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib);
13226
13227 out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib);
13228
13229 out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib);
13230
13231 out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
13232
13233 out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000;
13234
13235 for (k = 0; k < out->display_config.num_planes; k++) {
13236
13237 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
13238 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13239 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13240 out->informative.misc.PrefetchMode[k] = 0;
13241 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13242 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13243 out->informative.misc.PrefetchMode[k] = 1;
13244 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
13245 out->informative.misc.PrefetchMode[k] = 2;
13246 else
13247 out->informative.misc.PrefetchMode[k] = 3;
13248
13249 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
13250 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
13251 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
13252 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
13253 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
13254 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
13255 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
13256 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
13257 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
13258 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
13259 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
13260 out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
13261 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
13262 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
13263 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
13264 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
13265 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
13266 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
13267 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
13268 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
13269 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
13270 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
13271 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
13272 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
13273 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
13274 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
13275 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
13276 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
13277 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
13278 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
13279 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
13280 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
13281 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
13282 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
13283 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
13284 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
13285 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
13286 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
13287 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
13288 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
13289 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
13290 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
13291 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
13292 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
13293
13294 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
13295 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
13296 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
13297 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
13298 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
13299 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
13300 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
13301 out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
13302 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
13303 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
13304 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
13305 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
13306 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
13307 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
13308 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
13309 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
13310 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
13311 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
13312 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
13313
13314 out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0;
13315 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
13316 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
13317 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
13318 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
13319 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
13320 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
13321 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
13322
13323 if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
13324 out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
13325 }
13326
// For this DV informative layer, all pipes in the same plane simply use the same id.
// The optimization and helper layer will be added later on.
// This only works when the available "mcache" is high enough to fit everything without thrashing the cache.
13330 for (k = 0; k < out->display_config.num_planes; k++) {
13331 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
13332 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
13333
13334 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
13335 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
13336 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
13337 }
13338
13339 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
13340 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
13341
13342 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
13343 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
13344 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
13345 }
13346 }
13347 out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
13348
13349 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
13350 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
13351 / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
13352 out->informative.misc.ROBUrgencyAvoidance = true;
13353 } else {
13354 out->informative.misc.ROBUrgencyAvoidance = false;
13355 }
13356 } else {
13357 out->informative.misc.ROBUrgencyAvoidance = true;
13358 }
13359 }
13360