1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "xe_tuning.h"
7
8 #include <kunit/visibility.h>
9
10 #include <drm/drm_managed.h>
11
12 #include "regs/xe_gt_regs.h"
13 #include "xe_gt_types.h"
14 #include "xe_platform_types.h"
15 #include "xe_rtp.h"
16
/*
 * Override XE_REG_MCR for the rest of this file so that it expands to a plain
 * XE_REG() initializer with the .mcr member set to 1.
 *
 * NOTE(review): presumably this lets the MCR register definitions pulled in
 * from regs/xe_gt_regs.h be used directly inside the RTP action tables below;
 * confirm against the XE_REG_MCR/XE_REG definitions and xe_rtp.h.
 */
#undef XE_REG_MCR
#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
19
20 static const struct xe_rtp_entry_sr gt_tunings[] = {
21 { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
22 XE_RTP_RULES(PLATFORM(DG2)),
23 XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
24 },
25 { XE_RTP_NAME("Tuning: 32B Access Enable"),
26 XE_RTP_RULES(PLATFORM(DG2)),
27 XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
28 },
29
30 /* Xe2 */
31
32 { XE_RTP_NAME("Tuning: L3 cache"),
33 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
34 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
35 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
36 },
37 { XE_RTP_NAME("Tuning: L3 cache - media"),
38 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
39 XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
40 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
41 },
42 { XE_RTP_NAME("Tuning: Compression Overfetch"),
43 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
44 XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
45 SET(CCCHKNREG1, L3CMPCTRL))
46 },
47 { XE_RTP_NAME("Tuning: Compression Overfetch - media"),
48 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
49 XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
50 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
51 },
52 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
53 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
54 XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
55 },
56 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
57 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
58 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
59 },
60 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
61 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
62 XE_RTP_ACTIONS(SET(L3SQCREG2,
63 COMPMEMRD256BOVRFETCHEN))
64 },
65 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
66 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
67 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
68 COMPMEMRD256BOVRFETCHEN))
69 },
70 { XE_RTP_NAME("Tuning: Stateless compression control"),
71 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
72 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
73 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
74 },
75 { XE_RTP_NAME("Tuning: Stateless compression control - media"),
76 XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
77 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
78 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
79 },
80 { XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
81 XE_RTP_RULES(GRAPHICS_VERSION(2004)),
82 XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
83 },
84 { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
85 XE_RTP_RULES(MEDIA_VERSION(2000)),
86 XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
87 },
88 };
89
90 static const struct xe_rtp_entry_sr engine_tunings[] = {
91 { XE_RTP_NAME("Tuning: L3 Hashing Mask"),
92 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
93 FUNC(xe_rtp_match_first_render_or_compute)),
94 XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC))
95 },
96 { XE_RTP_NAME("Tuning: Set Indirect State Override"),
97 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
98 ENGINE_CLASS(RENDER)),
99 XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
100 },
101 };
102
103 static const struct xe_rtp_entry_sr lrc_tunings[] = {
104 /* DG2 */
105
106 { XE_RTP_NAME("Tuning: L3 cache"),
107 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
108 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
109 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
110 },
111 { XE_RTP_NAME("Tuning: TDS gang timer"),
112 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
113 /* read verification is ignored as in i915 - need to check enabling */
114 XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
115 FF_MODE2_TDS_TIMER_MASK,
116 FF_MODE2_TDS_TIMER_128))
117 },
118 { XE_RTP_NAME("Tuning: TBIMR fast clip"),
119 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
120 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
121 },
122
123 /* Xe_LPG */
124
125 { XE_RTP_NAME("Tuning: L3 cache"),
126 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
127 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
128 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
129 },
130
131 /* Xe2_HPG */
132
133 { XE_RTP_NAME("Tuning: vs hit max value"),
134 XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
135 XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
136 REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
137 },
138 };
139
140 /**
141 * xe_tuning_init - initialize gt with tunings bookkeeping
142 * @gt: GT instance to initialize
143 *
144 * Returns 0 for success, negative error code otherwise.
145 */
xe_tuning_init(struct xe_gt * gt)146 int xe_tuning_init(struct xe_gt *gt)
147 {
148 struct xe_device *xe = gt_to_xe(gt);
149 size_t n_lrc, n_engine, n_gt, total;
150 unsigned long *p;
151
152 n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings));
153 n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings));
154 n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings));
155 total = n_gt + n_engine + n_lrc;
156
157 p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
158 if (!p)
159 return -ENOMEM;
160
161 gt->tuning_active.gt = p;
162 p += n_gt;
163 gt->tuning_active.engine = p;
164 p += n_engine;
165 gt->tuning_active.lrc = p;
166
167 return 0;
168 }
169 ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */
170
xe_tuning_process_gt(struct xe_gt * gt)171 void xe_tuning_process_gt(struct xe_gt *gt)
172 {
173 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
174
175 xe_rtp_process_ctx_enable_active_tracking(&ctx,
176 gt->tuning_active.gt,
177 ARRAY_SIZE(gt_tunings));
178 xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr);
179 }
180 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
181
xe_tuning_process_engine(struct xe_hw_engine * hwe)182 void xe_tuning_process_engine(struct xe_hw_engine *hwe)
183 {
184 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
185
186 xe_rtp_process_ctx_enable_active_tracking(&ctx,
187 hwe->gt->tuning_active.engine,
188 ARRAY_SIZE(engine_tunings));
189 xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
190 &hwe->reg_sr);
191 }
192 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
193
194 /**
195 * xe_tuning_process_lrc - process lrc tunings
196 * @hwe: engine instance to process tunings for
197 *
198 * Process LRC table for this platform, saving in @hwe all the tunings that need
199 * to be applied on context restore. These are tunings touching registers that
200 * are part of the HW context image.
201 */
xe_tuning_process_lrc(struct xe_hw_engine * hwe)202 void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
203 {
204 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
205
206 xe_rtp_process_ctx_enable_active_tracking(&ctx,
207 hwe->gt->tuning_active.lrc,
208 ARRAY_SIZE(lrc_tunings));
209 xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
210 }
211
xe_tuning_dump(struct xe_gt * gt,struct drm_printer * p)212 void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
213 {
214 size_t idx;
215
216 drm_printf(p, "GT Tunings\n");
217 for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
218 drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);
219
220 drm_printf(p, "\nEngine Tunings\n");
221 for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
222 drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);
223
224 drm_printf(p, "\nLRC Tunings\n");
225 for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
226 drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);
227 }
228