1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Author: Monk.liu@amd.com
23 */
24 #ifndef AMDGPU_VIRT_H
25 #define AMDGPU_VIRT_H
26
27 #include "amdgv_sriovmsg.h"
28
/* Bit flags stored in amdgpu_virt.caps; tested by the amdgpu_sriov_*() / amdgpu_passthrough() macros. */
#define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */
#define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */
#define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */
#define AMDGPU_PASSTHROUGH_MODE        (1 << 3) /* the whole GPU is pass through for VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME      (1 << 4) /* is out of full access mode */
#define AMDGPU_VF_MMIO_ACCESS_PROTECT  (1 << 5) /* MMIO write access is not allowed in sriov runtime */
35
/*
 * Flags for the indirect register access path supported by rlcg for sriov.
 *
 * The selector value is placed in the top nibble (bits 31:28).  The
 * constants are unsigned on purpose: 0x8 << 28 does not fit in a signed
 * int, so the signed form shifts into the sign bit (undefined behaviour)
 * and would sign-extend if ever widened to 64 bits.
 */
#define AMDGPU_RLCG_GC_WRITE_LEGACY    (0x8U << 28)
#define AMDGPU_RLCG_GC_WRITE           (0x0U << 28)
#define AMDGPU_RLCG_GC_READ            (0x1U << 28)
#define AMDGPU_RLCG_MMHUB_WRITE        (0x2U << 28)
41
/*
 * Error codes for the indirect register access path supported by rlcg for
 * sriov.  Each code is a single bit that lies inside
 * AMDGPU_RLCG_SCRATCH1_ERROR_MASK.
 */
#define AMDGPU_RLCG_VFGATE_DISABLED		0x4000000
#define AMDGPU_RLCG_WRONG_OPERATION_TYPE	0x2000000
#define AMDGPU_RLCG_REG_NOT_IN_RANGE		0x1000000

/* Low 20 bits. */
#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK	0xFFFFF
/* Bits 27:24; covers the three error codes above. */
#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK		0xF000000
49
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503

/*
 * NOTE(review): presumably the bound for amdgpu_virt.vf2pf_update_retry_cnt;
 * the comparison itself is not in this header -- confirm in the .c file.
 */
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
56
/*
 * Operating mode returned by amdgpu_virt_get_sriov_vf_mode().
 * Every enumerator carries its numeric value explicitly.
 */
enum amdgpu_sriov_vf_mode {
	SRIOV_VF_MODE_BARE_METAL = 0,
	SRIOV_VF_MODE_ONE_VF     = 1,
	SRIOV_VF_MODE_MULTI_VF   = 2,
};
62
/* Handles for the MM table buffer; embedded in struct amdgpu_virt as mm_table. */
struct amdgpu_mm_table {
	struct amdgpu_bo	*bo;		/* buffer object */
	uint32_t		*cpu_addr;	/* NOTE(review): presumably the CPU mapping of bo -- confirm */
	uint64_t		gpu_addr;	/* NOTE(review): presumably the GPU address of bo -- confirm */
};
68
/* Capacity (in entries) of each array in struct amdgpu_vf_error_buffer. */
#define AMDGPU_VF_ERROR_ENTRY_SIZE    16

/*
 * struct amdgpu_vf_error_buffer - amdgpu VF error information.
 *
 * Holds up to AMDGPU_VF_ERROR_ENTRY_SIZE records as three parallel arrays
 * (code, flags, data), so entry i is code[i]/flags[i]/data[i].
 */
struct amdgpu_vf_error_buffer {
	struct mutex lock;
	/* NOTE(review): read_count/write_count presumably track consumed and produced entries -- confirm against the users */
	int read_count;
	int write_count;
	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
80
/* Only named in the trans_msg() prototype below; defined elsewhere. */
enum idh_request;

/**
 * struct amdgpu_virt_ops - amdgpu device virt operations
 *
 * Callback table reached through amdgpu_virt.ops.  This header only fixes
 * the signatures; what each hook does is defined by whichever
 * implementation fills in the table.
 */
struct amdgpu_virt_ops {
	/* full GPU access request / release */
	int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
	int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
	int (*req_init_data)(struct amdgpu_device *adev);
	/* reset handling */
	int (*reset_gpu)(struct amdgpu_device *adev);
	void (*ready_to_reset)(struct amdgpu_device *adev);
	int (*wait_reset)(struct amdgpu_device *adev);
	/* send request @req with three 32-bit payload words */
	void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
			  u32 data1, u32 data2, u32 data3);
	/* RAS related hooks */
	void (*ras_poison_handler)(struct amdgpu_device *adev,
				   enum amdgpu_ras_block block);
	bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
	int (*req_ras_err_count)(struct amdgpu_device *adev);
	int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
	int (*req_bad_pages)(struct amdgpu_device *adev);
};
102
/*
 * Firmware Reserve Frame buffer
 *
 * Pointers into the data exchanged between PF and VF; embedded in
 * struct amdgpu_virt as fw_reserve.
 */
struct amdgpu_virt_fw_reserve {
	struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;	/* PF -> VF message header */
	struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;	/* VF -> PF message header */
	void *ras_telemetry;	/* must be non-NULL for amdgpu_sriov_ras_telemetry_en() to be true */
	unsigned int checksum_key;	/* NOTE(review): presumably the key used for the message checksums -- confirm */
};
112
113 /*
114 * Legacy GIM header
115 *
 * Definition between PF and VF
117 * Structures forcibly aligned to 4 to keep the same style as PF.
118 */
#define AMDGIM_DATAEXCHANGE_OFFSET		(64 * 1024)

/*
 * Number of 32-bit words left in a structure of @total 32-bit words after
 * accounting for @n_u8 bytes, @n_u16 16-bit fields, @n_u32 32-bit fields
 * and @n_u64 64-bit fields.  Byte and 16-bit counts are rounded up to whole
 * 32-bit words.
 *
 * Every argument, including @total, is parenthesized so that an expression
 * with low-precedence operators (e.g. a ?: or a sum) expands correctly.
 */
#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, n_u8, n_u16, n_u32, n_u64) \
	((total) - (((n_u8) + 3) / 4 + ((n_u16) + 1) / 2 + (n_u32) + (n_u64) * 2))
123
/*
 * Feature bits stored in amdgpu_virt.gim_feature.  All values are written
 * as single-bit shifts so the bit position can be read off directly.
 */
enum AMDGIM_FEATURE_FLAG {
	/* GIM supports feature of Error log collecting */
	AMDGIM_FEATURE_ERROR_LOG_COLLECT	= (1 << 0),
	/* GIM supports feature of loading uCodes */
	AMDGIM_FEATURE_GIM_LOAD_UCODES		= (1 << 1),
	/* VRAM LOST by GIM */
	AMDGIM_FEATURE_GIM_FLR_VRAMLOST		= (1 << 2),
	/* MM bandwidth */
	AMDGIM_FEATURE_GIM_MM_BW_MGR		= (1 << 3),
	/* PP ONE VF MODE in GIM */
	AMDGIM_FEATURE_PP_ONE_VF		= (1 << 4),
	/* Indirect Reg Access enabled */
	AMDGIM_FEATURE_INDIRECT_REG_ACCESS	= (1 << 5),
	/* AV1 Support MODE */
	AMDGIM_FEATURE_AV1_SUPPORT		= (1 << 6),
	/* VCN RB decouple */
	AMDGIM_FEATURE_VCN_RB_DECOUPLE		= (1 << 7),
	/* MES info */
	AMDGIM_FEATURE_MES_INFO_ENABLE		= (1 << 8),
	/* tested by amdgpu_sriov_ras_caps_en() */
	AMDGIM_FEATURE_RAS_CAPS			= (1 << 9),
	/* tested by amdgpu_sriov_ras_telemetry_en() */
	AMDGIM_FEATURE_RAS_TELEMETRY		= (1 << 10),
	/* tested by amdgpu_sriov_ras_cper_en() */
	AMDGIM_FEATURE_RAS_CPER			= (1 << 11),
};
147
/*
 * Bits stored in amdgpu_virt.reg_access; each one is tested (together with
 * amdgpu_sriov_vf()) by the matching amdgpu_sriov_reg_*() macro.
 */
enum AMDGIM_REG_ACCESS_FLAG {
	/* Use PSP to program IH_RB_CNTL */
	AMDGIM_FEATURE_IH_REG_PSP_EN     = (1 << 0),
	/* Use RLC to program MMHUB regs */
	AMDGIM_FEATURE_MMHUB_REG_RLC_EN  = (1 << 1),
	/* Use RLC to program GC regs */
	AMDGIM_FEATURE_GC_REG_RLC_EN     = (1 << 2),
	/* Use PSP to program L1_TLB_CNTL */
	AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
	/* Use RLCG to program SQ_CONFIG1 */
	AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4),
};
160
/*
 * Legacy (v1) PF -> VF information block.  The layout is shared with the
 * PF side, so field order and types must not change.
 */
struct amdgim_pf2vf_info_v1 {
	/* header contains size and version */
	struct amd_sriov_msg_pf2vf_info_header header;
	/* max_width * max_height */
	unsigned int uvd_enc_max_pixels_count;
	/* 16x16 pixels/sec, codec independent */
	unsigned int uvd_enc_max_bandwidth;
	/* max_width * max_height */
	unsigned int vce_enc_max_pixels_count;
	/* 16x16 pixels/sec, codec independent */
	unsigned int vce_enc_max_bandwidth;
	/* MEC FW position in kb from the start of visible frame buffer */
	unsigned int mecfw_kboffset;
	/* The features flags of the GIM driver supports. */
	unsigned int feature_flags;
	/* use private key from mailbox 2 to create checksum */
	unsigned int checksum;
} __aligned(4);
179
/*
 * Legacy (v1) VF -> PF information block.  The layout is shared with the
 * PF side, so field order and types must not change.
 */
struct amdgim_vf2pf_info_v1 {
	/* header contains size and version */
	struct amd_sriov_msg_vf2pf_info_header header;
	/* driver version */
	char driver_version[64];
	/* driver certification, 1=WHQL, 0=None */
	unsigned int driver_cert;
	/* guest OS type and version: need a define */
	unsigned int os_info;
	/* in the unit of 1M */
	unsigned int fb_usage;
	/* guest gfx engine usage percentage */
	unsigned int gfx_usage;
	/* guest gfx engine health percentage */
	unsigned int gfx_health;
	/* guest compute engine usage percentage */
	unsigned int compute_usage;
	/* guest compute engine health percentage */
	unsigned int compute_health;
	/* guest vce engine usage percentage. 0xffff means N/A. */
	unsigned int vce_enc_usage;
	/* guest vce engine health percentage. 0xffff means N/A. */
	unsigned int vce_enc_health;
	/* guest uvd engine usage percentage. 0xffff means N/A. */
	unsigned int uvd_enc_usage;
	/* guest uvd engine health percentage. 0xffff means N/A. */
	unsigned int uvd_enc_health;
	/* trailing checksum (placed right after the header in v2) */
	unsigned int checksum;
} __aligned(4);
209
/*
 * v2 VF -> PF information block.  Same fields as v1, but the checksum
 * follows the header directly and the structure is padded by reserved[].
 * The layout is shared with the PF side, so field order and types must
 * not change.
 */
struct amdgim_vf2pf_info_v2 {
	/* header contains size and version */
	struct amd_sriov_msg_vf2pf_info_header header;
	uint32_t checksum;
	/* driver version */
	uint8_t driver_version[64];
	/* driver certification, 1=WHQL, 0=None */
	uint32_t driver_cert;
	/* guest OS type and version: need a define */
	uint32_t os_info;
	/* in the unit of 1M */
	uint32_t fb_usage;
	/* guest gfx engine usage percentage */
	uint32_t gfx_usage;
	/* guest gfx engine health percentage */
	uint32_t gfx_health;
	/* guest compute engine usage percentage */
	uint32_t compute_usage;
	/* guest compute engine health percentage */
	uint32_t compute_health;
	/* guest vce engine usage percentage. 0xffff means N/A. */
	uint32_t vce_enc_usage;
	/* guest vce engine health percentage. 0xffff means N/A. */
	uint32_t vce_enc_health;
	/* guest uvd engine usage percentage. 0xffff means N/A. */
	uint32_t uvd_enc_usage;
	/* guest uvd engine health percentage. 0xffff means N/A. */
	uint32_t uvd_enc_health;
	/*
	 * Padding: 256 32-bit words minus the 64 bytes of driver_version, the
	 * 12 uint32_t fields above (checksum included) and the header.
	 */
	uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
} __aligned(4);
240
/* Bad-page bookkeeping; referenced from struct amdgpu_virt through virt_eh_data. */
struct amdgpu_virt_ras_err_handler_data {
	/* point to bad page records array */
	struct eeprom_table_record *bps;
	/* point to reserved bo array */
	struct amdgpu_bo **bps_bo;
	/* the count of entries */
	int count;
	/* last reserved entry's index + 1 */
	int last_reserved;
};
251
/* RAS state for a VF; embedded in struct amdgpu_virt as ras. */
struct amdgpu_virt_ras {
	/* NOTE(review): presumably rate limits the error-count and CPER-dump requests respectively -- confirm */
	struct ratelimit_state ras_error_cnt_rs;
	struct ratelimit_state ras_cper_dump_rs;
	struct mutex ras_telemetry_mutex;
	/* NOTE(review): presumably the read pointer passed as vf_rptr to ops->req_ras_cper_dump() -- confirm */
	uint64_t cper_rptr;
};
258
/*
 * GPU virtualization
 *
 * Per-device virtualization state, accessed as adev->virt by the macros
 * in this header.
 */
struct amdgpu_virt {
	/* AMDGPU_SRIOV_CAPS_*, AMDGPU_PASSTHROUGH_MODE, AMDGPU_VF_MMIO_ACCESS_PROTECT bits */
	uint32_t			caps;
	struct amdgpu_bo		*csa_obj;
	void				*csa_cpu_addr;
	bool				chained_ib_support;
	uint32_t			reg_val_offs;
	struct amdgpu_irq_src		ack_irq;
	struct amdgpu_irq_src		rcv_irq;

	struct work_struct		flr_work;
	struct work_struct		bad_pages_work;

	struct amdgpu_mm_table		mm_table;
	const struct amdgpu_virt_ops	*ops;
	struct amdgpu_vf_error_buffer	vf_errors;
	struct amdgpu_virt_fw_reserve	fw_reserve;
	/* enum AMDGIM_FEATURE_FLAG bits */
	uint32_t gim_feature;
	uint32_t reg_access_mode;
	int req_init_data_ver;
	/* read by amdgpu_sriov_is_debug() / amdgpu_sriov_is_normal() */
	bool tdr_debug;
	struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
	bool ras_init_done;
	/* enum AMDGIM_REG_ACCESS_FLAG bits */
	uint32_t reg_access;

	/* vf2pf message */
	struct delayed_work vf2pf_work;
	uint32_t vf2pf_update_interval_ms;
	int vf2pf_update_retry_cnt;

	/* multimedia bandwidth config */
	bool is_mm_bw_enabled;
	uint32_t decode_max_dimension_pixels;
	uint32_t decode_max_frame_pixels;
	uint32_t encode_max_dimension_pixels;
	uint32_t encode_max_frame_pixels;

	/* the ucode id to signal the autoload */
	uint32_t autoload_ucode_id;

	/* Spinlock to protect access to the RLCG register interface */
	spinlock_t rlcg_reg_lock;

	union amd_sriov_ras_caps ras_en_caps;
	/* per-block bits tested by amdgpu_sriov_ras_telemetry_block_en() */
	union amd_sriov_ras_caps ras_telemetry_en_caps;
	struct amdgpu_virt_ras ras;
	struct amd_sriov_ras_telemetry_error_count count_cache;

	/* hibernate and resume with different VF feature for xgmi enabled system */
	bool is_xgmi_node_migrate_enabled;
};
310
/* Forward declaration; only used by pointer in the amdgpu_virt_update_sriov_video_codec() prototype. */
struct amdgpu_video_codec_info;
312
/*
 * Tests on adev->virt.caps.  Each macro yields the masked bit (non-zero
 * when set), not a normalized 0/1 value.
 */
#define amdgpu_sriov_enabled(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)

#define amdgpu_sriov_vf(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_IS_VF)

#define amdgpu_sriov_bios(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)

#define amdgpu_sriov_runtime(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)

/* VF that is not in runtime mode. */
#define amdgpu_sriov_fullaccess(adev) \
(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))

/* VF and the indirect register access feature bit is set in gim_feature. */
#define amdgpu_sriov_reg_indirect_en(adev) \
(amdgpu_sriov_vf((adev)) && \
	((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS)))

/* The following tests combine amdgpu_sriov_vf() with one bit of virt.reg_access. */
#define amdgpu_sriov_reg_indirect_ih(adev) \
(amdgpu_sriov_vf((adev)) && \
	((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN)))

#define amdgpu_sriov_reg_indirect_mmhub(adev) \
(amdgpu_sriov_vf((adev)) && \
	((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN)))

#define amdgpu_sriov_reg_indirect_gc(adev) \
(amdgpu_sriov_vf((adev)) && \
	((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))

#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
(amdgpu_sriov_vf((adev)) && \
	((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))

/* True when either the MMHUB or the GC indirect path is in use. */
#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
(amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))

#define amdgpu_sriov_reg_access_sq_config(adev) \
(amdgpu_sriov_vf((adev)) && \
	((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG)))

#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)

#define amdgpu_sriov_vf_mmio_access_protection(adev) \
((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)

/* RAS feature tests on virt.gim_feature; these do not check amdgpu_sriov_vf(). */
#define amdgpu_sriov_ras_caps_en(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)

/* Feature bit set and a ras_telemetry pointer present in fw_reserve. */
#define amdgpu_sriov_ras_telemetry_en(adev) \
(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)

/* Telemetry enabled and bit @sriov_blk set in ras_telemetry_en_caps.all ('&' binds tighter than '&&'). */
#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))

#define amdgpu_sriov_ras_cper_en(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
372
/*
 * Report whether the kernel is running under a hypervisor.
 *
 * x86:   true when the boot CPU has X86_FEATURE_HYPERVISOR.
 * arm64: true when the kernel is not running in hyp mode.
 * other: always false.
 */
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
#elif defined(CONFIG_ARM64)
	return !is_kernel_in_hyp_mode();
#else
	return false;
#endif
}
383
/*
 * Feature tests on adev->virt.gim_feature.  Each yields the masked bit
 * (non-zero when set).
 */
#define amdgpu_sriov_is_pp_one_vf(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
/* VF without the PP one-VF feature bit. */
#define amdgpu_sriov_multi_vf_mode(adev) \
	(amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
/*
 * Not in reset, with virt.tdr_debug set / clear respectively.  The
 * argument is parenthesized before '->' so that expressions such as
 * &dev or ring->adev expand correctly.
 */
#define amdgpu_sriov_is_debug(adev) \
	((!amdgpu_in_reset(adev)) && (adev)->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
	((!amdgpu_in_reset(adev)) && (!(adev)->virt.tdr_debug))
#define amdgpu_sriov_is_av1_support(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
#define amdgpu_sriov_is_mes_info_enable(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)

/* Migration flag set and a non-zero xgmi node segment size. */
#define amdgpu_virt_xgmi_migrate_enabled(adev) \
	((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0)
401
/*
 * Functions declared here and defined elsewhere in the driver.  Only the
 * signatures are fixed by this header; see the definitions for return
 * value and locking conventions.
 */
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_init(struct amdgpu_device *adev);

/* debugfs access control */
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);

enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);

/* Takes separate encode and decode codec arrays, each with its element count. */
void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
			struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
			struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
/* register access helpers */
void amdgpu_sriov_wreg(struct amdgpu_device *adev,
		       u32 offset, u32 value,
		       u32 acc_flags, u32 hwip, u32 xcc_id);
u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
		      u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
			uint32_t ucode_id);
void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
void amdgpu_virt_post_reset(struct amdgpu_device *adev);
bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
/* @rlcg_flag is an output pointer; NOTE(review): presumably receives one of the AMDGPU_RLCG_* flags -- confirm */
bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
					  u32 acc_flags, u32 hwip,
					  bool write, u32 *rlcg_flag);
u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
/* RAS */
bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
				  struct ras_err_data *err_data);
int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
					enum amdgpu_ras_block block);
void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
450 #endif
451