/*
 * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef AMDGV_SRIOV_MSG__H_
#define AMDGV_SRIOV_MSG__H_

/*
 * Layout definitions for the PF2VF / VF2PF message areas, the mailbox
 * message identifiers and the RAS telemetry structures.
 *
 * NOTE(review): this header includes nothing itself but uses the fixed-width
 * integer types (uint8_t .. uint64_t) and the __packed annotation, so the
 * including file presumably has to provide them first -- confirm.
 */

/* unit in kilobytes */
#define AMD_SRIOV_MSG_VBIOS_OFFSET           0
#define AMD_SRIOV_MSG_VBIOS_SIZE_KB          64
#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB   4
#define AMD_SRIOV_MSG_TMR_OFFSET_KB          2048
#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB       2
#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB      64
/*
 * layout
 * 0           64KB        65KB        66KB           68KB                   132KB
 * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
 * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
 */

/* Size of one message area (PF2VF or VF2PF); each offset below chains on the previous region. */
#define AMD_SRIOV_MSG_SIZE_KB                 1
#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB         AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB         (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB      (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)

/*
 * PF2VF history log:
 * v1 defined in amdgim
 * v2 current
 *
 * VF2PF history log:
 * v1 defined in amdgim
 * v2 defined in amdgim
 * v3 current
 */
#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3

/* Number of ucode_info slots reserved in the VF2PF area (see the static asserts at the end). */
#define AMD_SRIOV_MSG_RESERVE_UCODE 24

/* Number of mm_bw_management entries reserved in the PF2VF area. */
#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4

/*
 * ucode engine identifiers.
 * AMD_SRIOV_UCODE_ID__MAX is the number of identifiers and must stay below
 * AMD_SRIOV_MSG_RESERVE_UCODE (checked by a static assert below).
 */
enum amd_sriov_ucode_engine_id {
	AMD_SRIOV_UCODE_ID_VCE = 0,
	AMD_SRIOV_UCODE_ID_UVD,
	AMD_SRIOV_UCODE_ID_MC,
	AMD_SRIOV_UCODE_ID_ME,
	AMD_SRIOV_UCODE_ID_PFP,
	AMD_SRIOV_UCODE_ID_CE,
	AMD_SRIOV_UCODE_ID_RLC,
	AMD_SRIOV_UCODE_ID_RLC_SRLC,
	AMD_SRIOV_UCODE_ID_RLC_SRLG,
	AMD_SRIOV_UCODE_ID_RLC_SRLS,
	AMD_SRIOV_UCODE_ID_MEC,
	AMD_SRIOV_UCODE_ID_MEC2,
	AMD_SRIOV_UCODE_ID_SOS,
	AMD_SRIOV_UCODE_ID_ASD,
	AMD_SRIOV_UCODE_ID_TA_RAS,
	AMD_SRIOV_UCODE_ID_TA_XGMI,
	AMD_SRIOV_UCODE_ID_SMC,
	AMD_SRIOV_UCODE_ID_SDMA,
	AMD_SRIOV_UCODE_ID_SDMA2,
	AMD_SRIOV_UCODE_ID_VCN,
	AMD_SRIOV_UCODE_ID_DMCU,
	AMD_SRIOV_UCODE_ID__MAX
};

#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed

/* Feature flag bits carried in the PF2VF area; 'all' views the same 32 bits as one word. */
union amd_sriov_msg_feature_flags {
	struct {
		uint32_t error_log_collect    : 1;
		uint32_t host_load_ucodes     : 1;
		uint32_t host_flr_vramlost    : 1;
		uint32_t mm_bw_management     : 1;
		uint32_t pp_one_vf_mode       : 1;
		uint32_t reg_indirect_acc     : 1;
		uint32_t av1_support          : 1;
		uint32_t vcn_rb_decouple      : 1;
		uint32_t mes_info_dump_enable : 1;
		uint32_t ras_caps             : 1;
		uint32_t ras_telemetry        : 1;
		uint32_t ras_cper             : 1;
		uint32_t reserved             : 20;
	} flags;
	uint32_t all;
};

/* Register access flag bits carried in the PF2VF area; 'all' views the same 32 bits as one word. */
union amd_sriov_reg_access_flags {
	struct {
		uint32_t vf_reg_access_ih          : 1;
		uint32_t vf_reg_access_mmhub       : 1;
		uint32_t vf_reg_access_gc          : 1;
		uint32_t vf_reg_access_l1_tlb_cntl : 1;
		uint32_t vf_reg_access_sq_config   : 1;
		uint32_t reserved                  : 27;
	} flags;
	uint32_t all;
};

/*
 * RAS capability bits, 64 bits wide; 'all' views the same bits as one word.
 * The block_* bits are declared in the same order as the entries of
 * enum amd_sriov_ras_telemetry_gpu_block.
 */
union amd_sriov_ras_caps {
	struct {
		uint64_t block_umc       : 1;
		uint64_t block_sdma      : 1;
		uint64_t block_gfx       : 1;
		uint64_t block_mmhub     : 1;
		uint64_t block_athub     : 1;
		uint64_t block_pcie_bif  : 1;
		uint64_t block_hdp       : 1;
		uint64_t block_xgmi_wafl : 1;
		uint64_t block_df        : 1;
		uint64_t block_smn       : 1;
		uint64_t block_sem       : 1;
		uint64_t block_mp0       : 1;
		uint64_t block_mp1       : 1;
		uint64_t block_fuse      : 1;
		uint64_t block_mca       : 1;
		uint64_t block_vcn       : 1;
		uint64_t block_jpeg      : 1;
		uint64_t block_ih        : 1;
		uint64_t block_mpio      : 1;
		/* spelling ("propogation") is part of the interface; do not rename */
		uint64_t poison_propogation_mode : 1;
		uint64_t reserved        : 44;
	} bits;
	uint64_t all;
};

/* Guest OS information bits reported in the VF2PF area. */
union amd_sriov_msg_os_info {
	struct {
		uint32_t windows  : 1;
		uint32_t reserved : 31;
	} info;
	uint32_t all;
};

/* UUID information, 16 bytes in total under the byte packing in effect here. */
struct amd_sriov_msg_uuid_info {
	/* first dword: either the did/fcn/asic_7 fields or time_low */
	union {
		struct {
			uint32_t did    : 16;
			uint32_t fcn    : 8;
			uint32_t asic_7 : 8;
		};
		uint32_t time_low;
	};

	/* second dword */
	struct {
		uint32_t time_mid  : 16;
		uint32_t time_high : 12;
		uint32_t version   : 4;
	};

	/* third dword */
	struct {
		struct {
			uint8_t clk_seq_hi : 6;
			uint8_t variant    : 2;
		};
		union {
			uint8_t clk_seq_low;
			uint8_t asic_6;
		};
		uint16_t asic_4;
	};

	/* fourth dword */
	uint32_t asic_0;
};

struct amd_sriov_msg_pf2vf_info_header {
	/* the total structure size in byte */
	uint32_t size;
	/* version of this structure, written by the HOST */
	uint32_t version;
	/* reserved */
	uint32_t reserved[2];
};

/*
 * Number of dwords used by the named fields of struct amd_sriov_msg_pf2vf_info;
 * the trailing reserved[] array pads the structure to 256 dwords.
 */
#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
struct amd_sriov_msg_pf2vf_info {
	/* header contains size and version */
	struct amd_sriov_msg_pf2vf_info_header header;
	/* use private key from mailbox 2 to create checksum */
	uint32_t checksum;
	/* feature flags that the HOST driver supports */
	union amd_sriov_msg_feature_flags feature_flags;
	/* (max_width * max_height * fps) / (16 * 16) */
	uint32_t hevc_enc_max_mb_per_second;
	/* (max_width * max_height) / (16 * 16) */
	uint32_t hevc_enc_max_mb_per_frame;
	/* (max_width * max_height * fps) / (16 * 16) */
	uint32_t avc_enc_max_mb_per_second;
	/* (max_width * max_height) / (16 * 16) */
	uint32_t avc_enc_max_mb_per_frame;
	/* MEC FW position in BYTE from the start of VF visible frame buffer */
	uint64_t mecfw_offset;
	/* MEC FW size in BYTE */
	uint32_t mecfw_size;
	/* UVD FW position in BYTE from the start of VF visible frame buffer */
	uint64_t uvdfw_offset;
	/* UVD FW size in BYTE */
	uint32_t uvdfw_size;
	/* VCE FW position in BYTE from the start of VF visible frame buffer */
	uint64_t vcefw_offset;
	/* VCE FW size in BYTE */
	uint32_t vcefw_size;
	/* Bad pages block position in BYTE */
	uint32_t bp_block_offset_low;
	uint32_t bp_block_offset_high;
	/* Bad pages block size in BYTE */
	uint32_t bp_block_size;
	/* frequency for VF to update the VF2PF area in msec, 0 = manual */
	uint32_t vf2pf_update_interval_ms;
	/* identification in ROCm SMI */
	uint64_t uuid;
	uint32_t fcn_idx;
	/* flags to indicate which register access method VF should use */
	union amd_sriov_reg_access_flags reg_access_flags;
	/* MM BW management */
	struct {
		uint32_t decode_max_dimension_pixels;
		uint32_t decode_max_frame_pixels;
		uint32_t encode_max_dimension_pixels;
		uint32_t encode_max_frame_pixels;
	} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
	/* UUID info */
	struct amd_sriov_msg_uuid_info uuid_info;
	/* PCIE atomic ops support flag */
	uint32_t pcie_atomic_ops_support_flags;
	/* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
	uint32_t gpu_capacity;
	/* vf bdf on host pci tree for debug only */
	uint32_t bdf_on_host;
	uint32_t more_bp; //Reserved for future use.
	union amd_sriov_ras_caps ras_en_caps;
	union amd_sriov_ras_caps ras_telemetry_en_caps;

	/* reserved */
	uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
} __packed;

struct amd_sriov_msg_vf2pf_info_header {
	/* the total structure size in byte */
	uint32_t size;
	/* version of this structure, written by the guest */
	uint32_t version;
	/* reserved */
	uint32_t reserved[2];
};

/*
 * Number of dwords used by the named fields of struct amd_sriov_msg_vf2pf_info;
 * the trailing reserved[] array pads the structure to 256 dwords.
 */
#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
struct amd_sriov_msg_vf2pf_info {
	/* header contains size and version */
	struct amd_sriov_msg_vf2pf_info_header header;
	uint32_t checksum;
	/* driver version */
	uint8_t driver_version[64];
	/* driver certification, 1=WHQL, 0=None */
	uint32_t driver_cert;
	/* guest OS type and version */
	union amd_sriov_msg_os_info os_info;
	/* guest fb information in the unit of MB */
	uint32_t fb_usage;
	/* guest gfx engine usage percentage */
	uint32_t gfx_usage;
	/* guest gfx engine health percentage */
	uint32_t gfx_health;
	/* guest compute engine usage percentage */
	uint32_t compute_usage;
	/* guest compute engine health percentage */
	uint32_t compute_health;
	/* guest avc engine usage percentage. 0xffff means N/A */
	uint32_t avc_enc_usage;
	/* guest avc engine health percentage. 0xffff means N/A */
	uint32_t avc_enc_health;
	/* guest hevc engine usage percentage. 0xffff means N/A */
	uint32_t hevc_enc_usage;
	/* guest hevc engine health percentage. 0xffff means N/A */
	uint32_t hevc_enc_health;
	/* combined encode/decode usage */
	uint32_t encode_usage;
	uint32_t decode_usage;
	/* Version of PF2VF that VF understands */
	uint32_t pf2vf_version_required;
	/* additional FB usage */
	uint32_t fb_vis_usage;
	uint32_t fb_vis_size;
	uint32_t fb_size;
	/* guest ucode data, each one is 1.25 Dword */
	struct {
		uint8_t id;
		uint32_t version;
	} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
	uint64_t dummy_page_addr;
	/* FB allocated for guest MES to record UQ info */
	uint64_t mes_info_addr;
	uint32_t mes_info_size;
	/* reserved */
	uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
} __packed;

/* mailbox messages sent from guest to host */
enum amd_sriov_mailbox_request_message {
	MB_REQ_MSG_REQ_GPU_INIT_ACCESS = 1,
	MB_REQ_MSG_REL_GPU_INIT_ACCESS,
	MB_REQ_MSG_REQ_GPU_FINI_ACCESS,
	MB_REQ_MSG_REL_GPU_FINI_ACCESS,
	MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
	MB_REQ_MSG_REQ_GPU_INIT_DATA,
	MB_REQ_MSG_PSP_VF_CMD_RELAY,

	MB_REQ_MSG_LOG_VF_ERROR   = 200,
	MB_REQ_MSG_READY_TO_RESET = 201,
	MB_REQ_MSG_RAS_POISON     = 202,
	MB_REQ_RAS_ERROR_COUNT    = 203,
	MB_REQ_RAS_CPER_DUMP      = 204,
	MB_REQ_RAS_BAD_PAGES      = 205,
};

/*
 * mailbox messages sent from host to guest
 *
 * Values 12 and 13 are not assigned in this enum. MB_REQ_RAS_CPER_DUMP_READY
 * carries a "REQ" prefix although it is declared among the responses; the
 * name is part of the interface and is kept as is.
 */
enum amd_sriov_mailbox_response_message {
	MB_RES_MSG_CLR_MSG_BUF                 = 0,
	MB_RES_MSG_READY_TO_ACCESS_GPU         = 1,
	MB_RES_MSG_FLR_NOTIFICATION            = 2,
	MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3,
	MB_RES_MSG_SUCCESS                     = 4,
	MB_RES_MSG_FAIL                        = 5,
	MB_RES_MSG_QUERY_ALIVE                 = 6,
	MB_RES_MSG_GPU_INIT_DATA_READY         = 7,
	MB_RES_MSG_RAS_POISON_READY            = 8,
	MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION    = 9,
	MB_RES_MSG_GPU_RMA                     = 10,
	MB_RES_MSG_RAS_ERROR_COUNT_READY       = 11,
	MB_REQ_RAS_CPER_DUMP_READY             = 14,
	MB_RES_MSG_RAS_BAD_PAGES_READY         = 15,
	MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION  = 16,
	MB_RES_MSG_UNRECOV_ERR_NOTIFICATION    = 17,
	MB_RES_MSG_TEXT_MESSAGE                = 255
};

/*
 * GPU block indices for RAS telemetry; RAS_TELEMETRY_GPU_BLOCK_COUNT sizes
 * the per-block array in struct amd_sriov_ras_telemetry_error_count.
 */
enum amd_sriov_ras_telemetry_gpu_block {
	RAS_TELEMETRY_GPU_BLOCK_UMC       = 0,
	RAS_TELEMETRY_GPU_BLOCK_SDMA      = 1,
	RAS_TELEMETRY_GPU_BLOCK_GFX       = 2,
	RAS_TELEMETRY_GPU_BLOCK_MMHUB     = 3,
	RAS_TELEMETRY_GPU_BLOCK_ATHUB     = 4,
	RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF  = 5,
	RAS_TELEMETRY_GPU_BLOCK_HDP       = 6,
	RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
	RAS_TELEMETRY_GPU_BLOCK_DF        = 8,
	RAS_TELEMETRY_GPU_BLOCK_SMN       = 9,
	RAS_TELEMETRY_GPU_BLOCK_SEM       = 10,
	RAS_TELEMETRY_GPU_BLOCK_MP0       = 11,
	RAS_TELEMETRY_GPU_BLOCK_MP1       = 12,
	RAS_TELEMETRY_GPU_BLOCK_FUSE      = 13,
	RAS_TELEMETRY_GPU_BLOCK_MCA       = 14,
	RAS_TELEMETRY_GPU_BLOCK_VCN       = 15,
	RAS_TELEMETRY_GPU_BLOCK_JPEG      = 16,
	RAS_TELEMETRY_GPU_BLOCK_IH        = 17,
	RAS_TELEMETRY_GPU_BLOCK_MPIO      = 18,
	RAS_TELEMETRY_GPU_BLOCK_COUNT     = 19,
};

/* Header placed in front of the RAS telemetry body. */
struct amd_sriov_ras_telemetry_header {
	uint32_t checksum;
	uint32_t used_size;
	uint32_t reserved[2];
};

/* Error counters, one entry per enum amd_sriov_ras_telemetry_gpu_block index. */
struct amd_sriov_ras_telemetry_error_count {
	struct {
		uint32_t ce_count;
		uint32_t ue_count;
		uint32_t de_count;
		uint32_t ce_overflow_count;
		uint32_t ue_overflow_count;
		uint32_t de_overflow_count;
		uint32_t reserved[6];
	} block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
};

/* CPER dump body; buf[] is a flexible array member, so its length is not fixed here. */
struct amd_sriov_ras_cper_dump {
	uint32_t more;
	uint64_t overflow_count;
	uint64_t count;
	uint64_t wptr;
	uint32_t buf[];
};

/* RAS telemetry region: a header followed by either error counts or a CPER dump. */
struct amdsriov_ras_telemetry {
	struct amd_sriov_ras_telemetry_header header;

	union {
		struct amd_sriov_ras_telemetry_error_count error_count;
		struct amd_sriov_ras_cper_dump cper_dump;
	} body;
};

/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
enum amd_sriov_gpu_init_data_version {
	GPU_INIT_DATA_READY_V1 = 1,
};

#pragma pack(pop) // Restore previous packing option

/* checksum function between host and guest */
unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key,
				    unsigned int checksum);

/* assertion at compile time */
#ifdef __linux__
#define stringification(s)  _stringification(s)
#define _stringification(s) #s

/* both message areas must be exactly AMD_SRIOV_MSG_SIZE_KB kilobytes */
_Static_assert(
	sizeof(struct amd_sriov_msg_vf2pf_info) == (AMD_SRIOV_MSG_SIZE_KB << 10),
	"amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");

_Static_assert(
	sizeof(struct amd_sriov_msg_pf2vf_info) == (AMD_SRIOV_MSG_SIZE_KB << 10),
	"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");

_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
	       "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");

_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
	       "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");

#undef _stringification
#undef stringification
#endif

#endif /* AMDGV_SRIOV_MSG__H_ */