1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_guc_ads.h" 7 8 #include <linux/fault-inject.h> 9 10 #include <drm/drm_managed.h> 11 12 #include <generated/xe_wa_oob.h> 13 14 #include "abi/guc_actions_abi.h" 15 #include "regs/xe_engine_regs.h" 16 #include "regs/xe_gt_regs.h" 17 #include "regs/xe_guc_regs.h" 18 #include "xe_bo.h" 19 #include "xe_gt.h" 20 #include "xe_gt_ccs_mode.h" 21 #include "xe_gt_printk.h" 22 #include "xe_guc.h" 23 #include "xe_guc_capture.h" 24 #include "xe_guc_ct.h" 25 #include "xe_hw_engine.h" 26 #include "xe_lrc.h" 27 #include "xe_map.h" 28 #include "xe_mmio.h" 29 #include "xe_platform_types.h" 30 #include "xe_uc_fw.h" 31 #include "xe_wa.h" 32 #include "xe_gt_mcr.h" 33 34 /* Slack of a few additional entries per engine */ 35 #define ADS_REGSET_EXTRA_MAX 8 36 37 static struct xe_guc * 38 ads_to_guc(struct xe_guc_ads *ads) 39 { 40 return container_of(ads, struct xe_guc, ads); 41 } 42 43 static struct xe_gt * 44 ads_to_gt(struct xe_guc_ads *ads) 45 { 46 return container_of(ads, struct xe_gt, uc.guc.ads); 47 } 48 49 static struct xe_device * 50 ads_to_xe(struct xe_guc_ads *ads) 51 { 52 return gt_to_xe(ads_to_gt(ads)); 53 } 54 55 static struct iosys_map * 56 ads_to_map(struct xe_guc_ads *ads) 57 { 58 return &ads->bo->vmap; 59 } 60 61 /* UM Queue parameters: */ 62 #define GUC_UM_QUEUE_SIZE (SZ_64K) 63 #define GUC_PAGE_RES_TIMEOUT_US (-1) 64 65 /* 66 * The Additional Data Struct (ADS) has pointers for different buffers used by 67 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and 68 * all the extra buffers indirectly linked via the ADS struct's entries. 69 * 70 * Layout of the ADS blob allocated for the GuC: 71 * 72 * +---------------------------------------+ <== base 73 * | guc_ads | 74 * +---------------------------------------+ 75 * | guc_policies | 76 * +---------------------------------------+ 77 * | guc_gt_system_info | 78 * +---------------------------------------+ 79 * | guc_engine_usage | 80 * +---------------------------------------+ 81 * | guc_um_init_params | 82 * +---------------------------------------+ <== static 83 * | guc_mmio_reg[countA] (engine 0.0) | 84 * | guc_mmio_reg[countB] (engine 0.1) | 85 * | guc_mmio_reg[countC] (engine 1.0) | 86 * | ... | 87 * +---------------------------------------+ <== dynamic 88 * | padding | 89 * +---------------------------------------+ <== 4K aligned 90 * | golden contexts | 91 * +---------------------------------------+ 92 * | padding | 93 * +---------------------------------------+ <== 4K aligned 94 * | w/a KLVs | 95 * +---------------------------------------+ 96 * | padding | 97 * +---------------------------------------+ <== 4K aligned 98 * | capture lists | 99 * +---------------------------------------+ 100 * | padding | 101 * +---------------------------------------+ <== 4K aligned 102 * | UM queues | 103 * +---------------------------------------+ 104 * | padding | 105 * +---------------------------------------+ <== 4K aligned 106 * | private data | 107 * +---------------------------------------+ 108 * | padding | 109 * +---------------------------------------+ <== 4K aligned 110 */ 111 struct __guc_ads_blob { 112 struct guc_ads ads; 113 struct guc_policies policies; 114 struct guc_gt_system_info system_info; 115 struct guc_engine_usage engine_usage; 116 struct guc_um_init_params um_init_params; 117 /* From here on, location is dynamic! Refer to above diagram. */ 118 struct guc_mmio_reg regset[]; 119 } __packed; 120 121 #define ads_blob_read(ads_, field_) \ 122 xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 123 struct __guc_ads_blob, field_) 124 125 #define ads_blob_write(ads_, field_, val_) \ 126 xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 127 struct __guc_ads_blob, field_, val_) 128 129 #define info_map_write(xe_, map_, field_, val_) \ 130 xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) 131 132 #define info_map_read(xe_, map_, field_) \ 133 xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) 134 135 static size_t guc_ads_regset_size(struct xe_guc_ads *ads) 136 { 137 struct xe_device *xe = ads_to_xe(ads); 138 139 xe_assert(xe, ads->regset_size); 140 141 return ads->regset_size; 142 } 143 144 static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) 145 { 146 return PAGE_ALIGN(ads->golden_lrc_size); 147 } 148 149 static u32 guc_ads_waklv_size(struct xe_guc_ads *ads) 150 { 151 return PAGE_ALIGN(ads->ads_waklv_size); 152 } 153 154 static size_t guc_ads_capture_size(struct xe_guc_ads *ads) 155 { 156 return PAGE_ALIGN(ads->capture_size); 157 } 158 159 static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) 160 { 161 struct xe_device *xe = ads_to_xe(ads); 162 163 if (!xe->info.has_usm) 164 return 0; 165 166 return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; 167 } 168 169 static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) 170 { 171 return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); 172 } 173 174 static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) 175 { 176 return offsetof(struct __guc_ads_blob, regset); 177 } 178 179 static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) 180 { 181 size_t offset; 182 183 offset = guc_ads_regset_offset(ads) + 184 guc_ads_regset_size(ads); 185 186 return PAGE_ALIGN(offset); 187 } 188 189 static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) 190 { 191 u32 offset; 192 193 offset = guc_ads_golden_lrc_offset(ads) + 194 guc_ads_golden_lrc_size(ads); 195 196 return PAGE_ALIGN(offset); 197 } 198 199 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) 200 { 201 size_t offset; 202 203 offset = guc_ads_waklv_offset(ads) + 204 guc_ads_waklv_size(ads); 205 206 return PAGE_ALIGN(offset); 207 } 208 209 static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) 210 { 211 u32 offset; 212 213 offset = guc_ads_capture_offset(ads) + 214 guc_ads_capture_size(ads); 215 216 return PAGE_ALIGN(offset); 217 } 218 219 static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) 220 { 221 size_t offset; 222 223 offset = guc_ads_um_queues_offset(ads) + 224 guc_ads_um_queues_size(ads); 225 226 return PAGE_ALIGN(offset); 227 } 228 229 static size_t guc_ads_size(struct xe_guc_ads *ads) 230 { 231 return guc_ads_private_data_offset(ads) + 232 guc_ads_private_data_size(ads); 233 } 234 235 static size_t calculate_regset_size(struct xe_gt *gt) 236 { 237 struct xe_reg_sr_entry *sr_entry; 238 unsigned long sr_idx; 239 struct xe_hw_engine *hwe; 240 enum xe_hw_engine_id id; 241 unsigned int count = 0; 242 243 for_each_hw_engine(hwe, gt, id) 244 xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) 245 count++; 246 247 count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; 248 249 if (XE_WA(gt, 1607983814)) 250 count += LNCFCMOCS_REG_COUNT; 251 252 return count * sizeof(struct guc_mmio_reg); 253 } 254 255 static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) 256 { 257 struct xe_hw_engine *hwe; 258 enum xe_hw_engine_id id; 259 u32 mask = 0; 260 261 for_each_hw_engine(hwe, gt, id) 262 if (hwe->class == class) 263 mask |= BIT(hwe->instance); 264 265 return mask; 266 } 267 268 static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) 269 { 270 struct xe_gt *gt = ads_to_gt(ads); 271 size_t total_size = 0, alloc_size, real_size; 272 int class; 273 274 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 275 if (!engine_enable_mask(gt, class)) 276 continue; 277 278 real_size = xe_gt_lrc_size(gt, class); 279 alloc_size = PAGE_ALIGN(real_size); 280 total_size += alloc_size; 281 } 282 283 return total_size; 284 } 285 286 static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, 287 enum xe_guc_klv_ids klv_id, 288 u32 value, 289 u32 *offset, u32 *remain) 290 { 291 u32 size; 292 u32 klv_entry[] = { 293 /* 16:16 key/length */ 294 FIELD_PREP(GUC_KLV_0_KEY, klv_id) | 295 FIELD_PREP(GUC_KLV_0_LEN, 1), 296 value, 297 /* 1 dword data */ 298 }; 299 300 size = sizeof(klv_entry); 301 302 if (*remain < size) { 303 drm_warn(&ads_to_xe(ads)->drm, 304 "w/a klv buffer too small to add klv id %d\n", klv_id); 305 } else { 306 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, 307 klv_entry, size); 308 *offset += size; 309 *remain -= size; 310 } 311 } 312 313 static void guc_waklv_enable_simple(struct xe_guc_ads *ads, 314 enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) 315 { 316 u32 klv_entry[] = { 317 /* 16:16 key/length */ 318 FIELD_PREP(GUC_KLV_0_KEY, klv_id) | 319 FIELD_PREP(GUC_KLV_0_LEN, 0), 320 /* 0 dwords data */ 321 }; 322 u32 size; 323 324 size = sizeof(klv_entry); 325 326 if (xe_gt_WARN(ads_to_gt(ads), *remain < size, 327 "w/a klv buffer too small to add klv id %d\n", klv_id)) 328 return; 329 330 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, 331 klv_entry, size); 332 *offset += size; 333 *remain -= size; 334 } 335 336 static void guc_waklv_init(struct xe_guc_ads *ads) 337 { 338 struct xe_gt *gt = ads_to_gt(ads); 339 u64 addr_ggtt; 340 u32 offset, remain, size; 341 342 offset = guc_ads_waklv_offset(ads); 343 remain = guc_ads_waklv_size(ads); 344 345 if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562)) 346 guc_waklv_enable_simple(ads, 347 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, 348 &offset, &remain); 349 if (XE_WA(gt, 18024947630)) 350 guc_waklv_enable_simple(ads, 351 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, 352 &offset, &remain); 353 if (XE_WA(gt, 16022287689)) 354 guc_waklv_enable_simple(ads, 355 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, 356 &offset, &remain); 357 358 if (XE_WA(gt, 14022866841)) 359 guc_waklv_enable_simple(ads, 360 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, 361 &offset, &remain); 362 363 /* 364 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, 365 * the default value for this register is determined to be 0xC40. This could change in the 366 * future, so GuC depends on KMD to send it the correct value. 367 */ 368 if (XE_WA(gt, 13011645652)) 369 guc_waklv_enable_one_word(ads, 370 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, 371 0xC40, 372 &offset, &remain); 373 374 if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) 375 guc_waklv_enable_simple(ads, 376 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, 377 &offset, &remain); 378 379 if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708)) 380 guc_waklv_enable_simple(ads, 381 GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, 382 &offset, &remain); 383 384 size = guc_ads_waklv_size(ads) - remain; 385 if (!size) 386 return; 387 388 offset = guc_ads_waklv_offset(ads); 389 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 390 391 ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt)); 392 ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt)); 393 ads_blob_write(ads, ads.wa_klv_size, size); 394 } 395 396 static int calculate_waklv_size(struct xe_guc_ads *ads) 397 { 398 /* 399 * A single page is both the minimum size possible and 400 * is sufficiently large enough for all current platforms. 401 */ 402 return SZ_4K; 403 } 404 405 #define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) 406 407 int xe_guc_ads_init(struct xe_guc_ads *ads) 408 { 409 struct xe_device *xe = ads_to_xe(ads); 410 struct xe_gt *gt = ads_to_gt(ads); 411 struct xe_tile *tile = gt_to_tile(gt); 412 struct xe_bo *bo; 413 414 ads->golden_lrc_size = calculate_golden_lrc_size(ads); 415 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads)); 416 ads->regset_size = calculate_regset_size(gt); 417 ads->ads_waklv_size = calculate_waklv_size(ads); 418 419 bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, 420 XE_BO_FLAG_SYSTEM | 421 XE_BO_FLAG_GGTT | 422 XE_BO_FLAG_GGTT_INVALIDATE | 423 XE_BO_FLAG_PINNED_NORESTORE); 424 if (IS_ERR(bo)) 425 return PTR_ERR(bo); 426 427 ads->bo = bo; 428 429 return 0; 430 } 431 ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */ 432 433 /** 434 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load 435 * @ads: Additional data structures object 436 * 437 * Recalculate golden_lrc_size, capture_size and regset_size as the number 438 * hardware engines may have changed after the hwconfig was loaded. Also verify 439 * the new sizes fit in the already allocated ADS buffer object. 440 * 441 * Return: 0 on success, negative error code on error. 442 */ 443 int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) 444 { 445 struct xe_gt *gt = ads_to_gt(ads); 446 u32 prev_regset_size = ads->regset_size; 447 448 xe_gt_assert(gt, ads->bo); 449 450 ads->golden_lrc_size = calculate_golden_lrc_size(ads); 451 /* Calculate Capture size with worst size */ 452 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads)); 453 ads->regset_size = calculate_regset_size(gt); 454 455 xe_gt_assert(gt, ads->golden_lrc_size + 456 (ads->regset_size - prev_regset_size) <= 457 MAX_GOLDEN_LRC_SIZE); 458 459 return 0; 460 } 461 462 static void guc_policies_init(struct xe_guc_ads *ads) 463 { 464 struct xe_device *xe = ads_to_xe(ads); 465 u32 global_flags = 0; 466 467 ads_blob_write(ads, policies.dpc_promote_time, 468 GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); 469 ads_blob_write(ads, policies.max_num_work_items, 470 GLOBAL_POLICY_MAX_NUM_WI); 471 472 if (xe->wedged.mode == 2) 473 global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; 474 475 ads_blob_write(ads, policies.global_flags, global_flags); 476 ads_blob_write(ads, policies.is_valid, 1); 477 } 478 479 static void fill_engine_enable_masks(struct xe_gt *gt, 480 struct iosys_map *info_map) 481 { 482 struct xe_device *xe = gt_to_xe(gt); 483 484 info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], 485 engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); 486 info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 487 engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); 488 info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], 489 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); 490 info_map_write(xe, info_map, 491 engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], 492 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); 493 info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], 494 engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); 495 info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS], 496 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); 497 } 498 499 /* 500 * Write the offsets corresponding to the golden LRCs. The actual data is 501 * populated later by guc_golden_lrc_populate() 502 */ 503 static void guc_golden_lrc_init(struct xe_guc_ads *ads) 504 { 505 struct xe_device *xe = ads_to_xe(ads); 506 struct xe_gt *gt = ads_to_gt(ads); 507 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 508 offsetof(struct __guc_ads_blob, system_info)); 509 size_t alloc_size, real_size; 510 u32 addr_ggtt, offset; 511 int class; 512 513 offset = guc_ads_golden_lrc_offset(ads); 514 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 515 516 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 517 u8 guc_class; 518 519 guc_class = xe_engine_class_to_guc_class(class); 520 521 if (!info_map_read(xe, &info_map, 522 engine_enabled_masks[guc_class])) 523 continue; 524 525 real_size = xe_gt_lrc_size(gt, class); 526 alloc_size = PAGE_ALIGN(real_size); 527 528 /* 529 * This interface is slightly confusing. We need to pass the 530 * base address of the full golden context and the size of just 531 * the engine state, which is the section of the context image 532 * that starts after the execlists LRC registers. This is 533 * required to allow the GuC to restore just the engine state 534 * when a watchdog reset occurs. 535 * We calculate the engine state size by removing the size of 536 * what comes before it in the context image (which is identical 537 * on all engines). 538 */ 539 ads_blob_write(ads, ads.eng_state_size[guc_class], 540 real_size - xe_lrc_skip_size(xe)); 541 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 542 addr_ggtt); 543 544 addr_ggtt += alloc_size; 545 } 546 } 547 548 static void guc_mapping_table_init_invalid(struct xe_gt *gt, 549 struct iosys_map *info_map) 550 { 551 struct xe_device *xe = gt_to_xe(gt); 552 unsigned int i, j; 553 554 /* Table must be set to invalid values for entries not used */ 555 for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) 556 for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) 557 info_map_write(xe, info_map, mapping_table[i][j], 558 GUC_MAX_INSTANCES_PER_CLASS); 559 } 560 561 static void guc_mapping_table_init(struct xe_gt *gt, 562 struct iosys_map *info_map) 563 { 564 struct xe_device *xe = gt_to_xe(gt); 565 struct xe_hw_engine *hwe; 566 enum xe_hw_engine_id id; 567 568 guc_mapping_table_init_invalid(gt, info_map); 569 570 for_each_hw_engine(hwe, gt, id) { 571 u8 guc_class; 572 573 guc_class = xe_engine_class_to_guc_class(hwe->class); 574 info_map_write(xe, info_map, 575 mapping_table[guc_class][hwe->logical_instance], 576 hwe->instance); 577 } 578 } 579 580 static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map, 581 enum guc_capture_list_class_type capture_class) 582 { 583 struct xe_device *xe = gt_to_xe(gt); 584 u32 mask; 585 586 switch (capture_class) { 587 case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE: 588 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]); 589 mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]); 590 break; 591 case GUC_CAPTURE_LIST_CLASS_VIDEO: 592 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]); 593 break; 594 case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE: 595 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]); 596 break; 597 case GUC_CAPTURE_LIST_CLASS_BLITTER: 598 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]); 599 break; 600 case GUC_CAPTURE_LIST_CLASS_GSC_OTHER: 601 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]); 602 break; 603 default: 604 mask = 0; 605 } 606 607 return mask; 608 } 609 610 static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt, 611 int owner, int type, int class, u32 *total_size, size_t *size, 612 void **pptr) 613 { 614 *size = 0; 615 616 if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) { 617 if (*total_size + *size > ads->capture_size) 618 xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n", 619 *total_size + *size, ads->capture_size); 620 else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr)) 621 return false; 622 } 623 624 return true; 625 } 626 627 static int guc_capture_prep_lists(struct xe_guc_ads *ads) 628 { 629 struct xe_guc *guc = ads_to_guc(ads); 630 struct xe_gt *gt = ads_to_gt(ads); 631 u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0; 632 struct iosys_map info_map; 633 size_t size = 0; 634 void *ptr; 635 int i, j; 636 637 /* 638 * GuC Capture's steered reg-list needs to be allocated and initialized 639 * after the GuC-hwconfig is available which guaranteed from here. 640 */ 641 xe_guc_capture_steered_list_init(ads_to_guc(ads)); 642 643 capture_offset = guc_ads_capture_offset(ads); 644 ads_ggtt = xe_bo_ggtt_addr(ads->bo); 645 info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 646 offsetof(struct __guc_ads_blob, system_info)); 647 648 /* first, set aside the first page for a capture_list with zero descriptors */ 649 total_size = PAGE_SIZE; 650 if (!xe_guc_capture_getnullheader(guc, &ptr, &size)) 651 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size); 652 653 null_ggtt = ads_ggtt + capture_offset; 654 capture_offset += PAGE_SIZE; 655 656 /* 657 * Populate capture list : at this point adps is already allocated and 658 * mapped to worst case size 659 */ 660 for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { 661 bool write_empty_list; 662 663 for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) { 664 u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j); 665 /* null list if we dont have said engine or list */ 666 if (!engine_mask) { 667 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt); 668 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt); 669 continue; 670 } 671 672 /* engine exists: start with engine-class registers */ 673 write_empty_list = get_capture_list(ads, guc, gt, i, 674 GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS, 675 j, &total_size, &size, &ptr); 676 if (!write_empty_list) { 677 ads_blob_write(ads, ads.capture_class[i][j], 678 ads_ggtt + capture_offset); 679 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, 680 ptr, size); 681 total_size += size; 682 capture_offset += size; 683 } else { 684 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt); 685 } 686 687 /* engine exists: next, engine-instance registers */ 688 write_empty_list = get_capture_list(ads, guc, gt, i, 689 GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE, 690 j, &total_size, &size, &ptr); 691 if (!write_empty_list) { 692 ads_blob_write(ads, ads.capture_instance[i][j], 693 ads_ggtt + capture_offset); 694 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, 695 ptr, size); 696 total_size += size; 697 capture_offset += size; 698 } else { 699 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt); 700 } 701 } 702 703 /* global registers is last in our PF/VF loops */ 704 write_empty_list = get_capture_list(ads, guc, gt, i, 705 GUC_STATE_CAPTURE_TYPE_GLOBAL, 706 0, &total_size, &size, &ptr); 707 if (!write_empty_list) { 708 ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset); 709 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, 710 size); 711 total_size += size; 712 capture_offset += size; 713 } else { 714 ads_blob_write(ads, ads.capture_global[i], null_ggtt); 715 } 716 } 717 718 if (ads->capture_size != PAGE_ALIGN(total_size)) 719 xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n", 720 PAGE_ALIGN(total_size), ads->capture_size); 721 return PAGE_ALIGN(total_size); 722 } 723 724 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, 725 struct iosys_map *regset_map, 726 struct xe_reg reg, 727 unsigned int n_entry) 728 { 729 struct guc_mmio_reg entry = { 730 .offset = reg.addr, 731 .flags = reg.masked ? GUC_REGSET_MASKED : 0, 732 }; 733 734 if (reg.mcr) { 735 struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr); 736 u8 group, instance; 737 738 bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg, 739 &group, &instance); 740 741 if (steer) { 742 entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group); 743 entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance); 744 entry.flags |= GUC_REGSET_STEERING_NEEDED; 745 } 746 } 747 748 xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), 749 &entry, sizeof(entry)); 750 } 751 752 static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, 753 struct iosys_map *regset_map, 754 struct xe_hw_engine *hwe) 755 { 756 struct xe_hw_engine *hwe_rcs_reset_domain = 757 xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); 758 struct xe_reg_sr_entry *entry; 759 unsigned long idx; 760 unsigned int count = 0; 761 const struct { 762 struct xe_reg reg; 763 bool skip; 764 } *e, extra_regs[] = { 765 { .reg = RING_MODE(hwe->mmio_base), }, 766 { .reg = RING_HWS_PGA(hwe->mmio_base), }, 767 { .reg = RING_IMR(hwe->mmio_base), }, 768 { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain }, 769 { .reg = CCS_MODE, 770 .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) }, 771 }; 772 u32 i; 773 774 BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); 775 776 xa_for_each(&hwe->reg_sr.xa, idx, entry) 777 guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++); 778 779 for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { 780 if (e->skip) 781 continue; 782 783 guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); 784 } 785 786 if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { 787 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { 788 guc_mmio_regset_write_one(ads, regset_map, 789 XELP_LNCFCMOCS(i), count++); 790 } 791 } 792 793 return count; 794 } 795 796 static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) 797 { 798 size_t regset_offset = guc_ads_regset_offset(ads); 799 struct xe_gt *gt = ads_to_gt(ads); 800 struct xe_hw_engine *hwe; 801 enum xe_hw_engine_id id; 802 u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; 803 struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 804 regset_offset); 805 unsigned int regset_used = 0; 806 807 for_each_hw_engine(hwe, gt, id) { 808 unsigned int count; 809 u8 gc; 810 811 /* 812 * 1. Write all MMIO entries for this exec queue to the table. No 813 * need to worry about fused-off engines and when there are 814 * entries in the regset: the reg_state_list has been zero'ed 815 * by xe_guc_ads_populate() 816 */ 817 count = guc_mmio_regset_write(ads, ®set_map, hwe); 818 if (!count) 819 continue; 820 821 /* 822 * 2. Record in the header (ads.reg_state_list) the address 823 * location and number of entries 824 */ 825 gc = xe_engine_class_to_guc_class(hwe->class); 826 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); 827 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); 828 829 addr += count * sizeof(struct guc_mmio_reg); 830 iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg)); 831 832 regset_used += count * sizeof(struct guc_mmio_reg); 833 } 834 835 xe_gt_assert(gt, regset_used <= ads->regset_size); 836 } 837 838 static void guc_um_init_params(struct xe_guc_ads *ads) 839 { 840 u32 um_queue_offset = guc_ads_um_queues_offset(ads); 841 u64 base_dpa; 842 u32 base_ggtt; 843 int i; 844 845 base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; 846 base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; 847 848 for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { 849 ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, 850 base_dpa + (i * GUC_UM_QUEUE_SIZE)); 851 ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, 852 base_ggtt + (i * GUC_UM_QUEUE_SIZE)); 853 ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, 854 GUC_UM_QUEUE_SIZE); 855 } 856 857 ads_blob_write(ads, um_init_params.page_response_timeout_in_us, 858 GUC_PAGE_RES_TIMEOUT_US); 859 } 860 861 static void guc_doorbell_init(struct xe_guc_ads *ads) 862 { 863 struct xe_device *xe = ads_to_xe(ads); 864 struct xe_gt *gt = ads_to_gt(ads); 865 866 if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { 867 u32 distdbreg = 868 xe_mmio_read32(>->mmio, DIST_DBS_POPULATED); 869 870 ads_blob_write(ads, 871 system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], 872 REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); 873 } 874 } 875 876 /** 877 * xe_guc_ads_populate_minimal - populate minimal ADS 878 * @ads: Additional data structures object 879 * 880 * This function populates a minimal ADS that does not support submissions but 881 * enough so the GuC can load and the hwconfig table can be read. 882 */ 883 void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) 884 { 885 struct xe_gt *gt = ads_to_gt(ads); 886 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 887 offsetof(struct __guc_ads_blob, system_info)); 888 u32 base = xe_bo_ggtt_addr(ads->bo); 889 890 xe_gt_assert(gt, ads->bo); 891 892 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 893 guc_policies_init(ads); 894 guc_golden_lrc_init(ads); 895 guc_mapping_table_init_invalid(gt, &info_map); 896 guc_doorbell_init(ads); 897 898 ads_blob_write(ads, ads.scheduler_policies, base + 899 offsetof(struct __guc_ads_blob, policies)); 900 ads_blob_write(ads, ads.gt_system_info, base + 901 offsetof(struct __guc_ads_blob, system_info)); 902 ads_blob_write(ads, ads.private_data, base + 903 guc_ads_private_data_offset(ads)); 904 } 905 906 void xe_guc_ads_populate(struct xe_guc_ads *ads) 907 { 908 struct xe_device *xe = ads_to_xe(ads); 909 struct xe_gt *gt = ads_to_gt(ads); 910 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 911 offsetof(struct __guc_ads_blob, system_info)); 912 u32 base = xe_bo_ggtt_addr(ads->bo); 913 914 xe_gt_assert(gt, ads->bo); 915 916 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 917 guc_policies_init(ads); 918 fill_engine_enable_masks(gt, &info_map); 919 guc_mmio_reg_state_init(ads); 920 guc_golden_lrc_init(ads); 921 guc_mapping_table_init(gt, &info_map); 922 guc_capture_prep_lists(ads); 923 guc_doorbell_init(ads); 924 guc_waklv_init(ads); 925 926 if (xe->info.has_usm) { 927 guc_um_init_params(ads); 928 ads_blob_write(ads, ads.um_init_data, base + 929 offsetof(struct __guc_ads_blob, um_init_params)); 930 } 931 932 ads_blob_write(ads, ads.scheduler_policies, base + 933 offsetof(struct __guc_ads_blob, policies)); 934 ads_blob_write(ads, ads.gt_system_info, base + 935 offsetof(struct __guc_ads_blob, system_info)); 936 ads_blob_write(ads, ads.private_data, base + 937 guc_ads_private_data_offset(ads)); 938 } 939 940 /* 941 * After the golden LRC's are recorded for each engine class by the first 942 * submission, copy them to the ADS, as initialized earlier by 943 * guc_golden_lrc_init(). 944 */ 945 static void guc_golden_lrc_populate(struct xe_guc_ads *ads) 946 { 947 struct xe_device *xe = ads_to_xe(ads); 948 struct xe_gt *gt = ads_to_gt(ads); 949 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 950 offsetof(struct __guc_ads_blob, system_info)); 951 size_t total_size = 0, alloc_size, real_size; 952 u32 offset; 953 int class; 954 955 offset = guc_ads_golden_lrc_offset(ads); 956 957 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 958 u8 guc_class; 959 960 guc_class = xe_engine_class_to_guc_class(class); 961 962 if (!info_map_read(xe, &info_map, 963 engine_enabled_masks[guc_class])) 964 continue; 965 966 xe_gt_assert(gt, gt->default_lrc[class]); 967 968 real_size = xe_gt_lrc_size(gt, class); 969 alloc_size = PAGE_ALIGN(real_size); 970 total_size += alloc_size; 971 972 xe_map_memcpy_to(xe, ads_to_map(ads), offset, 973 gt->default_lrc[class], real_size); 974 975 offset += alloc_size; 976 } 977 978 xe_gt_assert(gt, total_size == ads->golden_lrc_size); 979 } 980 981 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 982 { 983 guc_golden_lrc_populate(ads); 984 } 985 986 static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) 987 { 988 struct xe_guc_ct *ct = &ads_to_guc(ads)->ct; 989 u32 action[] = { 990 XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, 991 policy_offset 992 }; 993 994 return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 995 } 996 997 /** 998 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy 999 * @ads: Additional data structures object 1000 * 1001 * This function update the GuC's engine reset policy based on wedged.mode. 1002 * 1003 * Return: 0 on success, and negative error code otherwise. 1004 */ 1005 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) 1006 { 1007 struct xe_device *xe = ads_to_xe(ads); 1008 struct xe_gt *gt = ads_to_gt(ads); 1009 struct xe_tile *tile = gt_to_tile(gt); 1010 struct guc_policies *policies; 1011 struct xe_bo *bo; 1012 int ret = 0; 1013 1014 policies = kmalloc(sizeof(*policies), GFP_KERNEL); 1015 if (!policies) 1016 return -ENOMEM; 1017 1018 policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); 1019 policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); 1020 policies->is_valid = 1; 1021 if (xe->wedged.mode == 2) 1022 policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; 1023 else 1024 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; 1025 1026 bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), 1027 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 1028 XE_BO_FLAG_GGTT); 1029 if (IS_ERR(bo)) { 1030 ret = PTR_ERR(bo); 1031 goto out; 1032 } 1033 1034 ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); 1035 out: 1036 kfree(policies); 1037 return ret; 1038 } 1039