1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_gt_pagefault.h" 7 8 #include <linux/bitfield.h> 9 #include <linux/circ_buf.h> 10 11 #include <drm/drm_exec.h> 12 #include <drm/drm_managed.h> 13 14 #include "abi/guc_actions_abi.h" 15 #include "xe_bo.h" 16 #include "xe_gt.h" 17 #include "xe_gt_stats.h" 18 #include "xe_gt_tlb_invalidation.h" 19 #include "xe_guc.h" 20 #include "xe_guc_ct.h" 21 #include "xe_migrate.h" 22 #include "xe_svm.h" 23 #include "xe_trace_bo.h" 24 #include "xe_vm.h" 25 26 struct pagefault { 27 u64 page_addr; 28 u32 asid; 29 u16 pdata; 30 u8 vfid; 31 u8 access_type; 32 u8 fault_type; 33 u8 fault_level; 34 u8 engine_class; 35 u8 engine_instance; 36 u8 fault_unsuccessful; 37 bool trva_fault; 38 }; 39 40 enum access_type { 41 ACCESS_TYPE_READ = 0, 42 ACCESS_TYPE_WRITE = 1, 43 ACCESS_TYPE_ATOMIC = 2, 44 ACCESS_TYPE_RESERVED = 3, 45 }; 46 47 enum fault_type { 48 NOT_PRESENT = 0, 49 WRITE_ACCESS_VIOLATION = 1, 50 ATOMIC_ACCESS_VIOLATION = 2, 51 }; 52 53 struct acc { 54 u64 va_range_base; 55 u32 asid; 56 u32 sub_granularity; 57 u8 granularity; 58 u8 vfid; 59 u8 access_type; 60 u8 engine_class; 61 u8 engine_instance; 62 }; 63 64 static bool access_is_atomic(enum access_type access_type) 65 { 66 return access_type == ACCESS_TYPE_ATOMIC; 67 } 68 69 static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) 70 { 71 return BIT(tile->id) & vma->tile_present && 72 !(BIT(tile->id) & vma->tile_invalidated); 73 } 74 75 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 76 { 77 if (page_addr > xe_vma_end(vma) - 1 || 78 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 79 return false; 80 81 return true; 82 } 83 84 static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) 85 { 86 struct xe_vma *vma = NULL; 87 88 if (vm->usm.last_fault_vma) { /* Fast lookup */ 89 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 90 vma = vm->usm.last_fault_vma; 91 } 92 if (!vma) 93 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 94 95 return vma; 96 } 97 98 static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, 99 bool atomic, unsigned int id) 100 { 101 struct xe_bo *bo = xe_vma_bo(vma); 102 struct xe_vm *vm = xe_vma_vm(vma); 103 int err; 104 105 err = xe_vm_lock_vma(exec, vma); 106 if (err) 107 return err; 108 109 if (atomic && IS_DGFX(vm->xe)) { 110 if (xe_vma_is_userptr(vma)) { 111 err = -EACCES; 112 return err; 113 } 114 115 /* Migrate to VRAM, move should invalidate the VMA first */ 116 err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); 117 if (err) 118 return err; 119 } else if (bo) { 120 /* Create backing store if needed */ 121 err = xe_bo_validate(bo, vm, true); 122 if (err) 123 return err; 124 } 125 126 return 0; 127 } 128 129 static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, 130 bool atomic) 131 { 132 struct xe_vm *vm = xe_vma_vm(vma); 133 struct xe_tile *tile = gt_to_tile(gt); 134 struct drm_exec exec; 135 struct dma_fence *fence; 136 ktime_t end = 0; 137 int err; 138 139 lockdep_assert_held_write(&vm->lock); 140 141 xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); 142 xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); 143 144 trace_xe_vma_pagefault(vma); 145 146 /* Check if VMA is valid */ 147 if (vma_is_valid(tile, vma) && !atomic) 148 return 0; 149 150 retry_userptr: 151 if (xe_vma_is_userptr(vma) && 152 xe_vma_userptr_check_repin(to_userptr_vma(vma))) { 153 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 154 155 err = xe_vma_userptr_pin_pages(uvma); 156 if (err) 157 return err; 158 } 159 160 /* Lock VM and BOs dma-resv */ 161 drm_exec_init(&exec, 0, 0); 162 drm_exec_until_all_locked(&exec) { 163 err = xe_pf_begin(&exec, vma, atomic, tile->id); 164 drm_exec_retry_on_contention(&exec); 165 if (xe_vm_validate_should_retry(&exec, err, &end)) 166 err = -EAGAIN; 167 if (err) 168 goto unlock_dma_resv; 169 170 /* Bind VMA only to the GT that has faulted */ 171 trace_xe_vma_pf_bind(vma); 172 fence = xe_vma_rebind(vm, vma, BIT(tile->id)); 173 if (IS_ERR(fence)) { 174 err = PTR_ERR(fence); 175 if (xe_vm_validate_should_retry(&exec, err, &end)) 176 err = -EAGAIN; 177 goto unlock_dma_resv; 178 } 179 } 180 181 dma_fence_wait(fence, false); 182 dma_fence_put(fence); 183 vma->tile_invalidated &= ~BIT(tile->id); 184 185 unlock_dma_resv: 186 drm_exec_fini(&exec); 187 if (err == -EAGAIN) 188 goto retry_userptr; 189 190 return err; 191 } 192 193 static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) 194 { 195 struct xe_vm *vm; 196 197 down_read(&xe->usm.lock); 198 vm = xa_load(&xe->usm.asid_to_vm, asid); 199 if (vm && xe_vm_in_fault_mode(vm)) 200 xe_vm_get(vm); 201 else 202 vm = ERR_PTR(-EINVAL); 203 up_read(&xe->usm.lock); 204 205 return vm; 206 } 207 208 static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) 209 { 210 struct xe_device *xe = gt_to_xe(gt); 211 struct xe_vm *vm; 212 struct xe_vma *vma = NULL; 213 int err; 214 bool atomic; 215 216 /* SW isn't expected to handle TRTT faults */ 217 if (pf->trva_fault) 218 return -EFAULT; 219 220 vm = asid_to_vm(xe, pf->asid); 221 if (IS_ERR(vm)) 222 return PTR_ERR(vm); 223 224 /* 225 * TODO: Change to read lock? Using write lock for simplicity. 226 */ 227 down_write(&vm->lock); 228 229 if (xe_vm_is_closed(vm)) { 230 err = -ENOENT; 231 goto unlock_vm; 232 } 233 234 vma = lookup_vma(vm, pf->page_addr); 235 if (!vma) { 236 err = -EINVAL; 237 goto unlock_vm; 238 } 239 240 atomic = access_is_atomic(pf->access_type); 241 242 if (xe_vma_is_cpu_addr_mirror(vma)) 243 err = xe_svm_handle_pagefault(vm, vma, gt, 244 pf->page_addr, atomic); 245 else 246 err = handle_vma_pagefault(gt, vma, atomic); 247 248 unlock_vm: 249 if (!err) 250 vm->usm.last_fault_vma = vma; 251 up_write(&vm->lock); 252 xe_vm_put(vm); 253 254 return err; 255 } 256 257 static int send_pagefault_reply(struct xe_guc *guc, 258 struct xe_guc_pagefault_reply *reply) 259 { 260 u32 action[] = { 261 XE_GUC_ACTION_PAGE_FAULT_RES_DESC, 262 reply->dw0, 263 reply->dw1, 264 }; 265 266 return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 267 } 268 269 static void print_pagefault(struct xe_device *xe, struct pagefault *pf) 270 { 271 drm_dbg(&xe->drm, "\n\tASID: %d\n" 272 "\tVFID: %d\n" 273 "\tPDATA: 0x%04x\n" 274 "\tFaulted Address: 0x%08x%08x\n" 275 "\tFaultType: %d\n" 276 "\tAccessType: %d\n" 277 "\tFaultLevel: %d\n" 278 "\tEngineClass: %d %s\n" 279 "\tEngineInstance: %d\n", 280 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), 281 lower_32_bits(pf->page_addr), 282 pf->fault_type, pf->access_type, pf->fault_level, 283 pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), 284 pf->engine_instance); 285 } 286 287 #define PF_MSG_LEN_DW 4 288 289 static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) 290 { 291 const struct xe_guc_pagefault_desc *desc; 292 bool ret = false; 293 294 spin_lock_irq(&pf_queue->lock); 295 if (pf_queue->tail != pf_queue->head) { 296 desc = (const struct xe_guc_pagefault_desc *) 297 (pf_queue->data + pf_queue->tail); 298 299 pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); 300 pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); 301 pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); 302 pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); 303 pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << 304 PFD_PDATA_HI_SHIFT; 305 pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); 306 pf->asid = FIELD_GET(PFD_ASID, desc->dw1); 307 pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); 308 pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); 309 pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); 310 pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << 311 PFD_VIRTUAL_ADDR_HI_SHIFT; 312 pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << 313 PFD_VIRTUAL_ADDR_LO_SHIFT; 314 315 pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % 316 pf_queue->num_dw; 317 ret = true; 318 } 319 spin_unlock_irq(&pf_queue->lock); 320 321 return ret; 322 } 323 324 static bool pf_queue_full(struct pf_queue *pf_queue) 325 { 326 lockdep_assert_held(&pf_queue->lock); 327 328 return CIRC_SPACE(pf_queue->head, pf_queue->tail, 329 pf_queue->num_dw) <= 330 PF_MSG_LEN_DW; 331 } 332 333 int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) 334 { 335 struct xe_gt *gt = guc_to_gt(guc); 336 struct xe_device *xe = gt_to_xe(gt); 337 struct pf_queue *pf_queue; 338 unsigned long flags; 339 u32 asid; 340 bool full; 341 342 if (unlikely(len != PF_MSG_LEN_DW)) 343 return -EPROTO; 344 345 asid = FIELD_GET(PFD_ASID, msg[1]); 346 pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); 347 348 /* 349 * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 350 */ 351 xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); 352 353 spin_lock_irqsave(&pf_queue->lock, flags); 354 full = pf_queue_full(pf_queue); 355 if (!full) { 356 memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); 357 pf_queue->head = (pf_queue->head + len) % 358 pf_queue->num_dw; 359 queue_work(gt->usm.pf_wq, &pf_queue->worker); 360 } else { 361 drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); 362 } 363 spin_unlock_irqrestore(&pf_queue->lock, flags); 364 365 return full ? -ENOSPC : 0; 366 } 367 368 #define USM_QUEUE_MAX_RUNTIME_MS 20 369 370 static void pf_queue_work_func(struct work_struct *w) 371 { 372 struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); 373 struct xe_gt *gt = pf_queue->gt; 374 struct xe_device *xe = gt_to_xe(gt); 375 struct xe_guc_pagefault_reply reply = {}; 376 struct pagefault pf = {}; 377 unsigned long threshold; 378 int ret; 379 380 threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); 381 382 while (get_pagefault(pf_queue, &pf)) { 383 ret = handle_pagefault(gt, &pf); 384 if (unlikely(ret)) { 385 print_pagefault(xe, &pf); 386 pf.fault_unsuccessful = 1; 387 drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); 388 } 389 390 reply.dw0 = FIELD_PREP(PFR_VALID, 1) | 391 FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | 392 FIELD_PREP(PFR_REPLY, PFR_ACCESS) | 393 FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | 394 FIELD_PREP(PFR_ASID, pf.asid); 395 396 reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | 397 FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | 398 FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | 399 FIELD_PREP(PFR_PDATA, pf.pdata); 400 401 send_pagefault_reply(>->uc.guc, &reply); 402 403 if (time_after(jiffies, threshold) && 404 pf_queue->tail != pf_queue->head) { 405 queue_work(gt->usm.pf_wq, w); 406 break; 407 } 408 } 409 } 410 411 static void acc_queue_work_func(struct work_struct *w); 412 413 static void pagefault_fini(void *arg) 414 { 415 struct xe_gt *gt = arg; 416 struct xe_device *xe = gt_to_xe(gt); 417 418 if (!xe->info.has_usm) 419 return; 420 421 destroy_workqueue(gt->usm.acc_wq); 422 destroy_workqueue(gt->usm.pf_wq); 423 } 424 425 static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) 426 { 427 struct xe_device *xe = gt_to_xe(gt); 428 xe_dss_mask_t all_dss; 429 int num_dss, num_eus; 430 431 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 432 XE_MAX_DSS_FUSE_BITS); 433 434 num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); 435 num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, 436 XE_MAX_EU_FUSE_BITS) * num_dss; 437 438 /* 439 * user can issue separate page faults per EU and per CS 440 * 441 * XXX: Multiplier required as compute UMD are getting PF queue errors 442 * without it. Follow on why this multiplier is required. 443 */ 444 #define PF_MULTIPLIER 8 445 pf_queue->num_dw = 446 (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; 447 #undef PF_MULTIPLIER 448 449 pf_queue->gt = gt; 450 pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, 451 sizeof(u32), GFP_KERNEL); 452 if (!pf_queue->data) 453 return -ENOMEM; 454 455 spin_lock_init(&pf_queue->lock); 456 INIT_WORK(&pf_queue->worker, pf_queue_work_func); 457 458 return 0; 459 } 460 461 int xe_gt_pagefault_init(struct xe_gt *gt) 462 { 463 struct xe_device *xe = gt_to_xe(gt); 464 int i, ret = 0; 465 466 if (!xe->info.has_usm) 467 return 0; 468 469 for (i = 0; i < NUM_PF_QUEUE; ++i) { 470 ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); 471 if (ret) 472 return ret; 473 } 474 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 475 gt->usm.acc_queue[i].gt = gt; 476 spin_lock_init(>->usm.acc_queue[i].lock); 477 INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); 478 } 479 480 gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", 481 WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); 482 if (!gt->usm.pf_wq) 483 return -ENOMEM; 484 485 gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", 486 WQ_UNBOUND | WQ_HIGHPRI, 487 NUM_ACC_QUEUE); 488 if (!gt->usm.acc_wq) { 489 destroy_workqueue(gt->usm.pf_wq); 490 return -ENOMEM; 491 } 492 493 return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); 494 } 495 496 void xe_gt_pagefault_reset(struct xe_gt *gt) 497 { 498 struct xe_device *xe = gt_to_xe(gt); 499 int i; 500 501 if (!xe->info.has_usm) 502 return; 503 504 for (i = 0; i < NUM_PF_QUEUE; ++i) { 505 spin_lock_irq(>->usm.pf_queue[i].lock); 506 gt->usm.pf_queue[i].head = 0; 507 gt->usm.pf_queue[i].tail = 0; 508 spin_unlock_irq(>->usm.pf_queue[i].lock); 509 } 510 511 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 512 spin_lock(>->usm.acc_queue[i].lock); 513 gt->usm.acc_queue[i].head = 0; 514 gt->usm.acc_queue[i].tail = 0; 515 spin_unlock(>->usm.acc_queue[i].lock); 516 } 517 } 518 519 static int granularity_in_byte(int val) 520 { 521 switch (val) { 522 case 0: 523 return SZ_128K; 524 case 1: 525 return SZ_2M; 526 case 2: 527 return SZ_16M; 528 case 3: 529 return SZ_64M; 530 default: 531 return 0; 532 } 533 } 534 535 static int sub_granularity_in_byte(int val) 536 { 537 return (granularity_in_byte(val) / 32); 538 } 539 540 static void print_acc(struct xe_device *xe, struct acc *acc) 541 { 542 drm_warn(&xe->drm, "Access counter request:\n" 543 "\tType: %s\n" 544 "\tASID: %d\n" 545 "\tVFID: %d\n" 546 "\tEngine: %d:%d\n" 547 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" 548 "\tSub_Granularity Vector: 0x%08x\n" 549 "\tVA Range base: 0x%016llx\n", 550 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", 551 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, 552 granularity_in_byte(acc->granularity) / SZ_1K, 553 sub_granularity_in_byte(acc->granularity) / SZ_1K, 554 acc->sub_granularity, acc->va_range_base); 555 } 556 557 static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) 558 { 559 u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * 560 sub_granularity_in_byte(acc->granularity); 561 562 return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); 563 } 564 565 static int handle_acc(struct xe_gt *gt, struct acc *acc) 566 { 567 struct xe_device *xe = gt_to_xe(gt); 568 struct xe_tile *tile = gt_to_tile(gt); 569 struct drm_exec exec; 570 struct xe_vm *vm; 571 struct xe_vma *vma; 572 int ret = 0; 573 574 /* We only support ACC_TRIGGER at the moment */ 575 if (acc->access_type != ACC_TRIGGER) 576 return -EINVAL; 577 578 vm = asid_to_vm(xe, acc->asid); 579 if (IS_ERR(vm)) 580 return PTR_ERR(vm); 581 582 down_read(&vm->lock); 583 584 /* Lookup VMA */ 585 vma = get_acc_vma(vm, acc); 586 if (!vma) { 587 ret = -EINVAL; 588 goto unlock_vm; 589 } 590 591 trace_xe_vma_acc(vma); 592 593 /* Userptr or null can't be migrated, nothing to do */ 594 if (xe_vma_has_no_bo(vma)) 595 goto unlock_vm; 596 597 /* Lock VM and BOs dma-resv */ 598 drm_exec_init(&exec, 0, 0); 599 drm_exec_until_all_locked(&exec) { 600 ret = xe_pf_begin(&exec, vma, true, tile->id); 601 drm_exec_retry_on_contention(&exec); 602 if (ret) 603 break; 604 } 605 606 drm_exec_fini(&exec); 607 unlock_vm: 608 up_read(&vm->lock); 609 xe_vm_put(vm); 610 611 return ret; 612 } 613 614 #define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) 615 616 #define ACC_MSG_LEN_DW 4 617 618 static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) 619 { 620 const struct xe_guc_acc_desc *desc; 621 bool ret = false; 622 623 spin_lock(&acc_queue->lock); 624 if (acc_queue->tail != acc_queue->head) { 625 desc = (const struct xe_guc_acc_desc *) 626 (acc_queue->data + acc_queue->tail); 627 628 acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); 629 acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | 630 FIELD_GET(ACC_SUBG_LO, desc->dw0); 631 acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); 632 acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); 633 acc->asid = FIELD_GET(ACC_ASID, desc->dw1); 634 acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); 635 acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); 636 acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, 637 desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); 638 639 acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) % 640 ACC_QUEUE_NUM_DW; 641 ret = true; 642 } 643 spin_unlock(&acc_queue->lock); 644 645 return ret; 646 } 647 648 static void acc_queue_work_func(struct work_struct *w) 649 { 650 struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); 651 struct xe_gt *gt = acc_queue->gt; 652 struct xe_device *xe = gt_to_xe(gt); 653 struct acc acc = {}; 654 unsigned long threshold; 655 int ret; 656 657 threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); 658 659 while (get_acc(acc_queue, &acc)) { 660 ret = handle_acc(gt, &acc); 661 if (unlikely(ret)) { 662 print_acc(xe, &acc); 663 drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); 664 } 665 666 if (time_after(jiffies, threshold) && 667 acc_queue->tail != acc_queue->head) { 668 queue_work(gt->usm.acc_wq, w); 669 break; 670 } 671 } 672 } 673 674 static bool acc_queue_full(struct acc_queue *acc_queue) 675 { 676 lockdep_assert_held(&acc_queue->lock); 677 678 return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <= 679 ACC_MSG_LEN_DW; 680 } 681 682 int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) 683 { 684 struct xe_gt *gt = guc_to_gt(guc); 685 struct acc_queue *acc_queue; 686 u32 asid; 687 bool full; 688 689 /* 690 * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0 691 */ 692 BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW); 693 694 if (unlikely(len != ACC_MSG_LEN_DW)) 695 return -EPROTO; 696 697 asid = FIELD_GET(ACC_ASID, msg[1]); 698 acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; 699 700 spin_lock(&acc_queue->lock); 701 full = acc_queue_full(acc_queue); 702 if (!full) { 703 memcpy(acc_queue->data + acc_queue->head, msg, 704 len * sizeof(u32)); 705 acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; 706 queue_work(gt->usm.acc_wq, &acc_queue->worker); 707 } else { 708 drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); 709 } 710 spin_unlock(&acc_queue->lock); 711 712 return full ? -ENOSPC : 0; 713 } 714