// SPDX-License-Identifier: GPL-2.0 AND MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <kunit/test.h>
#include <kunit/visibility.h>

#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"

#include "xe_bo_evict.h"
#include "xe_pci.h"
#include "xe_pm.h"

static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
			    bool clear, u64 get_val, u64 assign_val,
			    struct kunit *test)
{
	struct dma_fence *fence;
	struct ttm_tt *ttm;
	struct page *page;
	pgoff_t ccs_page;
	long timeout;
	u64 *cpu_map;
	int ret;
	u32 offset;

	/* Move bo to VRAM if not already there. */
	ret = xe_bo_validate(bo, NULL, false);
	if (ret) {
		KUNIT_FAIL(test, "Failed to validate bo.\n");
		return ret;
	}

	/* Optionally clear bo *and* CCS data in VRAM. */
	if (clear) {
		fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource);
		if (IS_ERR(fence)) {
			KUNIT_FAIL(test, "Failed to submit bo clear.\n");
			return PTR_ERR(fence);
		}
		dma_fence_put(fence);
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
	}

	/* Sync all migration blits */
	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
					DMA_RESV_USAGE_KERNEL,
					true,
					5 * HZ);
	if (timeout <= 0) {
		KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
		return -ETIME;
	}

	/*
	 * Bo with CCS data is now in system memory. Verify backing store
	 * and data integrity. Then assign for the next testing round while
	 * we still have a CPU map.
	 */
	ttm = bo->ttm.ttm;
	if (!ttm || !ttm_tt_is_populated(ttm)) {
		KUNIT_FAIL(test, "Bo was not in expected placement.\n");
		return -EINVAL;
	}

	ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
	if (ccs_page >= ttm->num_pages) {
		KUNIT_FAIL(test, "No TTM CCS pages present.\n");
		return -EINVAL;
	}

	page = ttm->pages[ccs_page];
	cpu_map = kmap_local_page(page);

	/* Check first CCS value */
	if (cpu_map[0] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[0]);
		ret = -EINVAL;
	}

	/* Check last CCS value, or at least the last value in the page. */
	offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
	offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
	if (cpu_map[offset] != get_val) {
		KUNIT_FAIL(test,
			   "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
			   (unsigned long long)get_val,
			   (unsigned long long)cpu_map[offset]);
		ret = -EINVAL;
	}

	cpu_map[0] = assign_val;
	cpu_map[offset] = assign_val;
	kunmap_local(cpu_map);

	return ret;
}
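
/*
 * ccs_test_run_tile() below drives ccs_test_migrate() through three
 * rounds: a freshly created bo must read back zeroed CCS metadata, a
 * value written to the CCS pages must survive an evict/validate round
 * trip, and an explicit migrate-clear must zero the metadata again.
 */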
static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
			      struct kunit *test)
{
	struct xe_bo *bo;
	int ret;

	/* TODO: Sanity check */
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);

	if (IS_DGFX(xe))
		kunit_info(test, "Testing vram id %u\n", tile->id);
	else
		kunit_info(test, "Testing system memory\n");

	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
			       ttm_bo_type_device, bo_flags);
	if (IS_ERR(bo)) {
		KUNIT_FAIL(test, "Failed to create bo.\n");
		return;
	}

	xe_bo_lock(bo, false);

	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
			       test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data survives migration.\n");
	ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
			       0xdeadbeefdeadbeefULL, test);
	if (ret)
		goto out_unlock;

	kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
	ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);

out_unlock:
	xe_bo_unlock(bo);
	xe_bo_put(bo);
}

static int ccs_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!xe_device_has_flat_ccs(xe)) {
		kunit_skip(test, "non-flat-ccs device\n");
		return 0;
	}

	/* For xe2+ dgfx, we don't handle ccs metadata */
	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
		kunit_skip(test, "xe2+ dgfx device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id) {
		/* For igfx run only for primary tile */
		if (!IS_DGFX(xe) && id > 0)
			continue;
		ccs_test_run_tile(xe, tile, test);
	}

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_ccs_migrate_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	ccs_test_run_device(xe);
}
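
/*
 * The eviction test below creates a bo owned by the migrate VM plus an
 * external bo (the kind that can be pinned for e.g. dma-buf sharing),
 * pins the external one, evicts all bos to system memory and restores
 * them again. The pinned external bo is expected to end up back in
 * VRAM, while the unpinned VM bo must stay in system memory until it
 * is re-validated, which the second loop iteration exercises.
 */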
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
	struct xe_bo *bo, *external;
	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
	struct xe_gt *__gt;
	int err, i, id;

	kunit_info(test, "Testing device %s vram id %u\n",
		   dev_name(xe->drm.dev), tile->id);

	for (i = 0; i < 2; ++i) {
		xe_vm_lock(vm, false);
		bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
				       DRM_XE_GEM_CPU_CACHING_WC,
				       ttm_bo_type_device,
				       bo_flags);
		xe_vm_unlock(vm);
		if (IS_ERR(bo)) {
			KUNIT_FAIL(test, "bo create err=%pe\n", bo);
			break;
		}

		external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
					     DRM_XE_GEM_CPU_CACHING_WC,
					     ttm_bo_type_device, bo_flags);
		if (IS_ERR(external)) {
			KUNIT_FAIL(test, "external bo create err=%pe\n", external);
			goto cleanup_bo;
		}

		xe_bo_lock(external, false);
		err = xe_bo_pin_external(external);
		xe_bo_unlock(external);
		if (err) {
			KUNIT_FAIL(test, "external bo pin err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_external;
		}

		err = xe_bo_evict_all(xe);
		if (err) {
			KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		for_each_gt(__gt, xe, id)
			xe_gt_sanitize(__gt);
		err = xe_bo_restore_kernel(xe);
		/*
		 * Snapshotting the CTB and copying back a potentially old
		 * version seems risky, depending on what might have been
		 * inflight. Also it seems snapshotting the ADS object and
		 * copying back results in serious breakage. Normally when
		 * calling xe_bo_restore_kernel() we always fully restart the
		 * GT, which re-initializes such things. We could potentially
		 * skip saving and restoring such objects in xe_bo_evict_all(),
		 * but it seems quite fragile not to also restart the GT. Try
		 * to do that here by triggering a GT reset.
		 */
		for_each_gt(__gt, xe, id) {
			xe_gt_reset_async(__gt);
			flush_work(&__gt->reset.worker);
		}
		if (err) {
			KUNIT_FAIL(test, "restore kernel err=%pe\n",
				   ERR_PTR(err));
			goto cleanup_all;
		}

		err = xe_bo_restore_user(xe);
		if (err) {
			KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
			goto cleanup_all;
		}

		if (!xe_bo_is_vram(external)) {
			KUNIT_FAIL(test, "external bo is not vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (xe_bo_is_vram(bo)) {
			KUNIT_FAIL(test, "bo is vram\n");
			err = -EPROTO;
			goto cleanup_all;
		}

		if (i) {
			down_read(&vm->lock);
			xe_vm_lock(vm, false);
			err = xe_bo_validate(bo, bo->vm, false);
			xe_vm_unlock(vm);
			up_read(&vm->lock);
			if (err) {
				KUNIT_FAIL(test, "bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
			xe_bo_lock(external, false);
			err = xe_bo_validate(external, NULL, false);
			xe_bo_unlock(external);
			if (err) {
				KUNIT_FAIL(test, "external bo valid err=%pe\n",
					   ERR_PTR(err));
				goto cleanup_all;
			}
		}

		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);

		xe_bo_put(external);

		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		continue;

cleanup_all:
		xe_bo_lock(external, false);
		xe_bo_unpin_external(external);
		xe_bo_unlock(external);
cleanup_external:
		xe_bo_put(external);
cleanup_bo:
		xe_bo_lock(bo, false);
		__xe_bo_unset_bulk_move(bo);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
		break;
	}

	xe_vm_put(vm);

	return 0;
}
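
/*
 * Eviction needs a dedicated VRAM placement to evict from, so the test
 * is only meaningful on discrete devices; integrated devices are
 * skipped.
 */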
static int evict_test_run_device(struct xe_device *xe)
{
	struct kunit *test = kunit_get_current_test();
	struct xe_tile *tile;
	int id;

	if (!IS_DGFX(xe)) {
		kunit_skip(test, "non-discrete device\n");
		return 0;
	}

	xe_pm_runtime_get(xe);

	for_each_tile(tile, xe, id)
		evict_test_run_tile(xe, tile, test);

	xe_pm_runtime_put(xe);

	return 0;
}

static void xe_bo_evict_kunit(struct kunit *test)
{
	struct xe_device *xe = test->priv;

	evict_test_run_device(xe);
}

static struct kunit_case xe_bo_tests[] = {
	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
	{}
};

VISIBLE_IF_KUNIT
struct kunit_suite xe_bo_test_suite = {
	.name = "xe_bo",
	.test_cases = xe_bo_tests,
	.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
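
/*
 * Both cases are parameterized over the live devices generated by
 * xe_pci_live_device_gen_param(); the suite init helper stores the
 * device under test in test->priv, which the test bodies above read.
 */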