// SPDX-License-Identifier: MIT

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

#include <nvif/class.h>

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * To use the UAPI, a user client must first initialize the VA space using the
 * DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space should be
 * managed by the kernel and which by the UMD.
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients with an interface to manage
 * the userspace-manageable portion of the VA space. It provides operations to
 * map and unmap memory. Mappings may be flagged as sparse. Sparse mappings are
 * not backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping will take their place again.
 * Requests to unmap a sparse mapping that still contains memory backed mappings
 * will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. However, unmap requests with the sparse flag set must
 * exactly match the range of a previously mapped sparse mapping.
 *
 * While the kernel generally permits memory backed mappings to be mapped and
 * unmapped in arbitrary sequences and ranges, either within a single VM_BIND
 * ioctl call or across multiple calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit userspace to:
 *  - unmap non-existent sparse mappings
 *  - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *    of the previously unmapped sparse mapping within the same VM_BIND ioctl
 *  - unmap a sparse mapping and map new memory backed mappings overlapping the
 *    range of the previously unmapped sparse mapping within the same VM_BIND
 *    ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent for userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
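 *
 * For illustration, a minimal userspace sequence touching all three ioctls
 * could look roughly like the sketch below. Structure, flag and ioctl names
 * follow the uAPI header (include/uapi/drm/nouveau_drm.h); fd, bo_handle,
 * bo_size, pushbuf_len, channel_id and the VA values are assumed to come from
 * earlier GEM and channel setup. Error handling and the syncobj arrays
 * (described further below) are omitted::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0,
 *		.kernel_managed_size = 1ull << 30,	// low 1 GiB kernel managed
 *	};
 *	struct drm_nouveau_vm_bind_op op[] = {
 *		{	// reserve a sparse region
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr = 0x100000000ull,
 *			.range = 0x10000000ull,
 *		},
 *		{	// back the start of the region with a GEM object
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle = bo_handle,
 *			.bo_offset = 0,
 *			.addr = 0x100000000ull,
 *			.range = bo_size,
 *		},
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr = (uintptr_t)op,
 *	};
 *	struct drm_nouveau_exec_push push = {
 *		.va = 0x100000000ull,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);	// synchronous bind
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);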
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finished execution. If executed synchronously the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, like VM_BIND jobs, provide the
 * option to synchronize them with syncobjs.
 *
 * In addition, EXEC jobs can be scheduled to execute on a specified channel.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have
 * an up-to-date view of the VA space. However, the actual mappings might still
 * be pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job,
			struct drm_gpuvm_exec *vme)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

	nouveau_uvmm_lock(uvmm);
	ret = drm_gpuvm_exec_lock(vme);
	if (ret) {
		nouveau_uvmm_unlock(uvmm);
		return ret;
	}
	nouveau_uvmm_unlock(uvmm);

	ret = drm_gpuvm_exec_validate(vme);
	if (ret) {
		drm_gpuvm_exec_unlock(vme);
		return ret;
	}

	return 0;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
			      struct drm_gpuvm_exec *vme)
{
	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
				      job->resv_usage, job->resv_usage);
	drm_gpuvm_exec_unlock(vme);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

	ret = nvif_chan_gpfifo_wait(&chan->chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nvif_chan_gpfifo_push(&chan->chan, p->va, p->va_len, no_prefetch);
	}

	nvif_chan_gpfifo_post(&chan->chan);

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_done(job);
	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static const struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	args.file_priv = __args->file_priv;
	job->chan = __args->chan;

	args.sched = __args->sched;
	/* Plus one to account for the HW fence. */
	args.credits = job->push.count + 1;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_pushs:
	u_free(args->push.s);
err_free_ins:
	u_free(args->in_sync.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (chan->user.oclass < NV50_CHANNEL_GPFIFO)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->chan.gpfifo.max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched = chan16->sched;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}