1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * DOC: Command list validator for VC4.
26 *
27 * Since the VC4 has no IOMMU between it and system memory, a user
28 * with access to execute command lists could escalate privilege by
29 * overwriting system memory (drawing to it as a framebuffer) or
30 * reading system memory it shouldn't (reading it as a vertex buffer
31 * or index buffer)
32 *
 * We validate binner command lists to ensure that all accesses are
 * within the bounds of the GEM objects referenced by the submitted
 * job. The validator explicitly whitelists packets, and looks at the
 * offsets in any address fields to make sure they're contained within
 * the BOs they reference.
38 *
39 * Note that because CL validation is already reading the
40 * user-submitted CL and writing the validated copy out to the memory
41 * that the GPU will actually read, this is also where GEM relocation
42 * processing (turning BO references into actual addresses for the GPU
43 * to use) happens.
44 */
45
46 #include <drm/drm_print.h>
47
48 #include "uapi/drm/vc4_drm.h"
49 #include "vc4_drv.h"
50 #include "vc4_packet.h"
51
/*
 * Parameter list shared by every per-packet validation callback: the job
 * being validated, a pointer into the validated copy of the packet and a
 * pointer into the user-submitted packet.
 */
#define VALIDATE_ARGS			\
	struct vc4_exec_info *exec,	\
	void *validated,		\
	void *untrusted
56
/**
 * utile_width() - Width in pixels of a 64-byte microtile.
 * @cpp: Bytes per pixel of the pixel format
 *
 * Return: 8 for 1 or 2 bytes per pixel, 4 for 4 bytes per pixel and 2 for
 * 8 bytes per pixel.  Any other value is logged and yields 1.
 */
static uint32_t
utile_width(int cpp)
{
	if (cpp == 1 || cpp == 2)
		return 8;
	if (cpp == 4)
		return 4;
	if (cpp == 8)
		return 2;

	pr_err("unknown cpp: %d\n", cpp);
	return 1;
}
74
/**
 * utile_height() - Height in pixels of a 64-byte microtile.
 * @cpp: Bytes per pixel of the pixel format
 *
 * Return: 8 for 1 byte per pixel, 4 for 2, 4 or 8 bytes per pixel.  Any
 * other value is logged and yields 1.
 */
static uint32_t
utile_height(int cpp)
{
	if (cpp == 1)
		return 8;
	if (cpp == 2 || cpp == 4 || cpp == 8)
		return 4;

	pr_err("unknown cpp: %d\n", cpp);
	return 1;
}
91
/**
 * size_is_lt() - Tell whether a miplevel uses the lineartile (LT) layout.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 *
 * Return: true when either dimension is no larger than four microtiles,
 * in which case the level uses LT tiling instead of the normal T tiling.
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	if (width <= 4 * utile_width(cpp))
		return true;

	return height <= 4 * utile_height(cpp);
}
106
107 struct drm_gem_dma_object *
vc4_use_bo(struct vc4_exec_info * exec,uint32_t hindex)108 vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
109 {
110 struct vc4_dev *vc4 = exec->dev;
111 struct drm_gem_dma_object *obj;
112 struct vc4_bo *bo;
113
114 if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
115 return NULL;
116
117 if (hindex >= exec->bo_count) {
118 DRM_DEBUG("BO index %d greater than BO count %d\n",
119 hindex, exec->bo_count);
120 return NULL;
121 }
122 obj = to_drm_gem_dma_obj(exec->bo[hindex]);
123 bo = to_vc4_bo(&obj->base);
124
125 if (bo->validated_shader) {
126 DRM_DEBUG("Trying to use shader BO as something other than "
127 "a shader\n");
128 return NULL;
129 }
130
131 return obj;
132 }
133
134 static struct drm_gem_dma_object *
vc4_use_handle(struct vc4_exec_info * exec,uint32_t gem_handles_packet_index)135 vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
136 {
137 return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
138 }
139
140 static bool
validate_bin_pos(struct vc4_exec_info * exec,void * untrusted,uint32_t pos)141 validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
142 {
143 /* Note that the untrusted pointer passed to these functions is
144 * incremented past the packet byte.
145 */
146 return (untrusted - 1 == exec->bin_u + pos);
147 }
148
/*
 * Compute the size in bytes of a GL shader record from the low bits of
 * its reference: bits 0-2 hold the attribute count (0 meaning 8) and
 * bit 3 selects the extended record layout.
 */
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t nr_attrs = pointer_bits & 7;

	if (!nr_attrs)
		nr_attrs = 8;

	return (pointer_bits & 8) ? 100 + nr_attrs * 4 : 36 + nr_attrs * 8;
}
163
164 bool
vc4_check_tex_size(struct vc4_exec_info * exec,struct drm_gem_dma_object * fbo,uint32_t offset,uint8_t tiling_format,uint32_t width,uint32_t height,uint8_t cpp)165 vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_dma_object *fbo,
166 uint32_t offset, uint8_t tiling_format,
167 uint32_t width, uint32_t height, uint8_t cpp)
168 {
169 struct vc4_dev *vc4 = exec->dev;
170 uint32_t aligned_width, aligned_height, stride, size;
171 uint32_t utile_w = utile_width(cpp);
172 uint32_t utile_h = utile_height(cpp);
173
174 if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
175 return false;
176
177 /* The shaded vertex format stores signed 12.4 fixed point
178 * (-2048,2047) offsets from the viewport center, so we should
179 * never have a render target larger than 4096. The texture
180 * unit can only sample from 2048x2048, so it's even more
181 * restricted. This lets us avoid worrying about overflow in
182 * our math.
183 */
184 if (width > 4096 || height > 4096) {
185 DRM_DEBUG("Surface dimensions (%d,%d) too large",
186 width, height);
187 return false;
188 }
189
190 switch (tiling_format) {
191 case VC4_TILING_FORMAT_LINEAR:
192 aligned_width = round_up(width, utile_w);
193 aligned_height = height;
194 break;
195 case VC4_TILING_FORMAT_T:
196 aligned_width = round_up(width, utile_w * 8);
197 aligned_height = round_up(height, utile_h * 8);
198 break;
199 case VC4_TILING_FORMAT_LT:
200 aligned_width = round_up(width, utile_w);
201 aligned_height = round_up(height, utile_h);
202 break;
203 default:
204 DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
205 return false;
206 }
207
208 stride = aligned_width * cpp;
209 size = stride * aligned_height;
210
211 if (size + offset < size ||
212 size + offset > fbo->base.size) {
213 DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
214 width, height,
215 aligned_width, aligned_height,
216 size, offset, fbo->base.size);
217 return false;
218 }
219
220 return true;
221 }
222
223 static int
validate_flush(VALIDATE_ARGS)224 validate_flush(VALIDATE_ARGS)
225 {
226 if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
227 DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
228 return -EINVAL;
229 }
230 exec->found_flush = true;
231
232 return 0;
233 }
234
235 static int
validate_start_tile_binning(VALIDATE_ARGS)236 validate_start_tile_binning(VALIDATE_ARGS)
237 {
238 if (exec->found_start_tile_binning_packet) {
239 DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
240 return -EINVAL;
241 }
242 exec->found_start_tile_binning_packet = true;
243
244 if (!exec->found_tile_binning_mode_config_packet) {
245 DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
246 return -EINVAL;
247 }
248
249 return 0;
250 }
251
252 static int
validate_increment_semaphore(VALIDATE_ARGS)253 validate_increment_semaphore(VALIDATE_ARGS)
254 {
255 if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
256 DRM_DEBUG("Bin CL must end with "
257 "VC4_PACKET_INCREMENT_SEMAPHORE\n");
258 return -EINVAL;
259 }
260 exec->found_increment_semaphore_packet = true;
261
262 return 0;
263 }
264
265 static int
validate_indexed_prim_list(VALIDATE_ARGS)266 validate_indexed_prim_list(VALIDATE_ARGS)
267 {
268 struct drm_gem_dma_object *ib;
269 uint32_t length = *(uint32_t *)(untrusted + 1);
270 uint32_t offset = *(uint32_t *)(untrusted + 5);
271 uint32_t max_index = *(uint32_t *)(untrusted + 9);
272 uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
273 struct vc4_shader_state *shader_state;
274
275 /* Check overflow condition */
276 if (exec->shader_state_count == 0) {
277 DRM_DEBUG("shader state must precede primitives\n");
278 return -EINVAL;
279 }
280 shader_state = &exec->shader_state[exec->shader_state_count - 1];
281
282 if (max_index > shader_state->max_index)
283 shader_state->max_index = max_index;
284
285 ib = vc4_use_handle(exec, 0);
286 if (!ib)
287 return -EINVAL;
288
289 if (offset > ib->base.size ||
290 (ib->base.size - offset) / index_size < length) {
291 DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
292 offset, length, index_size, ib->base.size);
293 return -EINVAL;
294 }
295
296 *(uint32_t *)(validated + 5) = ib->dma_addr + offset;
297
298 return 0;
299 }
300
301 static int
validate_gl_array_primitive(VALIDATE_ARGS)302 validate_gl_array_primitive(VALIDATE_ARGS)
303 {
304 uint32_t length = *(uint32_t *)(untrusted + 1);
305 uint32_t base_index = *(uint32_t *)(untrusted + 5);
306 uint32_t max_index;
307 struct vc4_shader_state *shader_state;
308
309 /* Check overflow condition */
310 if (exec->shader_state_count == 0) {
311 DRM_DEBUG("shader state must precede primitives\n");
312 return -EINVAL;
313 }
314 shader_state = &exec->shader_state[exec->shader_state_count - 1];
315
316 if (length + base_index < length) {
317 DRM_DEBUG("primitive vertex count overflow\n");
318 return -EINVAL;
319 }
320 max_index = length + base_index - 1;
321
322 if (max_index > shader_state->max_index)
323 shader_state->max_index = max_index;
324
325 return 0;
326 }
327
328 static int
validate_gl_shader_state(VALIDATE_ARGS)329 validate_gl_shader_state(VALIDATE_ARGS)
330 {
331 uint32_t i = exec->shader_state_count++;
332
333 if (i >= exec->shader_state_size) {
334 DRM_DEBUG("More requests for shader states than declared\n");
335 return -EINVAL;
336 }
337
338 exec->shader_state[i].addr = *(uint32_t *)untrusted;
339 exec->shader_state[i].max_index = 0;
340
341 if (exec->shader_state[i].addr & ~0xf) {
342 DRM_DEBUG("high bits set in GL shader rec reference\n");
343 return -EINVAL;
344 }
345
346 *(uint32_t *)validated = (exec->shader_rec_p +
347 exec->shader_state[i].addr);
348
349 exec->shader_rec_p +=
350 roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
351
352 return 0;
353 }
354
355 static int
validate_tile_binning_config(VALIDATE_ARGS)356 validate_tile_binning_config(VALIDATE_ARGS)
357 {
358 struct drm_device *dev = exec->exec_bo->base.dev;
359 struct vc4_dev *vc4 = to_vc4_dev(dev);
360 uint8_t flags;
361 uint32_t tile_state_size;
362 uint32_t tile_count, bin_addr;
363 int bin_slot;
364
365 if (exec->found_tile_binning_mode_config_packet) {
366 DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
367 return -EINVAL;
368 }
369 exec->found_tile_binning_mode_config_packet = true;
370
371 exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
372 exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
373 tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
374 flags = *(uint8_t *)(untrusted + 14);
375
376 if (exec->bin_tiles_x == 0 ||
377 exec->bin_tiles_y == 0) {
378 DRM_DEBUG("Tile binning config of %dx%d too small\n",
379 exec->bin_tiles_x, exec->bin_tiles_y);
380 return -EINVAL;
381 }
382
383 if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
384 VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
385 DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
386 return -EINVAL;
387 }
388
389 bin_slot = vc4_v3d_get_bin_slot(vc4);
390 if (bin_slot < 0) {
391 if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
392 drm_err(dev, "Failed to allocate binner memory: %d\n",
393 bin_slot);
394 }
395 return bin_slot;
396 }
397
398 /* The slot we allocated will only be used by this job, and is
399 * free when the job completes rendering.
400 */
401 exec->bin_slots |= BIT(bin_slot);
402 bin_addr = vc4->bin_bo->base.dma_addr + bin_slot * vc4->bin_alloc_size;
403
404 /* The tile state data array is 48 bytes per tile, and we put it at
405 * the start of a BO containing both it and the tile alloc.
406 */
407 tile_state_size = 48 * tile_count;
408
409 /* Since the tile alloc array will follow us, align. */
410 exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);
411
412 *(uint8_t *)(validated + 14) =
413 ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
414 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
415 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
416 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
417 VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
418 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
419 VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
420
421 /* tile alloc address. */
422 *(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
423 /* tile alloc size. */
424 *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
425 exec->tile_alloc_offset);
426 /* tile state address. */
427 *(uint32_t *)(validated + 8) = bin_addr;
428
429 return 0;
430 }
431
432 static int
validate_gem_handles(VALIDATE_ARGS)433 validate_gem_handles(VALIDATE_ARGS)
434 {
435 memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
436 return 0;
437 }
438
439 #define VC4_DEFINE_PACKET(packet, func) \
440 [packet] = { packet ## _SIZE, #packet, func }
441
442 static const struct cmd_info {
443 uint16_t len;
444 const char *name;
445 int (*func)(struct vc4_exec_info *exec, void *validated,
446 void *untrusted);
447 } cmd_info[] = {
448 VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
449 VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
450 VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
451 VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
452 VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
453 validate_start_tile_binning),
454 VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
455 validate_increment_semaphore),
456
457 VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
458 validate_indexed_prim_list),
459 VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
460 validate_gl_array_primitive),
461
462 VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
463
464 VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
465
466 VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
467 VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
468 VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
469 VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
470 VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
471 VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
472 VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
473 VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
474 VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
475 /* Note: The docs say this was also 105, but it was 106 in the
476 * initial userland code drop.
477 */
478 VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
479
480 VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
481 validate_tile_binning_config),
482
483 VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
484 };
485
486 int
vc4_validate_bin_cl(struct drm_device * dev,void * validated,void * unvalidated,struct vc4_exec_info * exec)487 vc4_validate_bin_cl(struct drm_device *dev,
488 void *validated,
489 void *unvalidated,
490 struct vc4_exec_info *exec)
491 {
492 struct vc4_dev *vc4 = to_vc4_dev(dev);
493 uint32_t len = exec->args->bin_cl_size;
494 uint32_t dst_offset = 0;
495 uint32_t src_offset = 0;
496
497 if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
498 return -ENODEV;
499
500 while (src_offset < len) {
501 void *dst_pkt = validated + dst_offset;
502 void *src_pkt = unvalidated + src_offset;
503 u8 cmd = *(uint8_t *)src_pkt;
504 const struct cmd_info *info;
505
506 if (cmd >= ARRAY_SIZE(cmd_info)) {
507 DRM_DEBUG("0x%08x: packet %d out of bounds\n",
508 src_offset, cmd);
509 return -EINVAL;
510 }
511
512 info = &cmd_info[cmd];
513 if (!info->name) {
514 DRM_DEBUG("0x%08x: packet %d invalid\n",
515 src_offset, cmd);
516 return -EINVAL;
517 }
518
519 if (src_offset + info->len > len) {
520 DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
521 "exceeds bounds (0x%08x)\n",
522 src_offset, cmd, info->name, info->len,
523 src_offset + len);
524 return -EINVAL;
525 }
526
527 if (cmd != VC4_PACKET_GEM_HANDLES)
528 memcpy(dst_pkt, src_pkt, info->len);
529
530 if (info->func && info->func(exec,
531 dst_pkt + 1,
532 src_pkt + 1)) {
533 DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
534 src_offset, cmd, info->name);
535 return -EINVAL;
536 }
537
538 src_offset += info->len;
539 /* GEM handle loading doesn't produce HW packets. */
540 if (cmd != VC4_PACKET_GEM_HANDLES)
541 dst_offset += info->len;
542
543 /* When the CL hits halt, it'll stop reading anything else. */
544 if (cmd == VC4_PACKET_HALT)
545 break;
546 }
547
548 exec->ct0ea = exec->ct0ca + dst_offset;
549
550 if (!exec->found_start_tile_binning_packet) {
551 DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
552 return -EINVAL;
553 }
554
555 /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
556 * semaphore is used to trigger the render CL to start up, and the
557 * FLUSH is what caps the bin lists with
558 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
559 * render CL when they get called to) and actually triggers the queued
560 * semaphore increment.
561 */
562 if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
563 DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
564 "VC4_PACKET_FLUSH\n");
565 return -EINVAL;
566 }
567
568 return 0;
569 }
570
571 static bool
reloc_tex(struct vc4_exec_info * exec,void * uniform_data_u,struct vc4_texture_sample_info * sample,uint32_t texture_handle_index,bool is_cs)572 reloc_tex(struct vc4_exec_info *exec,
573 void *uniform_data_u,
574 struct vc4_texture_sample_info *sample,
575 uint32_t texture_handle_index, bool is_cs)
576 {
577 struct drm_gem_dma_object *tex;
578 uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
579 uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
580 uint32_t p2 = (sample->p_offset[2] != ~0 ?
581 *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
582 uint32_t p3 = (sample->p_offset[3] != ~0 ?
583 *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
584 uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
585 uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
586 uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
587 uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
588 uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
589 uint32_t cpp, tiling_format, utile_w, utile_h;
590 uint32_t i;
591 uint32_t cube_map_stride = 0;
592 enum vc4_texture_data_type type;
593
594 tex = vc4_use_bo(exec, texture_handle_index);
595 if (!tex)
596 return false;
597
598 if (sample->is_direct) {
599 uint32_t remaining_size = tex->base.size - p0;
600
601 if (p0 > tex->base.size - 4) {
602 DRM_DEBUG("UBO offset greater than UBO size\n");
603 goto fail;
604 }
605 if (p1 > remaining_size - 4) {
606 DRM_DEBUG("UBO clamp would allow reads "
607 "outside of UBO\n");
608 goto fail;
609 }
610 *validated_p0 = tex->dma_addr + p0;
611 return true;
612 }
613
614 if (width == 0)
615 width = 2048;
616 if (height == 0)
617 height = 2048;
618
619 if (p0 & VC4_TEX_P0_CMMODE_MASK) {
620 if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
621 VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
622 cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
623 if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
624 VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
625 if (cube_map_stride) {
626 DRM_DEBUG("Cube map stride set twice\n");
627 goto fail;
628 }
629
630 cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
631 }
632 if (!cube_map_stride) {
633 DRM_DEBUG("Cube map stride not set\n");
634 goto fail;
635 }
636 }
637
638 type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
639 (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
640
641 switch (type) {
642 case VC4_TEXTURE_TYPE_RGBA8888:
643 case VC4_TEXTURE_TYPE_RGBX8888:
644 case VC4_TEXTURE_TYPE_RGBA32R:
645 cpp = 4;
646 break;
647 case VC4_TEXTURE_TYPE_RGBA4444:
648 case VC4_TEXTURE_TYPE_RGBA5551:
649 case VC4_TEXTURE_TYPE_RGB565:
650 case VC4_TEXTURE_TYPE_LUMALPHA:
651 case VC4_TEXTURE_TYPE_S16F:
652 case VC4_TEXTURE_TYPE_S16:
653 cpp = 2;
654 break;
655 case VC4_TEXTURE_TYPE_LUMINANCE:
656 case VC4_TEXTURE_TYPE_ALPHA:
657 case VC4_TEXTURE_TYPE_S8:
658 cpp = 1;
659 break;
660 case VC4_TEXTURE_TYPE_ETC1:
661 /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
662 * pixels.
663 */
664 cpp = 8;
665 width = (width + 3) >> 2;
666 height = (height + 3) >> 2;
667 break;
668 case VC4_TEXTURE_TYPE_BW1:
669 case VC4_TEXTURE_TYPE_A4:
670 case VC4_TEXTURE_TYPE_A1:
671 case VC4_TEXTURE_TYPE_RGBA64:
672 case VC4_TEXTURE_TYPE_YUV422R:
673 default:
674 DRM_DEBUG("Texture format %d unsupported\n", type);
675 goto fail;
676 }
677 utile_w = utile_width(cpp);
678 utile_h = utile_height(cpp);
679
680 if (type == VC4_TEXTURE_TYPE_RGBA32R) {
681 tiling_format = VC4_TILING_FORMAT_LINEAR;
682 } else {
683 if (size_is_lt(width, height, cpp))
684 tiling_format = VC4_TILING_FORMAT_LT;
685 else
686 tiling_format = VC4_TILING_FORMAT_T;
687 }
688
689 if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
690 tiling_format, width, height, cpp)) {
691 goto fail;
692 }
693
694 /* The mipmap levels are stored before the base of the texture. Make
695 * sure there is actually space in the BO.
696 */
697 for (i = 1; i <= miplevels; i++) {
698 uint32_t level_width = max(width >> i, 1u);
699 uint32_t level_height = max(height >> i, 1u);
700 uint32_t aligned_width, aligned_height;
701 uint32_t level_size;
702
703 /* Once the levels get small enough, they drop from T to LT. */
704 if (tiling_format == VC4_TILING_FORMAT_T &&
705 size_is_lt(level_width, level_height, cpp)) {
706 tiling_format = VC4_TILING_FORMAT_LT;
707 }
708
709 switch (tiling_format) {
710 case VC4_TILING_FORMAT_T:
711 aligned_width = round_up(level_width, utile_w * 8);
712 aligned_height = round_up(level_height, utile_h * 8);
713 break;
714 case VC4_TILING_FORMAT_LT:
715 aligned_width = round_up(level_width, utile_w);
716 aligned_height = round_up(level_height, utile_h);
717 break;
718 default:
719 aligned_width = round_up(level_width, utile_w);
720 aligned_height = level_height;
721 break;
722 }
723
724 level_size = aligned_width * cpp * aligned_height;
725
726 if (offset < level_size) {
727 DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
728 "overflowed buffer bounds (offset %d)\n",
729 i, level_width, level_height,
730 aligned_width, aligned_height,
731 level_size, offset);
732 goto fail;
733 }
734
735 offset -= level_size;
736 }
737
738 *validated_p0 = tex->dma_addr + p0;
739
740 return true;
741 fail:
742 DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
743 DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
744 DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
745 DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
746 return false;
747 }
748
749 static int
validate_gl_shader_rec(struct drm_device * dev,struct vc4_exec_info * exec,struct vc4_shader_state * state)750 validate_gl_shader_rec(struct drm_device *dev,
751 struct vc4_exec_info *exec,
752 struct vc4_shader_state *state)
753 {
754 uint32_t *src_handles;
755 void *pkt_u, *pkt_v;
756 static const uint32_t shader_reloc_offsets[] = {
757 4, /* fs */
758 16, /* vs */
759 28, /* cs */
760 };
761 uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
762 struct drm_gem_dma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
763 uint32_t nr_attributes, nr_relocs, packet_size;
764 int i;
765
766 nr_attributes = state->addr & 0x7;
767 if (nr_attributes == 0)
768 nr_attributes = 8;
769 packet_size = gl_shader_rec_size(state->addr);
770
771 nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
772 if (nr_relocs * 4 > exec->shader_rec_size) {
773 DRM_DEBUG("overflowed shader recs reading %d handles "
774 "from %d bytes left\n",
775 nr_relocs, exec->shader_rec_size);
776 return -EINVAL;
777 }
778 src_handles = exec->shader_rec_u;
779 exec->shader_rec_u += nr_relocs * 4;
780 exec->shader_rec_size -= nr_relocs * 4;
781
782 if (packet_size > exec->shader_rec_size) {
783 DRM_DEBUG("overflowed shader recs copying %db packet "
784 "from %d bytes left\n",
785 packet_size, exec->shader_rec_size);
786 return -EINVAL;
787 }
788 pkt_u = exec->shader_rec_u;
789 pkt_v = exec->shader_rec_v;
790 memcpy(pkt_v, pkt_u, packet_size);
791 exec->shader_rec_u += packet_size;
792 /* Shader recs have to be aligned to 16 bytes (due to the attribute
793 * flags being in the low bytes), so round the next validated shader
794 * rec address up. This should be safe, since we've got so many
795 * relocations in a shader rec packet.
796 */
797 BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
798 exec->shader_rec_v += roundup(packet_size, 16);
799 exec->shader_rec_size -= packet_size;
800
801 for (i = 0; i < shader_reloc_count; i++) {
802 if (src_handles[i] > exec->bo_count) {
803 DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
804 return -EINVAL;
805 }
806
807 bo[i] = to_drm_gem_dma_obj(exec->bo[src_handles[i]]);
808 if (!bo[i])
809 return -EINVAL;
810 }
811 for (i = shader_reloc_count; i < nr_relocs; i++) {
812 bo[i] = vc4_use_bo(exec, src_handles[i]);
813 if (!bo[i])
814 return -EINVAL;
815 }
816
817 if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
818 to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
819 DRM_DEBUG("Thread mode of CL and FS do not match\n");
820 return -EINVAL;
821 }
822
823 if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
824 to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
825 DRM_DEBUG("cs and vs cannot be threaded\n");
826 return -EINVAL;
827 }
828
829 for (i = 0; i < shader_reloc_count; i++) {
830 struct vc4_validated_shader_info *validated_shader;
831 uint32_t o = shader_reloc_offsets[i];
832 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
833 uint32_t *texture_handles_u;
834 void *uniform_data_u;
835 uint32_t tex, uni;
836
837 *(uint32_t *)(pkt_v + o) = bo[i]->dma_addr + src_offset;
838
839 if (src_offset != 0) {
840 DRM_DEBUG("Shaders must be at offset 0 of "
841 "the BO.\n");
842 return -EINVAL;
843 }
844
845 validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
846 if (!validated_shader)
847 return -EINVAL;
848
849 if (validated_shader->uniforms_src_size >
850 exec->uniforms_size) {
851 DRM_DEBUG("Uniforms src buffer overflow\n");
852 return -EINVAL;
853 }
854
855 texture_handles_u = exec->uniforms_u;
856 uniform_data_u = (texture_handles_u +
857 validated_shader->num_texture_samples);
858
859 memcpy(exec->uniforms_v, uniform_data_u,
860 validated_shader->uniforms_size);
861
862 for (tex = 0;
863 tex < validated_shader->num_texture_samples;
864 tex++) {
865 if (!reloc_tex(exec,
866 uniform_data_u,
867 &validated_shader->texture_samples[tex],
868 texture_handles_u[tex],
869 i == 2)) {
870 return -EINVAL;
871 }
872 }
873
874 /* Fill in the uniform slots that need this shader's
875 * start-of-uniforms address (used for resetting the uniform
876 * stream in the presence of control flow).
877 */
878 for (uni = 0;
879 uni < validated_shader->num_uniform_addr_offsets;
880 uni++) {
881 uint32_t o = validated_shader->uniform_addr_offsets[uni];
882 ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
883 }
884
885 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
886
887 exec->uniforms_u += validated_shader->uniforms_src_size;
888 exec->uniforms_v += validated_shader->uniforms_size;
889 exec->uniforms_p += validated_shader->uniforms_size;
890 }
891
892 for (i = 0; i < nr_attributes; i++) {
893 struct drm_gem_dma_object *vbo =
894 bo[ARRAY_SIZE(shader_reloc_offsets) + i];
895 uint32_t o = 36 + i * 8;
896 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
897 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
898 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
899 uint32_t max_index;
900
901 if (state->addr & 0x8)
902 stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
903
904 if (vbo->base.size < offset ||
905 vbo->base.size - offset < attr_size) {
906 DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
907 offset, attr_size, vbo->base.size);
908 return -EINVAL;
909 }
910
911 if (stride != 0) {
912 max_index = ((vbo->base.size - offset - attr_size) /
913 stride);
914 if (state->max_index > max_index) {
915 DRM_DEBUG("primitives use index %d out of "
916 "supplied %d\n",
917 state->max_index, max_index);
918 return -EINVAL;
919 }
920 }
921
922 *(uint32_t *)(pkt_v + o) = vbo->dma_addr + offset;
923 }
924
925 return 0;
926 }
927
928 int
vc4_validate_shader_recs(struct drm_device * dev,struct vc4_exec_info * exec)929 vc4_validate_shader_recs(struct drm_device *dev,
930 struct vc4_exec_info *exec)
931 {
932 struct vc4_dev *vc4 = to_vc4_dev(dev);
933 uint32_t i;
934 int ret = 0;
935
936 if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
937 return -ENODEV;
938
939 for (i = 0; i < exec->shader_state_count; i++) {
940 ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
941 if (ret)
942 return ret;
943 }
944
945 return ret;
946 }
947