1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15 #include <linux/pm_runtime.h>
16
17 #include <drm/drm_drv.h>
18 #include <drm/drm_managed.h>
19
20 #include "panthor_device.h"
21 #include "panthor_fw.h"
22 #include "panthor_gem.h"
23 #include "panthor_gpu.h"
24 #include "panthor_mmu.h"
25 #include "panthor_regs.h"
26 #include "panthor_sched.h"
27
/* File name of the CSF firmware image; panthor_fw_load() appends it to the per-arch directory. */
#define CSF_FW_NAME "mali_csffw.bin"

/* Delay, in milliseconds, used when arming the FW watchdog ping work. */
#define PING_INTERVAL_MS			12000
/*
 * Progress timer budget. The value written to the global interface is
 * PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT.
 */
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
/* Timeout, in microseconds, converted by panthor_fw_conv_timeout() into the idle timer. */
#define IDLE_HYSTERESIS_US			800
/* Timeout, in microseconds, converted by panthor_fw_conv_timeout() into the power-off timer. */
#define PWROFF_HYSTERESIS_US			10000
35
/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 *
 * panthor_fw_load() reads this structure from the very beginning of the
 * firmware image, so the field order and widths describe the file layout
 * and must not be changed.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/**
	 * @major: Major FW version.
	 *
	 * panthor_fw_load() rejects any value other than
	 * CSF_FW_BINARY_HEADER_MAJOR_MAX.
	 */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/**
	 * @size: FW binary size.
	 *
	 * panthor_fw_load() refuses images whose file size is smaller than
	 * this value and stops parsing entries once this offset is reached.
	 */
	u32 size;
};
63
/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 *
 * The type is extracted from an entry header with
 * CSF_FW_BINARY_ENTRY_TYPE(). panthor_fw_load_entry() only parses the
 * IFACE and BUILD_INFO_METADATA entries; CONFIG, FUTF_TEST, TRACE_BUFFER
 * and TIMELINE_METADATA entries are accepted but skipped.
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};
89
/*
 * Entry header word decoding: the entry type lives in bits 7:0 and the
 * entry size (in bytes, header word included) in bits 15:8.
 */
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
/* Entries of unknown type are only tolerated when this bit is set. */
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)

/* Flags found in struct panthor_fw_binary_section_entry_hdr::flags. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT		(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_PROT					BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_ZERO					BIT(31)

/* Any flag outside this set makes panthor_fw_load_section_entry() fail. */
#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD |					\
	 CSF_FW_BINARY_IFACE_ENTRY_WR |					\
	 CSF_FW_BINARY_IFACE_ENTRY_EX |					\
	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_SHARED  |				\
	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)
115
/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 *
 * Read verbatim from the payload of an interface entry, so the field
 * order and widths describe the file layout and must not be changed.
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags (CSF_FW_BINARY_IFACE_ENTRY_xxx). */
	u32 flags;

	/**
	 * @va: MCU virtual range to map this binary section to.
	 *
	 * Both bounds must be aligned on the FW VM page size.
	 */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};
141
/**
 * struct panthor_fw_build_info_hdr - Payload of a build-info metadata entry
 *
 * Read verbatim from the firmware image by panthor_fw_read_build_info().
 */
struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary */
	u32 meta_start;
	/** @meta_size: Size of the build info data in the FW binary */
	u32 meta_size;
};
148
/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary. The iterator is a bounded cursor:
 * the read helpers refuse to move @offset past @size.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset, relative to @data. */
	size_t offset;
};
164
/**
 * struct panthor_fw_section - FW section
 *
 * One instance is created per (non protected-mode) interface entry found
 * in the firmware binary.
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/**
	 * @mem: Section memory.
	 *
	 * Only allocated when the section covers a non-empty VA range.
	 */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};
198
/*
 * MCU VA of the shared section. The interface entry mapped at this
 * address must carry CSF_FW_BINARY_IFACE_ENTRY_SHARED and becomes
 * panthor_fw::shared_section.
 */
#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

/* Lower bounds enforced on the stream/group counts reported by the FW. */
#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3

/* Interface version encoding: major in bits 31:24, minor in bits 23:16, patch in bits 15:0. */
#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)

/*
 * Offset of the first group control block in the shared section, and
 * offset of the first stream control block inside a group control block.
 */
#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
/* Value reported through csif_info.unpreserved_cs_reg_count. */
#define CSF_UNPRESERVED_REG_COUNT		4
214
/**
 * struct panthor_fw_iface - FW interfaces
 *
 * CPU-side descriptors for the global, group and stream interfaces. They
 * are filled by panthor_fw_init_ifaces() from the shared section.
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces, indexed by [group slot][stream slot]. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};
228
/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/**
	 * @shared_section: The section containing the FW interfaces.
	 *
	 * This is the section mapped at CSF_MCU_SHARED_REGION_START.
	 */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/**
	 * @booted: True if the FW is booted.
	 *
	 * Set by the job IRQ handler on the first global-interface interrupt.
	 */
	bool booted;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};
268
panthor_fw_vm(struct panthor_device * ptdev)269 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
270 {
271 return ptdev->fw->vm;
272 }
273
274 /**
275 * panthor_fw_get_glb_iface() - Get the global interface
276 * @ptdev: Device.
277 *
278 * Return: The global interface.
279 */
280 struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device * ptdev)281 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
282 {
283 return &ptdev->fw->iface.global;
284 }
285
286 /**
287 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
288 * @ptdev: Device.
289 * @csg_slot: Index of the command stream group slot.
290 *
291 * Return: The command stream group slot interface.
292 */
293 struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device * ptdev,u32 csg_slot)294 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
295 {
296 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
297 return NULL;
298
299 return &ptdev->fw->iface.groups[csg_slot];
300 }
301
302 /**
303 * panthor_fw_get_cs_iface() - Get a command stream slot interface
304 * @ptdev: Device.
305 * @csg_slot: Index of the command stream group slot.
306 * @cs_slot: Index of the command stream slot.
307 *
308 * Return: The command stream slot interface.
309 */
310 struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device * ptdev,u32 csg_slot,u32 cs_slot)311 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
312 {
313 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
314 return NULL;
315
316 return &ptdev->fw->iface.streams[csg_slot][cs_slot];
317 }
318
319 /**
320 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
321 * @ptdev: Device.
322 * @timeout_us: Timeout expressed in micro-seconds.
323 *
324 * The FW has two timer sources: the GPU counter or arch-timer. We need
325 * to express timeouts in term of number of cycles and specify which
326 * timer source should be used.
327 *
328 * Return: A value suitable for timeout fields in the global interface.
329 */
panthor_fw_conv_timeout(struct panthor_device * ptdev,u32 timeout_us)330 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
331 {
332 bool use_cycle_counter = false;
333 u32 timer_rate = 0;
334 u64 mod_cycles;
335
336 #ifdef CONFIG_ARM_ARCH_TIMER
337 timer_rate = arch_timer_get_cntfrq();
338 #endif
339
340 if (!timer_rate) {
341 use_cycle_counter = true;
342 timer_rate = clk_get_rate(ptdev->clks.core);
343 }
344
345 if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
346 /* We couldn't get a valid clock rate, let's just pick the
347 * maximum value so the FW still handles the core
348 * power on/off requests.
349 */
350 return GLB_TIMER_VAL(~0) |
351 GLB_TIMER_SOURCE_GPU_COUNTER;
352 }
353
354 mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
355 1000000ull << 10);
356 if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
357 mod_cycles = GLB_TIMER_VAL(~0);
358
359 return GLB_TIMER_VAL(mod_cycles) |
360 (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
361 }
362
/**
 * panthor_fw_binary_iter_read() - Copy bytes out of the FW binary and advance
 * @ptdev: Device.
 * @iter: Iterator to read from.
 * @out: Destination buffer, at least @size bytes long.
 * @size: Number of bytes to read.
 *
 * The iterator offset is only updated when the whole read fits in the
 * iterator window.
 *
 * Return: 0 on success, -EINVAL if the read would go past the end of the
 * iterator (or if the end offset wraps).
 */
static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	const size_t start = iter->offset;
	const size_t end = start + size;
	const bool wrapped = end < start;

	if (wrapped || end > iter->size) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + start, size);
	iter->offset = end;

	return 0;
}
378
/**
 * panthor_fw_binary_sub_iter_init() - Carve a sub-iterator out of an iterator
 * @ptdev: Device.
 * @iter: Parent iterator.
 * @sub_iter: Iterator to initialize.
 * @size: Size of the window covered by @sub_iter.
 *
 * On success, @sub_iter covers the @size bytes starting at the current
 * position of @iter, and @iter is advanced past that window.
 *
 * Return: 0 on success, -EINVAL if the window does not fit in @iter (or if
 * the end offset wraps).
 */
static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	const size_t window_end = iter->offset + size;
	const bool wrapped = window_end < iter->offset;

	if (wrapped || window_end > iter->size) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;

	iter->offset = window_end;

	return 0;
}
397
panthor_fw_init_section_mem(struct panthor_device * ptdev,struct panthor_fw_section * section)398 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
399 struct panthor_fw_section *section)
400 {
401 bool was_mapped = !!section->mem->kmap;
402 int ret;
403
404 if (!section->data.size &&
405 !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
406 return;
407
408 ret = panthor_kernel_bo_vmap(section->mem);
409 if (drm_WARN_ON(&ptdev->base, ret))
410 return;
411
412 memcpy(section->mem->kmap, section->data.buf, section->data.size);
413 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
414 memset(section->mem->kmap + section->data.size, 0,
415 panthor_kernel_bo_size(section->mem) - section->data.size);
416 }
417
418 if (!was_mapped)
419 panthor_kernel_bo_vunmap(section->mem);
420 }
421
422 /**
423 * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
424 * @ptdev: Device.
425 * @input: Pointer holding the input interface on success.
426 * Should be ignored on failure.
427 * @output: Pointer holding the output interface on success.
428 * Should be ignored on failure.
429 * @input_fw_va: Pointer holding the input interface FW VA on success.
430 * Should be ignored on failure.
431 * @output_fw_va: Pointer holding the output interface FW VA on success.
432 * Should be ignored on failure.
433 *
434 * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
435 * interface is at offset 0, and the output interface at offset 4096.
436 *
437 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
438 */
439 struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device * ptdev,struct panthor_fw_ringbuf_input_iface ** input,const struct panthor_fw_ringbuf_output_iface ** output,u32 * input_fw_va,u32 * output_fw_va)440 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
441 struct panthor_fw_ringbuf_input_iface **input,
442 const struct panthor_fw_ringbuf_output_iface **output,
443 u32 *input_fw_va, u32 *output_fw_va)
444 {
445 struct panthor_kernel_bo *mem;
446 int ret;
447
448 mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
449 DRM_PANTHOR_BO_NO_MMAP,
450 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
451 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
452 PANTHOR_VM_KERNEL_AUTO_VA,
453 "Queue FW interface");
454 if (IS_ERR(mem))
455 return mem;
456
457 ret = panthor_kernel_bo_vmap(mem);
458 if (ret) {
459 panthor_kernel_bo_destroy(mem);
460 return ERR_PTR(ret);
461 }
462
463 memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
464 *input = mem->kmap;
465 *output = mem->kmap + SZ_4K;
466 *input_fw_va = panthor_kernel_bo_gpuva(mem);
467 *output_fw_va = *input_fw_va + SZ_4K;
468
469 return mem;
470 }
471
472 /**
473 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
474 * @ptdev: Device.
475 * @size: Size of the suspend buffer.
476 *
477 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
478 */
479 struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device * ptdev,size_t size)480 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
481 {
482 return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
483 DRM_PANTHOR_BO_NO_MMAP,
484 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
485 PANTHOR_VM_KERNEL_AUTO_VA,
486 "FW suspend buffer");
487 }
488
/**
 * panthor_fw_load_section_entry() - Parse an interface entry and create its FW section
 * @ptdev: Device.
 * @fw: Firmware image the entry was found in.
 * @iter: Iterator covering the entry payload (entry header word excluded).
 * @ehdr: Entry header word. Currently unused.
 *
 * Validates the section header, keeps a copy of the section's initial data
 * and name, and, for sections covering a non-empty VA range, allocates the
 * backing memory at the requested MCU VA and initializes it. The section
 * mapped at CSF_MCU_SHARED_REGION_START is recorded as the shared section.
 *
 * Protected-mode entries are not supported and are skipped with a warning.
 *
 * Return: 0 on success or when the entry is ignored, a negative error code
 * otherwise.
 */
static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 data_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not supported, ignoring\n");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared\n", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	data_size = hdr.data.end - hdr.data.start;
	section_size = hdr.va.end - hdr.va.start;

	/* The initial data is copied at the start of the section memory, so
	 * it must fit in the VA range. Without this check, the copy done in
	 * panthor_fw_init_section_mem() could write past the end of the
	 * buffer, and the size of the zeroed tail could underflow.
	 */
	if (section_size && data_size > section_size) {
		drm_err(&ptdev->base, "Firmware corrupted, section data (0x%x bytes) doesn't fit in VA range 0x%x-0x%x\n",
			data_size, hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	/* Whatever is left in the entry after the header is the section name. */
	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = data_size;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		/* Keep a private copy: the firmware image is released once
		 * loading is done, but the data is needed again on reload.
		 */
		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
		 * of IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va, "FW section");
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		/* Shared sections stay mapped on the CPU side. */
		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}
634
/**
 * panthor_fw_read_build_info() - Parse a build-info metadata entry
 * @ptdev: Device.
 * @fw: Firmware image the entry was found in.
 * @iter: Iterator covering the entry payload.
 * @ehdr: Entry header word. Currently unused.
 *
 * Logs the firmware git SHA when the metadata starts with "git_sha: " and
 * is NUL terminated. Build info is purely informational: malformed or
 * unrecognized metadata is never treated as a fatal error.
 *
 * Return: 0 on success or when the metadata is ignored, a negative error
 * code if the entry header can't be read.
 */
static int panthor_fw_read_build_info(struct panthor_device *ptdev,
				      const struct firmware *fw,
				      struct panthor_fw_binary_iter *iter,
				      u32 ehdr)
{
	struct panthor_fw_build_info_hdr hdr;
	static const char git_sha_header[] = "git_sha: ";
	const size_t header_len = sizeof(git_sha_header) - 1;
	size_t meta_end;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	/* Written as a subtraction so that a huge meta_size can't wrap the
	 * sum around and sneak past the bound check.
	 */
	if (hdr.meta_start > fw->size ||
	    hdr.meta_size > fw->size - hdr.meta_start) {
		drm_err(&ptdev->base, "Firmware build info corrupt\n");
		/* We don't need the build info, so continue */
		return 0;
	}

	/* The metadata must at least hold the header and a NUL terminator,
	 * otherwise the accesses below would reach outside of it.
	 */
	if (hdr.meta_size <= header_len)
		return 0;

	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
		/* Not the expected header, this isn't metadata we understand */
		return 0;
	}

	/* Check that the git SHA is NULL terminated as expected */
	meta_end = (size_t)hdr.meta_start + hdr.meta_size;
	if (fw->data[meta_end - 1] != '\0') {
		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
		/* Don't treat as fatal */
		return 0;
	}

	drm_info(&ptdev->base, "Firmware git sha: %s\n",
		 fw->data + hdr.meta_start + header_len);

	return 0;
}
673
674 static void
panthor_reload_fw_sections(struct panthor_device * ptdev,bool full_reload)675 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
676 {
677 struct panthor_fw_section *section;
678
679 list_for_each_entry(section, &ptdev->fw->sections, node) {
680 struct sg_table *sgt;
681
682 if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
683 continue;
684
685 panthor_fw_init_section_mem(ptdev, section);
686 sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
687 if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
688 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
689 }
690 }
691
panthor_fw_load_entry(struct panthor_device * ptdev,const struct firmware * fw,struct panthor_fw_binary_iter * iter)692 static int panthor_fw_load_entry(struct panthor_device *ptdev,
693 const struct firmware *fw,
694 struct panthor_fw_binary_iter *iter)
695 {
696 struct panthor_fw_binary_iter eiter;
697 u32 ehdr;
698 int ret;
699
700 ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
701 if (ret)
702 return ret;
703
704 if ((iter->offset % sizeof(u32)) ||
705 (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
706 drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
707 (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
708 return -EINVAL;
709 }
710
711 if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
712 CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
713 return -EINVAL;
714
715 switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
716 case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
717 return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
718 case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
719 return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
720
721 /* FIXME: handle those entry types? */
722 case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
723 case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
724 case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
725 case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
726 return 0;
727 default:
728 break;
729 }
730
731 if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
732 return 0;
733
734 drm_err(&ptdev->base,
735 "Unsupported non-optional entry type %u in firmware\n",
736 CSF_FW_BINARY_ENTRY_TYPE(ehdr));
737 return -EINVAL;
738 }
739
panthor_fw_load(struct panthor_device * ptdev)740 static int panthor_fw_load(struct panthor_device *ptdev)
741 {
742 const struct firmware *fw = NULL;
743 struct panthor_fw_binary_iter iter = {};
744 struct panthor_fw_binary_hdr hdr;
745 char fw_path[128];
746 int ret;
747
748 snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
749 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
750 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
751 CSF_FW_NAME);
752
753 ret = request_firmware(&fw, fw_path, ptdev->base.dev);
754 if (ret) {
755 drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
756 CSF_FW_NAME);
757 return ret;
758 }
759
760 iter.data = fw->data;
761 iter.size = fw->size;
762 ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
763 if (ret)
764 goto out;
765
766 if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
767 ret = -EINVAL;
768 drm_err(&ptdev->base, "Invalid firmware magic\n");
769 goto out;
770 }
771
772 if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
773 ret = -EINVAL;
774 drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
775 hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
776 goto out;
777 }
778
779 if (hdr.size > iter.size) {
780 drm_err(&ptdev->base, "Firmware image is truncated\n");
781 goto out;
782 }
783
784 iter.size = hdr.size;
785
786 while (iter.offset < hdr.size) {
787 ret = panthor_fw_load_entry(ptdev, fw, &iter);
788 if (ret)
789 goto out;
790 }
791
792 if (!ptdev->fw->shared_section) {
793 drm_err(&ptdev->base, "Shared interface region not found\n");
794 ret = -EINVAL;
795 goto out;
796 }
797
798 out:
799 release_firmware(fw);
800 return ret;
801 }
802
803 /**
804 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
805 * @ptdev: Device.
806 * @mcu_va: MCU address.
807 *
808 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
809 */
iface_fw_to_cpu_addr(struct panthor_device * ptdev,u32 mcu_va)810 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
811 {
812 u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
813 u64 shared_mem_end = shared_mem_start +
814 panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
815 if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
816 return NULL;
817
818 return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
819 }
820
panthor_init_cs_iface(struct panthor_device * ptdev,unsigned int csg_idx,unsigned int cs_idx)821 static int panthor_init_cs_iface(struct panthor_device *ptdev,
822 unsigned int csg_idx, unsigned int cs_idx)
823 {
824 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
825 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
826 struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
827 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
828 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
829 (csg_idx * glb_iface->control->group_stride) +
830 CSF_STREAM_CONTROL_OFFSET +
831 (cs_idx * csg_iface->control->stream_stride);
832 struct panthor_fw_cs_iface *first_cs_iface =
833 panthor_fw_get_cs_iface(ptdev, 0, 0);
834
835 if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
836 return -EINVAL;
837
838 spin_lock_init(&cs_iface->lock);
839 cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
840 cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
841 cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
842
843 if (!cs_iface->input || !cs_iface->output) {
844 drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
845 return -EINVAL;
846 }
847
848 if (cs_iface != first_cs_iface) {
849 if (cs_iface->control->features != first_cs_iface->control->features) {
850 drm_err(&ptdev->base, "Expecting identical CS slots");
851 return -EINVAL;
852 }
853 } else {
854 u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
855
856 ptdev->csif_info.cs_reg_count = reg_count;
857 ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
858 }
859
860 return 0;
861 }
862
compare_csg(const struct panthor_fw_csg_control_iface * a,const struct panthor_fw_csg_control_iface * b)863 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
864 const struct panthor_fw_csg_control_iface *b)
865 {
866 if (a->features != b->features)
867 return false;
868 if (a->suspend_size != b->suspend_size)
869 return false;
870 if (a->protm_suspend_size != b->protm_suspend_size)
871 return false;
872 if (a->stream_num != b->stream_num)
873 return false;
874 return true;
875 }
876
panthor_init_csg_iface(struct panthor_device * ptdev,unsigned int csg_idx)877 static int panthor_init_csg_iface(struct panthor_device *ptdev,
878 unsigned int csg_idx)
879 {
880 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
881 struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
882 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
883 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
884 unsigned int i;
885
886 if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
887 return -EINVAL;
888
889 spin_lock_init(&csg_iface->lock);
890 csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
891 csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
892 csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
893
894 if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
895 csg_iface->control->stream_num > MAX_CS_PER_CSG)
896 return -EINVAL;
897
898 if (!csg_iface->input || !csg_iface->output) {
899 drm_err(&ptdev->base, "Invalid group control interface input/output VA");
900 return -EINVAL;
901 }
902
903 if (csg_idx > 0) {
904 struct panthor_fw_csg_iface *first_csg_iface =
905 panthor_fw_get_csg_iface(ptdev, 0);
906
907 if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
908 drm_err(&ptdev->base, "Expecting identical CSG slots");
909 return -EINVAL;
910 }
911 }
912
913 for (i = 0; i < csg_iface->control->stream_num; i++) {
914 int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
915
916 if (ret)
917 return ret;
918 }
919
920 return 0;
921 }
922
panthor_get_instr_features(struct panthor_device * ptdev)923 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
924 {
925 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
926
927 if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
928 return 0;
929
930 return glb_iface->control->instr_features;
931 }
932
panthor_fw_init_ifaces(struct panthor_device * ptdev)933 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
934 {
935 struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
936 unsigned int i;
937
938 if (!ptdev->fw->shared_section->mem->kmap)
939 return -EINVAL;
940
941 spin_lock_init(&glb_iface->lock);
942 glb_iface->control = ptdev->fw->shared_section->mem->kmap;
943
944 if (!glb_iface->control->version) {
945 drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
946 return -EINVAL;
947 }
948
949 glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
950 glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
951 if (!glb_iface->input || !glb_iface->output) {
952 drm_err(&ptdev->base, "Invalid global control interface input/output VA");
953 return -EINVAL;
954 }
955
956 if (glb_iface->control->group_num > MAX_CSGS ||
957 glb_iface->control->group_num < MIN_CSGS) {
958 drm_err(&ptdev->base, "Invalid number of control groups");
959 return -EINVAL;
960 }
961
962 for (i = 0; i < glb_iface->control->group_num; i++) {
963 int ret = panthor_init_csg_iface(ptdev, i);
964
965 if (ret)
966 return ret;
967 }
968
969 drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
970 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
971 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
972 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
973 glb_iface->control->features,
974 panthor_get_instr_features(ptdev));
975 return 0;
976 }
977
panthor_fw_init_global_iface(struct panthor_device * ptdev)978 static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
979 {
980 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
981
982 /* Enable all cores. */
983 glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
984
985 /* Setup timers. */
986 glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
987 glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
988 glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
989
990 /* Enable interrupts we care about. */
991 glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
992 GLB_PING |
993 GLB_CFG_PROGRESS_TIMER |
994 GLB_CFG_POWEROFF_TIMER |
995 GLB_IDLE_EN |
996 GLB_IDLE;
997
998 panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
999 panthor_fw_toggle_reqs(glb_iface, req, ack,
1000 GLB_CFG_ALLOC_EN |
1001 GLB_CFG_POWEROFF_TIMER |
1002 GLB_CFG_PROGRESS_TIMER);
1003
1004 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1005
1006 /* Kick the watchdog. */
1007 mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
1008 msecs_to_jiffies(PING_INTERVAL_MS));
1009 }
1010
/**
 * panthor_job_irq_handler() - Handle job interrupts.
 * @ptdev: Device.
 * @status: Interrupt status bits being handled.
 *
 * Clears the handled interrupts, flags the FW as booted on the first global
 * interface interrupt, wakes up everyone sleeping on the request wait queue
 * and, once the FW is booted, forwards the events to the scheduler.
 */
static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	struct panthor_fw *fw = ptdev->fw;

	gpu_write(ptdev, JOB_INT_CLEAR, status);

	if ((status & JOB_INT_GLOBAL_IF) && !fw->booted)
		fw->booted = true;

	wake_up_all(&fw->req_waitqueue);

	/* Events are only forwarded to the scheduler once the FW is booted. */
	if (fw->booted)
		panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
1027
panthor_fw_start(struct panthor_device * ptdev)1028 static int panthor_fw_start(struct panthor_device *ptdev)
1029 {
1030 bool timedout = false;
1031
1032 ptdev->fw->booted = false;
1033 panthor_job_irq_resume(&ptdev->fw->irq, ~0);
1034 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
1035
1036 if (!wait_event_timeout(ptdev->fw->req_waitqueue,
1037 ptdev->fw->booted,
1038 msecs_to_jiffies(1000))) {
1039 if (!ptdev->fw->booted &&
1040 !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
1041 timedout = true;
1042 }
1043
1044 if (timedout) {
1045 static const char * const status_str[] = {
1046 [MCU_STATUS_DISABLED] = "disabled",
1047 [MCU_STATUS_ENABLED] = "enabled",
1048 [MCU_STATUS_HALT] = "halt",
1049 [MCU_STATUS_FATAL] = "fatal",
1050 };
1051 u32 status = gpu_read(ptdev, MCU_STATUS);
1052
1053 drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1054 status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1055 return -ETIMEDOUT;
1056 }
1057
1058 return 0;
1059 }
1060
panthor_fw_stop(struct panthor_device * ptdev)1061 static void panthor_fw_stop(struct panthor_device *ptdev)
1062 {
1063 u32 status;
1064
1065 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1066 if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
1067 status == MCU_STATUS_DISABLED, 10, 100000))
1068 drm_err(&ptdev->base, "Failed to stop MCU");
1069 }
1070
1071 /**
1072 * panthor_fw_pre_reset() - Call before a reset.
1073 * @ptdev: Device.
1074 * @on_hang: true if the reset was triggered on a GPU hang.
1075 *
1076 * If the reset is not triggered on a hang, we try to gracefully halt the
1077 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1078 */
panthor_fw_pre_reset(struct panthor_device * ptdev,bool on_hang)1079 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1080 {
1081 /* Make sure we won't be woken up by a ping. */
1082 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1083
1084 ptdev->reset.fast = false;
1085
1086 if (!on_hang) {
1087 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1088 u32 status;
1089
1090 panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1091 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1092 if (!gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
1093 status == MCU_STATUS_HALT, 10,
1094 100000)) {
1095 ptdev->reset.fast = true;
1096 } else {
1097 drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1098 }
1099 }
1100
1101 panthor_job_irq_suspend(&ptdev->fw->irq);
1102 }
1103
1104 /**
1105 * panthor_fw_post_reset() - Call after a reset.
1106 * @ptdev: Device.
1107 *
1108 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1109 * make sure we can recover from a memory corruption.
1110 */
panthor_fw_post_reset(struct panthor_device * ptdev)1111 int panthor_fw_post_reset(struct panthor_device *ptdev)
1112 {
1113 int ret;
1114
1115 /* Make the MCU VM active. */
1116 ret = panthor_vm_active(ptdev->fw->vm);
1117 if (ret)
1118 return ret;
1119
1120 if (!ptdev->reset.fast) {
1121 /* On a slow reset, reload all sections, including RO ones.
1122 * We're not supposed to end up here anyway, let's just assume
1123 * the overhead of reloading everything is acceptable.
1124 */
1125 panthor_reload_fw_sections(ptdev, true);
1126 } else {
1127 /* The FW detects 0 -> 1 transitions. Make sure we reset
1128 * the HALT bit before the FW is rebooted.
1129 * This is not needed on a slow reset because FW sections are
1130 * re-initialized.
1131 */
1132 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1133
1134 panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1135 }
1136
1137 ret = panthor_fw_start(ptdev);
1138 if (ret) {
1139 drm_err(&ptdev->base, "FW %s reset failed",
1140 ptdev->reset.fast ? "fast" : "slow");
1141 return ret;
1142 }
1143
1144 /* We must re-initialize the global interface even on fast-reset. */
1145 panthor_fw_init_global_iface(ptdev);
1146 return 0;
1147 }
1148
1149 /**
1150 * panthor_fw_unplug() - Called when the device is unplugged.
1151 * @ptdev: Device.
1152 *
1153 * This function must make sure all pending operations are flushed before
1154 * will release device resources, thus preventing any interaction with
1155 * the HW.
1156 *
1157 * If there is still FW-related work running after this function returns,
1158 * they must use drm_dev_{enter,exit}() and skip any HW access when
1159 * drm_dev_enter() returns false.
1160 */
panthor_fw_unplug(struct panthor_device * ptdev)1161 void panthor_fw_unplug(struct panthor_device *ptdev)
1162 {
1163 struct panthor_fw_section *section;
1164
1165 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1166
1167 if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
1168 /* Make sure the IRQ handler cannot be called after that point. */
1169 if (ptdev->fw->irq.irq)
1170 panthor_job_irq_suspend(&ptdev->fw->irq);
1171
1172 panthor_fw_stop(ptdev);
1173 }
1174
1175 list_for_each_entry(section, &ptdev->fw->sections, node)
1176 panthor_kernel_bo_destroy(section->mem);
1177
1178 /* We intentionally don't call panthor_vm_idle() and let
1179 * panthor_mmu_unplug() release the AS we acquired with
1180 * panthor_vm_active() so we don't have to track the VM active/idle
1181 * state to keep the active_refcnt balanced.
1182 */
1183 panthor_vm_put(ptdev->fw->vm);
1184 ptdev->fw->vm = NULL;
1185
1186 if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
1187 panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1188 }
1189
1190 /**
1191 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1192 * @req_ptr: Pointer to the req register.
1193 * @ack_ptr: Pointer to the ack register.
1194 * @wq: Wait queue to use for the sleeping wait.
1195 * @req_mask: Mask of requests to wait for.
1196 * @acked: Pointer to field that's updated with the acked requests.
1197 * If the function returns 0, *acked == req_mask.
1198 * @timeout_ms: Timeout expressed in milliseconds.
1199 *
1200 * Return: 0 on success, -ETIMEDOUT otherwise.
1201 */
panthor_fw_wait_acks(const u32 * req_ptr,const u32 * ack_ptr,wait_queue_head_t * wq,u32 req_mask,u32 * acked,u32 timeout_ms)1202 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1203 wait_queue_head_t *wq,
1204 u32 req_mask, u32 *acked,
1205 u32 timeout_ms)
1206 {
1207 u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1208 int ret;
1209
1210 /* Busy wait for a few µsecs before falling back to a sleeping wait. */
1211 *acked = req_mask;
1212 ret = read_poll_timeout_atomic(READ_ONCE, ack,
1213 (ack & req_mask) == req,
1214 0, 10, 0,
1215 *ack_ptr);
1216 if (!ret)
1217 return 0;
1218
1219 if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1220 msecs_to_jiffies(timeout_ms)))
1221 return 0;
1222
1223 /* Check one last time, in case we were not woken up for some reason. */
1224 ack = READ_ONCE(*ack_ptr);
1225 if ((ack & req_mask) == req)
1226 return 0;
1227
1228 *acked = ~(req ^ ack) & req_mask;
1229 return -ETIMEDOUT;
1230 }
1231
1232 /**
1233 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1234 * @ptdev: Device.
1235 * @req_mask: Mask of requests to wait for.
1236 * @acked: Pointer to field that's updated with the acked requests.
1237 * If the function returns 0, *acked == req_mask.
1238 * @timeout_ms: Timeout expressed in milliseconds.
1239 *
1240 * Return: 0 on success, -ETIMEDOUT otherwise.
1241 */
panthor_fw_glb_wait_acks(struct panthor_device * ptdev,u32 req_mask,u32 * acked,u32 timeout_ms)1242 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1243 u32 req_mask, u32 *acked,
1244 u32 timeout_ms)
1245 {
1246 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1247
1248 /* GLB_HALT doesn't get acked through the FW interface. */
1249 if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1250 return -EINVAL;
1251
1252 return panthor_fw_wait_acks(&glb_iface->input->req,
1253 &glb_iface->output->ack,
1254 &ptdev->fw->req_waitqueue,
1255 req_mask, acked, timeout_ms);
1256 }
1257
1258 /**
1259 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1260 * @ptdev: Device.
1261 * @csg_slot: CSG slot ID.
1262 * @req_mask: Mask of requests to wait for.
1263 * @acked: Pointer to field that's updated with the acked requests.
1264 * If the function returns 0, *acked == req_mask.
1265 * @timeout_ms: Timeout expressed in milliseconds.
1266 *
1267 * Return: 0 on success, -ETIMEDOUT otherwise.
1268 */
panthor_fw_csg_wait_acks(struct panthor_device * ptdev,u32 csg_slot,u32 req_mask,u32 * acked,u32 timeout_ms)1269 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1270 u32 req_mask, u32 *acked, u32 timeout_ms)
1271 {
1272 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1273 int ret;
1274
1275 if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1276 return -EINVAL;
1277
1278 ret = panthor_fw_wait_acks(&csg_iface->input->req,
1279 &csg_iface->output->ack,
1280 &ptdev->fw->req_waitqueue,
1281 req_mask, acked, timeout_ms);
1282
1283 /*
1284 * Check that all bits in the state field were updated, if any mismatch
1285 * then clear all bits in the state field. This allows code to do
1286 * (acked & CSG_STATE_MASK) and get the right value.
1287 */
1288
1289 if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1290 *acked &= ~CSG_STATE_MASK;
1291
1292 return ret;
1293 }
1294
1295 /**
1296 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1297 * @ptdev: Device.
1298 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1299 *
1300 * This function is toggling bits in the doorbell_req and ringing the
1301 * global doorbell. It doesn't require a user doorbell to be attached to
1302 * the group.
1303 */
panthor_fw_ring_csg_doorbells(struct panthor_device * ptdev,u32 csg_mask)1304 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1305 {
1306 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1307
1308 panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1309 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1310 }
1311
panthor_fw_ping_work(struct work_struct * work)1312 static void panthor_fw_ping_work(struct work_struct *work)
1313 {
1314 struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1315 struct panthor_device *ptdev = fw->irq.ptdev;
1316 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1317 u32 acked;
1318 int ret;
1319
1320 if (panthor_device_reset_is_pending(ptdev))
1321 return;
1322
1323 panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1324 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1325
1326 ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1327 if (ret) {
1328 panthor_device_schedule_reset(ptdev);
1329 drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1330 } else {
1331 mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1332 msecs_to_jiffies(PING_INTERVAL_MS));
1333 }
1334 }
1335
1336 /**
1337 * panthor_fw_init() - Initialize FW related data.
1338 * @ptdev: Device.
1339 *
1340 * Return: 0 on success, a negative error code otherwise.
1341 */
panthor_fw_init(struct panthor_device * ptdev)1342 int panthor_fw_init(struct panthor_device *ptdev)
1343 {
1344 struct panthor_fw *fw;
1345 int ret, irq;
1346
1347 fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1348 if (!fw)
1349 return -ENOMEM;
1350
1351 ptdev->fw = fw;
1352 init_waitqueue_head(&fw->req_waitqueue);
1353 INIT_LIST_HEAD(&fw->sections);
1354 INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1355
1356 irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1357 if (irq <= 0)
1358 return -ENODEV;
1359
1360 ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1361 if (ret) {
1362 drm_err(&ptdev->base, "failed to request job irq");
1363 return ret;
1364 }
1365
1366 ret = panthor_gpu_l2_power_on(ptdev);
1367 if (ret)
1368 return ret;
1369
1370 fw->vm = panthor_vm_create(ptdev, true,
1371 0, SZ_4G,
1372 CSF_MCU_SHARED_REGION_START,
1373 CSF_MCU_SHARED_REGION_SIZE);
1374 if (IS_ERR(fw->vm)) {
1375 ret = PTR_ERR(fw->vm);
1376 fw->vm = NULL;
1377 goto err_unplug_fw;
1378 }
1379
1380 ret = panthor_fw_load(ptdev);
1381 if (ret)
1382 goto err_unplug_fw;
1383
1384 ret = panthor_vm_active(fw->vm);
1385 if (ret)
1386 goto err_unplug_fw;
1387
1388 ret = panthor_fw_start(ptdev);
1389 if (ret)
1390 goto err_unplug_fw;
1391
1392 ret = panthor_fw_init_ifaces(ptdev);
1393 if (ret)
1394 goto err_unplug_fw;
1395
1396 panthor_fw_init_global_iface(ptdev);
1397 return 0;
1398
1399 err_unplug_fw:
1400 panthor_fw_unplug(ptdev);
1401 return ret;
1402 }
1403
1404 MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
1405