1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15
16 #include <drm/drm_drv.h>
17 #include <drm/drm_managed.h>
18
19 #include "panthor_device.h"
20 #include "panthor_fw.h"
21 #include "panthor_gem.h"
22 #include "panthor_gpu.h"
23 #include "panthor_mmu.h"
24 #include "panthor_regs.h"
25 #include "panthor_sched.h"
26
/* File name of the firmware image, appended to the per-architecture path. */
#define CSF_FW_NAME "mali_csffw.bin"

/* Delay, in milliseconds, used when arming the watchdog ping work. */
#define PING_INTERVAL_MS 12000
/*
 * Progress timeout expressed in cycles. It is shifted right by
 * PROGRESS_TIMEOUT_SCALE_SHIFT before being written to the progress_timer
 * field of the global input interface.
 */
#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT 10
/* Timeout, in microseconds, converted and written to the idle_timer field. */
#define IDLE_HYSTERESIS_US 800
/* Timeout, in microseconds, converted and written to the poweroff_timer field. */
#define PWROFF_HYSTERESIS_US 10000
34
/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 *
 * Read from offset 0 of the firmware image. The loader checks @magic,
 * @major and @size before parsing any entry.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/**
	 * @major: Major FW version.
	 *
	 * The loader rejects images whose major version differs from
	 * CSF_FW_BINARY_HEADER_MAJOR_MAX.
	 */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/**
	 * @size: FW binary size.
	 *
	 * Used as the upper bound when iterating over the entries; it must
	 * not exceed the size of the loaded image.
	 */
	u32 size;
};
62
/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 *
 * Stored in the low byte of each entry header. Only
 * %CSF_FW_BINARY_ENTRY_TYPE_IFACE entries are loaded by this file; the
 * other types listed here are recognized and skipped.
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
};
82
/*
 * Entry header word decoding: the type sits in bits 7:0 and the entry size
 * in bits 15:8. The size includes the 32-bit entry header itself (the loader
 * subtracts it to get the payload size).
 */
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
/* Entries of an unknown type are skipped instead of rejected when set. */
#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)

/*
 * Section flags found in interface entries:
 *  - RD_WR clear: the section is mapped read-only.
 *  - RD_EX clear: the section is mapped non-executable.
 *  - CACHE_MODE: every mode other than CACHED is mapped uncached.
 *  - RD_PROT: the entry is ignored (protected mode is not handled).
 *  - RD_SHARED: the section is kept mapped on the CPU side.
 *  - RD_ZERO: the part of the section not covered by init data is zeroed.
 */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31)

/* Any flag outside of this mask makes the loader reject the entry. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \
	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \
	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \
	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \
	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \
	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \
	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \
	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
108
/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 *
 * Read from the payload of an interface entry. Both ranges are expressed
 * as [start, end) pairs; the loader rejects headers where an end is lower
 * than the matching start.
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/**
	 * @va: MCU virtual range to map this binary section to.
	 *
	 * Both bounds must be aligned on the MCU VM page size.
	 */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/**
	 * @data: Data to initialize the FW section with.
	 *
	 * Offsets are relative to the start of the FW binary and must lie
	 * within it.
	 */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};
134
/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary. Reads advance @offset and fail when
 * they would go past @size.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: Size in bytes of the region pointed to by @data. */
	size_t size;

	/** @offset: Iterator offset, in bytes from @data. */
	size_t offset;
};
150
/**
 * struct panthor_fw_section - FW section
 *
 * One instance is created per interface entry loaded from the FW binary.
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/**
	 * @mem: Section memory.
	 *
	 * Only allocated when the section VA range is not empty.
	 */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};
184
/*
 * MCU VA of the shared region. The section starting at this address must
 * carry the SHARED flag and is the one the FW interfaces are parsed from.
 */
#define CSF_MCU_SHARED_REGION_START 0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL

/* Lower bounds checked against the slot counts advertised by the FW. */
#define MIN_CS_PER_CSG 8
#define MIN_CSGS 3
#define MAX_CSG_PRIO 0xf

/* Interface version encoding: major in bits 31:24, minor in 23:16, patch in 15:0. */
#define CSF_IFACE_VERSION(major, minor, patch) \
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff)

/*
 * Offset of the first group control interface in the shared section, and
 * offset of the first stream control interface inside a group control
 * interface.
 */
#define CSF_GROUP_CONTROL_OFFSET 0x1000
#define CSF_STREAM_CONTROL_OFFSET 0x40
/* Value reported as the number of unpreserved command stream registers. */
#define CSF_UNPRESERVED_REG_COUNT 4
201
/**
 * struct panthor_fw_iface - FW interfaces
 *
 * Host-side descriptors of the interfaces exposed by the FW. The arrays
 * are sized for the maximum number of slots; only the slots advertised by
 * the FW get initialized.
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces, indexed by group slot. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces, indexed by [group slot][stream slot]. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};
215
/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/**
	 * @booted: True if the FW is booted.
	 *
	 * Cleared before starting the MCU and set by the job IRQ handler
	 * when the global interface interrupt is reported.
	 */
	bool booted;

	/**
	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
	 *
	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
	 *
	 * Any time the firmware is properly suspended, a fast reset can take place.
	 * On the other hand, if the halt operation failed, the driver will reload
	 * all sections to make sure we start from a fresh state.
	 */
	bool fast_reset;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};
266
panthor_fw_vm(struct panthor_device * ptdev)267 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
268 {
269 return ptdev->fw->vm;
270 }
271
272 /**
273 * panthor_fw_get_glb_iface() - Get the global interface
274 * @ptdev: Device.
275 *
276 * Return: The global interface.
277 */
278 struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device * ptdev)279 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
280 {
281 return &ptdev->fw->iface.global;
282 }
283
284 /**
285 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
286 * @ptdev: Device.
287 * @csg_slot: Index of the command stream group slot.
288 *
289 * Return: The command stream group slot interface.
290 */
291 struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device * ptdev,u32 csg_slot)292 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
293 {
294 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
295 return NULL;
296
297 return &ptdev->fw->iface.groups[csg_slot];
298 }
299
300 /**
301 * panthor_fw_get_cs_iface() - Get a command stream slot interface
302 * @ptdev: Device.
303 * @csg_slot: Index of the command stream group slot.
304 * @cs_slot: Index of the command stream slot.
305 *
306 * Return: The command stream slot interface.
307 */
308 struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device * ptdev,u32 csg_slot,u32 cs_slot)309 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
310 {
311 if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
312 return NULL;
313
314 return &ptdev->fw->iface.streams[csg_slot][cs_slot];
315 }
316
317 /**
318 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
319 * @ptdev: Device.
320 * @timeout_us: Timeout expressed in micro-seconds.
321 *
322 * The FW has two timer sources: the GPU counter or arch-timer. We need
323 * to express timeouts in term of number of cycles and specify which
324 * timer source should be used.
325 *
326 * Return: A value suitable for timeout fields in the global interface.
327 */
panthor_fw_conv_timeout(struct panthor_device * ptdev,u32 timeout_us)328 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
329 {
330 bool use_cycle_counter = false;
331 u32 timer_rate = 0;
332 u64 mod_cycles;
333
334 #ifdef CONFIG_ARM_ARCH_TIMER
335 timer_rate = arch_timer_get_cntfrq();
336 #endif
337
338 if (!timer_rate) {
339 use_cycle_counter = true;
340 timer_rate = clk_get_rate(ptdev->clks.core);
341 }
342
343 if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
344 /* We couldn't get a valid clock rate, let's just pick the
345 * maximum value so the FW still handles the core
346 * power on/off requests.
347 */
348 return GLB_TIMER_VAL(~0) |
349 GLB_TIMER_SOURCE_GPU_COUNTER;
350 }
351
352 mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
353 1000000ull << 10);
354 if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
355 mod_cycles = GLB_TIMER_VAL(~0);
356
357 return GLB_TIMER_VAL(mod_cycles) |
358 (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
359 }
360
/**
 * panthor_fw_binary_iter_read() - Copy bytes out of a FW binary iterator
 * @ptdev: Device.
 * @iter: Iterator to read from.
 * @out: Destination buffer, at least @size bytes long.
 * @size: Number of bytes to read.
 *
 * Advances the iterator by @size bytes on success and leaves it untouched
 * on failure.
 *
 * Return: 0 on success, -EINVAL if fewer than @size bytes are left.
 */
static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	const size_t start = iter->offset;
	const size_t end = start + size;

	/* Reject arithmetic wrap-around as well as reads past the end. */
	if (end < start || end > iter->size) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + start, size);
	iter->offset = end;

	return 0;
}
376
/**
 * panthor_fw_binary_sub_iter_init() - Carve a sub-iterator out of an iterator
 * @ptdev: Device.
 * @iter: Parent iterator.
 * @sub_iter: Iterator to initialize.
 * @size: Number of bytes the sub-iterator covers.
 *
 * On success, @sub_iter covers the next @size bytes of @iter, and @iter is
 * advanced past them.
 *
 * Return: 0 on success, -EINVAL if fewer than @size bytes are left in @iter.
 */
static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	const size_t start = iter->offset;
	const size_t end = start + size;

	/* Reject arithmetic wrap-around as well as ranges past the end. */
	if (end < start || end > iter->size) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->data = iter->data + start;
	sub_iter->size = size;
	sub_iter->offset = 0;

	iter->offset = end;

	return 0;
}
395
panthor_fw_init_section_mem(struct panthor_device * ptdev,struct panthor_fw_section * section)396 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
397 struct panthor_fw_section *section)
398 {
399 bool was_mapped = !!section->mem->kmap;
400 int ret;
401
402 if (!section->data.size &&
403 !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
404 return;
405
406 ret = panthor_kernel_bo_vmap(section->mem);
407 if (drm_WARN_ON(&ptdev->base, ret))
408 return;
409
410 memcpy(section->mem->kmap, section->data.buf, section->data.size);
411 if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
412 memset(section->mem->kmap + section->data.size, 0,
413 panthor_kernel_bo_size(section->mem) - section->data.size);
414 }
415
416 if (!was_mapped)
417 panthor_kernel_bo_vunmap(section->mem);
418 }
419
420 /**
421 * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
422 * @ptdev: Device.
423 * @input: Pointer holding the input interface on success.
424 * Should be ignored on failure.
425 * @output: Pointer holding the output interface on success.
426 * Should be ignored on failure.
427 * @input_fw_va: Pointer holding the input interface FW VA on success.
428 * Should be ignored on failure.
429 * @output_fw_va: Pointer holding the output interface FW VA on success.
430 * Should be ignored on failure.
431 *
432 * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
433 * interface is at offset 0, and the output interface at offset 4096.
434 *
435 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
436 */
437 struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device * ptdev,struct panthor_fw_ringbuf_input_iface ** input,const struct panthor_fw_ringbuf_output_iface ** output,u32 * input_fw_va,u32 * output_fw_va)438 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
439 struct panthor_fw_ringbuf_input_iface **input,
440 const struct panthor_fw_ringbuf_output_iface **output,
441 u32 *input_fw_va, u32 *output_fw_va)
442 {
443 struct panthor_kernel_bo *mem;
444 int ret;
445
446 mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
447 DRM_PANTHOR_BO_NO_MMAP,
448 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
449 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
450 PANTHOR_VM_KERNEL_AUTO_VA);
451 if (IS_ERR(mem))
452 return mem;
453
454 ret = panthor_kernel_bo_vmap(mem);
455 if (ret) {
456 panthor_kernel_bo_destroy(mem);
457 return ERR_PTR(ret);
458 }
459
460 memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
461 *input = mem->kmap;
462 *output = mem->kmap + SZ_4K;
463 *input_fw_va = panthor_kernel_bo_gpuva(mem);
464 *output_fw_va = *input_fw_va + SZ_4K;
465
466 return mem;
467 }
468
469 /**
470 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
471 * @ptdev: Device.
472 * @size: Size of the suspend buffer.
473 *
474 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
475 */
476 struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device * ptdev,size_t size)477 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
478 {
479 return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
480 DRM_PANTHOR_BO_NO_MMAP,
481 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
482 PANTHOR_VM_KERNEL_AUTO_VA);
483 }
484
/**
 * panthor_fw_load_section_entry() - Load an interface entry of the FW binary
 * @ptdev: Device.
 * @fw: Firmware image the entry belongs to.
 * @iter: Iterator covering the entry payload.
 * @ehdr: Entry header word (currently unused).
 *
 * Validates the section header, keeps a copy of the section init data and
 * name, then allocates the section memory at the requested MCU VA and fills
 * it with its initial content.
 *
 * Protected-mode entries and entries with an empty VA range are ignored.
 *
 * Return: 0 on success or when the entry is ignored, a negative error code
 * otherwise.
 */
static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	struct panthor_gem_object *bo;
	struct panthor_kernel_bo *mem;
	struct sg_table *sgt;
	u32 vm_map_flags = 0;
	u32 section_size;
	u32 cache_mode;
	u32 data_size;
	u32 name_len;
	u64 va;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	/* Both ranges were checked to be ordered, so these can't wrap. */
	data_size = hdr.data.end - hdr.data.start;
	section_size = hdr.va.end - hdr.va.start;

	/* The init data is copied at the start of the section memory: it
	 * must fit in it, otherwise the copy would overflow the buffer.
	 */
	if (data_size > section_size) {
		drm_err(&ptdev->base, "Firmware corrupted, section data (0x%x bytes) larger than its VA range (0x%x bytes)\n",
			data_size, section_size);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not be supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	if (!section_size) {
		/* An empty section has no memory to map or initialize.
		 * Tracking it would leave a section without backing memory
		 * in the list, so skip it, unless it is supposed to be the
		 * shared section, which the interfaces are parsed from.
		 */
		if (hdr.va.start != CSF_MCU_SHARED_REGION_START)
			return 0;

		drm_err(&ptdev->base, "Firmware corrupted, shared interface section is empty\n");
		return -EINVAL;
	}

	/* Whatever follows the section header in the entry is the name. */
	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = data_size;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
	va = hdr.va.start;

	if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
		vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

	if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
		vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

	/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
	 * non-cacheable for now. We might want to introduce a new
	 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
	 * memory and is currently not used by our driver) for
	 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
	 * of IO-coherent systems.
	 */
	if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
		vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

	/* Only store a valid pointer in the section, so list walkers never
	 * see an ERR_PTR().
	 */
	mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
				       section_size,
				       DRM_PANTHOR_BO_NO_MMAP,
				       vm_map_flags, va);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	section->mem = mem;

	if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
		return -EINVAL;

	/* Shared sections stay mapped on the CPU side. */
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
		ret = panthor_kernel_bo_vmap(section->mem);
		if (ret)
			return ret;
	}

	panthor_fw_init_section_mem(ptdev, section);

	bo = to_panthor_bo(section->mem->obj);
	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}
630
631 static void
panthor_reload_fw_sections(struct panthor_device * ptdev,bool full_reload)632 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
633 {
634 struct panthor_fw_section *section;
635
636 list_for_each_entry(section, &ptdev->fw->sections, node) {
637 struct sg_table *sgt;
638
639 if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
640 continue;
641
642 panthor_fw_init_section_mem(ptdev, section);
643 sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
644 if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
645 dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
646 }
647 }
648
panthor_fw_load_entry(struct panthor_device * ptdev,const struct firmware * fw,struct panthor_fw_binary_iter * iter)649 static int panthor_fw_load_entry(struct panthor_device *ptdev,
650 const struct firmware *fw,
651 struct panthor_fw_binary_iter *iter)
652 {
653 struct panthor_fw_binary_iter eiter;
654 u32 ehdr;
655 int ret;
656
657 ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
658 if (ret)
659 return ret;
660
661 if ((iter->offset % sizeof(u32)) ||
662 (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
663 drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
664 (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
665 return -EINVAL;
666 }
667
668 if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
669 CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
670 return -EINVAL;
671
672 switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
673 case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
674 return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
675
676 /* FIXME: handle those entry types? */
677 case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
678 case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
679 case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
680 case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
681 return 0;
682 default:
683 break;
684 }
685
686 if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
687 return 0;
688
689 drm_err(&ptdev->base,
690 "Unsupported non-optional entry type %u in firmware\n",
691 CSF_FW_BINARY_ENTRY_TYPE(ehdr));
692 return -EINVAL;
693 }
694
panthor_fw_load(struct panthor_device * ptdev)695 static int panthor_fw_load(struct panthor_device *ptdev)
696 {
697 const struct firmware *fw = NULL;
698 struct panthor_fw_binary_iter iter = {};
699 struct panthor_fw_binary_hdr hdr;
700 char fw_path[128];
701 int ret;
702
703 snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
704 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
705 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
706 CSF_FW_NAME);
707
708 ret = request_firmware(&fw, fw_path, ptdev->base.dev);
709 if (ret) {
710 drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
711 CSF_FW_NAME);
712 return ret;
713 }
714
715 iter.data = fw->data;
716 iter.size = fw->size;
717 ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
718 if (ret)
719 goto out;
720
721 if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
722 ret = -EINVAL;
723 drm_err(&ptdev->base, "Invalid firmware magic\n");
724 goto out;
725 }
726
727 if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
728 ret = -EINVAL;
729 drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
730 hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
731 goto out;
732 }
733
734 if (hdr.size > iter.size) {
735 drm_err(&ptdev->base, "Firmware image is truncated\n");
736 goto out;
737 }
738
739 iter.size = hdr.size;
740
741 while (iter.offset < hdr.size) {
742 ret = panthor_fw_load_entry(ptdev, fw, &iter);
743 if (ret)
744 goto out;
745 }
746
747 if (!ptdev->fw->shared_section) {
748 drm_err(&ptdev->base, "Shared interface region not found\n");
749 ret = -EINVAL;
750 goto out;
751 }
752
753 out:
754 release_firmware(fw);
755 return ret;
756 }
757
758 /**
759 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
760 * @ptdev: Device.
761 * @mcu_va: MCU address.
762 *
763 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
764 */
iface_fw_to_cpu_addr(struct panthor_device * ptdev,u32 mcu_va)765 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
766 {
767 u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
768 u64 shared_mem_end = shared_mem_start +
769 panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
770 if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
771 return NULL;
772
773 return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
774 }
775
panthor_init_cs_iface(struct panthor_device * ptdev,unsigned int csg_idx,unsigned int cs_idx)776 static int panthor_init_cs_iface(struct panthor_device *ptdev,
777 unsigned int csg_idx, unsigned int cs_idx)
778 {
779 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
780 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
781 struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
782 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
783 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
784 (csg_idx * glb_iface->control->group_stride) +
785 CSF_STREAM_CONTROL_OFFSET +
786 (cs_idx * csg_iface->control->stream_stride);
787 struct panthor_fw_cs_iface *first_cs_iface =
788 panthor_fw_get_cs_iface(ptdev, 0, 0);
789
790 if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
791 return -EINVAL;
792
793 spin_lock_init(&cs_iface->lock);
794 cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
795 cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
796 cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
797
798 if (!cs_iface->input || !cs_iface->output) {
799 drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
800 return -EINVAL;
801 }
802
803 if (cs_iface != first_cs_iface) {
804 if (cs_iface->control->features != first_cs_iface->control->features) {
805 drm_err(&ptdev->base, "Expecting identical CS slots");
806 return -EINVAL;
807 }
808 } else {
809 u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
810
811 ptdev->csif_info.cs_reg_count = reg_count;
812 ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
813 }
814
815 return 0;
816 }
817
compare_csg(const struct panthor_fw_csg_control_iface * a,const struct panthor_fw_csg_control_iface * b)818 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
819 const struct panthor_fw_csg_control_iface *b)
820 {
821 if (a->features != b->features)
822 return false;
823 if (a->suspend_size != b->suspend_size)
824 return false;
825 if (a->protm_suspend_size != b->protm_suspend_size)
826 return false;
827 if (a->stream_num != b->stream_num)
828 return false;
829 return true;
830 }
831
panthor_init_csg_iface(struct panthor_device * ptdev,unsigned int csg_idx)832 static int panthor_init_csg_iface(struct panthor_device *ptdev,
833 unsigned int csg_idx)
834 {
835 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
836 struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
837 u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
838 u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
839 unsigned int i;
840
841 if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
842 return -EINVAL;
843
844 spin_lock_init(&csg_iface->lock);
845 csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
846 csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
847 csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
848
849 if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
850 csg_iface->control->stream_num > MAX_CS_PER_CSG)
851 return -EINVAL;
852
853 if (!csg_iface->input || !csg_iface->output) {
854 drm_err(&ptdev->base, "Invalid group control interface input/output VA");
855 return -EINVAL;
856 }
857
858 if (csg_idx > 0) {
859 struct panthor_fw_csg_iface *first_csg_iface =
860 panthor_fw_get_csg_iface(ptdev, 0);
861
862 if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
863 drm_err(&ptdev->base, "Expecting identical CSG slots");
864 return -EINVAL;
865 }
866 }
867
868 for (i = 0; i < csg_iface->control->stream_num; i++) {
869 int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
870
871 if (ret)
872 return ret;
873 }
874
875 return 0;
876 }
877
panthor_get_instr_features(struct panthor_device * ptdev)878 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
879 {
880 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
881
882 if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
883 return 0;
884
885 return glb_iface->control->instr_features;
886 }
887
panthor_fw_init_ifaces(struct panthor_device * ptdev)888 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
889 {
890 struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
891 unsigned int i;
892
893 if (!ptdev->fw->shared_section->mem->kmap)
894 return -EINVAL;
895
896 spin_lock_init(&glb_iface->lock);
897 glb_iface->control = ptdev->fw->shared_section->mem->kmap;
898
899 if (!glb_iface->control->version) {
900 drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
901 return -EINVAL;
902 }
903
904 glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
905 glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
906 if (!glb_iface->input || !glb_iface->output) {
907 drm_err(&ptdev->base, "Invalid global control interface input/output VA");
908 return -EINVAL;
909 }
910
911 if (glb_iface->control->group_num > MAX_CSGS ||
912 glb_iface->control->group_num < MIN_CSGS) {
913 drm_err(&ptdev->base, "Invalid number of control groups");
914 return -EINVAL;
915 }
916
917 for (i = 0; i < glb_iface->control->group_num; i++) {
918 int ret = panthor_init_csg_iface(ptdev, i);
919
920 if (ret)
921 return ret;
922 }
923
924 drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x",
925 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
926 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
927 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
928 glb_iface->control->features,
929 panthor_get_instr_features(ptdev));
930 return 0;
931 }
932
/**
 * panthor_fw_init_global_iface() - Program the global interface inputs
 * @ptdev: Device.
 *
 * Fills the global input interface (core enable mask, timers and
 * acknowledge IRQ mask), updates the request bits through the
 * panthor_fw_update_reqs()/panthor_fw_toggle_reqs() helpers, rings the
 * global doorbell and (re)arms the watchdog ping work.
 */
static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	/* Request bits are updated after the input fields above are set. */
	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	/* Ring the global doorbell once everything is in place. */
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}
965
/**
 * panthor_job_irq_handler() - Handle job interrupts
 * @ptdev: Device.
 * @status: Job interrupt status bits.
 *
 * The first global interface interrupt flags the FW as booted. Waiters on
 * the request waitqueue are always woken up, and events are forwarded to
 * the scheduler only once the FW is booted.
 */
static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	struct panthor_fw *fw = ptdev->fw;

	if ((status & JOB_INT_GLOBAL_IF) && !fw->booted)
		fw->booted = true;

	wake_up_all(&fw->req_waitqueue);

	/* Until the FW is flagged as booted, IRQs are not processed further. */
	if (fw->booted)
		panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
980
panthor_fw_start(struct panthor_device * ptdev)981 static int panthor_fw_start(struct panthor_device *ptdev)
982 {
983 bool timedout = false;
984
985 ptdev->fw->booted = false;
986 panthor_job_irq_resume(&ptdev->fw->irq, ~0);
987 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
988
989 if (!wait_event_timeout(ptdev->fw->req_waitqueue,
990 ptdev->fw->booted,
991 msecs_to_jiffies(1000))) {
992 if (!ptdev->fw->booted &&
993 !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
994 timedout = true;
995 }
996
997 if (timedout) {
998 static const char * const status_str[] = {
999 [MCU_STATUS_DISABLED] = "disabled",
1000 [MCU_STATUS_ENABLED] = "enabled",
1001 [MCU_STATUS_HALT] = "halt",
1002 [MCU_STATUS_FATAL] = "fatal",
1003 };
1004 u32 status = gpu_read(ptdev, MCU_STATUS);
1005
1006 drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1007 status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1008 return -ETIMEDOUT;
1009 }
1010
1011 return 0;
1012 }
1013
panthor_fw_stop(struct panthor_device * ptdev)1014 static void panthor_fw_stop(struct panthor_device *ptdev)
1015 {
1016 u32 status;
1017
1018 gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1019 if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1020 status == MCU_STATUS_DISABLED, 10, 100000))
1021 drm_err(&ptdev->base, "Failed to stop MCU");
1022 }
1023
1024 /**
1025 * panthor_fw_pre_reset() - Call before a reset.
1026 * @ptdev: Device.
1027 * @on_hang: true if the reset was triggered on a GPU hang.
1028 *
1029 * If the reset is not triggered on a hang, we try to gracefully halt the
1030 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1031 */
panthor_fw_pre_reset(struct panthor_device * ptdev,bool on_hang)1032 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1033 {
1034 /* Make sure we won't be woken up by a ping. */
1035 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1036
1037 ptdev->fw->fast_reset = false;
1038
1039 if (!on_hang) {
1040 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1041 u32 status;
1042
1043 panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1044 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1045 if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1046 status == MCU_STATUS_HALT, 10, 100000) &&
1047 glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
1048 ptdev->fw->fast_reset = true;
1049 } else {
1050 drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1051 }
1052
1053 /* The FW detects 0 -> 1 transitions. Make sure we reset
1054 * the HALT bit before the FW is rebooted.
1055 */
1056 panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1057 }
1058
1059 panthor_job_irq_suspend(&ptdev->fw->irq);
1060 }
1061
1062 /**
1063 * panthor_fw_post_reset() - Call after a reset.
1064 * @ptdev: Device.
1065 *
1066 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1067 * make sure we can recover from a memory corruption.
1068 */
panthor_fw_post_reset(struct panthor_device * ptdev)1069 int panthor_fw_post_reset(struct panthor_device *ptdev)
1070 {
1071 int ret;
1072
1073 /* Make the MCU VM active. */
1074 ret = panthor_vm_active(ptdev->fw->vm);
1075 if (ret)
1076 return ret;
1077
1078 /* If this is a fast reset, try to start the MCU without reloading
1079 * the FW sections. If it fails, go for a full reset.
1080 */
1081 if (ptdev->fw->fast_reset) {
1082 ret = panthor_fw_start(ptdev);
1083 if (!ret)
1084 goto out;
1085
1086 /* Forcibly reset the MCU and force a slow reset, so we get a
1087 * fresh boot on the next panthor_fw_start() call.
1088 */
1089 panthor_fw_stop(ptdev);
1090 ptdev->fw->fast_reset = false;
1091 drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
1092
1093 ret = panthor_vm_flush_all(ptdev->fw->vm);
1094 if (ret) {
1095 drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
1096 return ret;
1097 }
1098 }
1099
1100 /* Reload all sections, including RO ones. We're not supposed
1101 * to end up here anyway, let's just assume the overhead of
1102 * reloading everything is acceptable.
1103 */
1104 panthor_reload_fw_sections(ptdev, true);
1105
1106 ret = panthor_fw_start(ptdev);
1107 if (ret) {
1108 drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )");
1109 return ret;
1110 }
1111
1112 out:
1113 /* We must re-initialize the global interface even on fast-reset. */
1114 panthor_fw_init_global_iface(ptdev);
1115 return 0;
1116 }
1117
1118 /**
1119 * panthor_fw_unplug() - Called when the device is unplugged.
1120 * @ptdev: Device.
1121 *
1122 * This function must make sure all pending operations are flushed before
1123 * will release device resources, thus preventing any interaction with
1124 * the HW.
1125 *
1126 * If there is still FW-related work running after this function returns,
1127 * they must use drm_dev_{enter,exit}() and skip any HW access when
1128 * drm_dev_enter() returns false.
1129 */
panthor_fw_unplug(struct panthor_device * ptdev)1130 void panthor_fw_unplug(struct panthor_device *ptdev)
1131 {
1132 struct panthor_fw_section *section;
1133
1134 cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1135
1136 /* Make sure the IRQ handler can be called after that point. */
1137 if (ptdev->fw->irq.irq)
1138 panthor_job_irq_suspend(&ptdev->fw->irq);
1139
1140 panthor_fw_stop(ptdev);
1141
1142 list_for_each_entry(section, &ptdev->fw->sections, node)
1143 panthor_kernel_bo_destroy(section->mem);
1144
1145 /* We intentionally don't call panthor_vm_idle() and let
1146 * panthor_mmu_unplug() release the AS we acquired with
1147 * panthor_vm_active() so we don't have to track the VM active/idle
1148 * state to keep the active_refcnt balanced.
1149 */
1150 panthor_vm_put(ptdev->fw->vm);
1151 ptdev->fw->vm = NULL;
1152
1153 panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1154 }
1155
1156 /**
1157 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1158 * @req_ptr: Pointer to the req register.
1159 * @ack_ptr: Pointer to the ack register.
1160 * @wq: Wait queue to use for the sleeping wait.
1161 * @req_mask: Mask of requests to wait for.
1162 * @acked: Pointer to field that's updated with the acked requests.
1163 * If the function returns 0, *acked == req_mask.
1164 * @timeout_ms: Timeout expressed in milliseconds.
1165 *
1166 * Return: 0 on success, -ETIMEDOUT otherwise.
1167 */
panthor_fw_wait_acks(const u32 * req_ptr,const u32 * ack_ptr,wait_queue_head_t * wq,u32 req_mask,u32 * acked,u32 timeout_ms)1168 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1169 wait_queue_head_t *wq,
1170 u32 req_mask, u32 *acked,
1171 u32 timeout_ms)
1172 {
1173 u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1174 int ret;
1175
1176 /* Busy wait for a few µsecs before falling back to a sleeping wait. */
1177 *acked = req_mask;
1178 ret = read_poll_timeout_atomic(READ_ONCE, ack,
1179 (ack & req_mask) == req,
1180 0, 10, 0,
1181 *ack_ptr);
1182 if (!ret)
1183 return 0;
1184
1185 if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1186 msecs_to_jiffies(timeout_ms)))
1187 return 0;
1188
1189 /* Check one last time, in case we were not woken up for some reason. */
1190 ack = READ_ONCE(*ack_ptr);
1191 if ((ack & req_mask) == req)
1192 return 0;
1193
1194 *acked = ~(req ^ ack) & req_mask;
1195 return -ETIMEDOUT;
1196 }
1197
1198 /**
1199 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1200 * @ptdev: Device.
1201 * @req_mask: Mask of requests to wait for.
1202 * @acked: Pointer to field that's updated with the acked requests.
1203 * If the function returns 0, *acked == req_mask.
1204 * @timeout_ms: Timeout expressed in milliseconds.
1205 *
1206 * Return: 0 on success, -ETIMEDOUT otherwise.
1207 */
panthor_fw_glb_wait_acks(struct panthor_device * ptdev,u32 req_mask,u32 * acked,u32 timeout_ms)1208 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1209 u32 req_mask, u32 *acked,
1210 u32 timeout_ms)
1211 {
1212 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1213
1214 /* GLB_HALT doesn't get acked through the FW interface. */
1215 if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1216 return -EINVAL;
1217
1218 return panthor_fw_wait_acks(&glb_iface->input->req,
1219 &glb_iface->output->ack,
1220 &ptdev->fw->req_waitqueue,
1221 req_mask, acked, timeout_ms);
1222 }
1223
1224 /**
1225 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1226 * @ptdev: Device.
1227 * @csg_slot: CSG slot ID.
1228 * @req_mask: Mask of requests to wait for.
1229 * @acked: Pointer to field that's updated with the acked requests.
1230 * If the function returns 0, *acked == req_mask.
1231 * @timeout_ms: Timeout expressed in milliseconds.
1232 *
1233 * Return: 0 on success, -ETIMEDOUT otherwise.
1234 */
panthor_fw_csg_wait_acks(struct panthor_device * ptdev,u32 csg_slot,u32 req_mask,u32 * acked,u32 timeout_ms)1235 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1236 u32 req_mask, u32 *acked, u32 timeout_ms)
1237 {
1238 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1239 int ret;
1240
1241 if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1242 return -EINVAL;
1243
1244 ret = panthor_fw_wait_acks(&csg_iface->input->req,
1245 &csg_iface->output->ack,
1246 &ptdev->fw->req_waitqueue,
1247 req_mask, acked, timeout_ms);
1248
1249 /*
1250 * Check that all bits in the state field were updated, if any mismatch
1251 * then clear all bits in the state field. This allows code to do
1252 * (acked & CSG_STATE_MASK) and get the right value.
1253 */
1254
1255 if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1256 *acked &= ~CSG_STATE_MASK;
1257
1258 return ret;
1259 }
1260
1261 /**
1262 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1263 * @ptdev: Device.
1264 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1265 *
1266 * This function is toggling bits in the doorbell_req and ringing the
1267 * global doorbell. It doesn't require a user doorbell to be attached to
1268 * the group.
1269 */
panthor_fw_ring_csg_doorbells(struct panthor_device * ptdev,u32 csg_mask)1270 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1271 {
1272 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1273
1274 panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1275 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1276 }
1277
panthor_fw_ping_work(struct work_struct * work)1278 static void panthor_fw_ping_work(struct work_struct *work)
1279 {
1280 struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1281 struct panthor_device *ptdev = fw->irq.ptdev;
1282 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1283 u32 acked;
1284 int ret;
1285
1286 if (panthor_device_reset_is_pending(ptdev))
1287 return;
1288
1289 panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1290 gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1291
1292 ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1293 if (ret) {
1294 panthor_device_schedule_reset(ptdev);
1295 drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1296 } else {
1297 mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1298 msecs_to_jiffies(PING_INTERVAL_MS));
1299 }
1300 }
1301
1302 /**
1303 * panthor_fw_init() - Initialize FW related data.
1304 * @ptdev: Device.
1305 *
1306 * Return: 0 on success, a negative error code otherwise.
1307 */
panthor_fw_init(struct panthor_device * ptdev)1308 int panthor_fw_init(struct panthor_device *ptdev)
1309 {
1310 struct panthor_fw *fw;
1311 int ret, irq;
1312
1313 fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1314 if (!fw)
1315 return -ENOMEM;
1316
1317 ptdev->fw = fw;
1318 init_waitqueue_head(&fw->req_waitqueue);
1319 INIT_LIST_HEAD(&fw->sections);
1320 INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1321
1322 irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1323 if (irq <= 0)
1324 return -ENODEV;
1325
1326 ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1327 if (ret) {
1328 drm_err(&ptdev->base, "failed to request job irq");
1329 return ret;
1330 }
1331
1332 ret = panthor_gpu_l2_power_on(ptdev);
1333 if (ret)
1334 return ret;
1335
1336 fw->vm = panthor_vm_create(ptdev, true,
1337 0, SZ_4G,
1338 CSF_MCU_SHARED_REGION_START,
1339 CSF_MCU_SHARED_REGION_SIZE);
1340 if (IS_ERR(fw->vm)) {
1341 ret = PTR_ERR(fw->vm);
1342 fw->vm = NULL;
1343 goto err_unplug_fw;
1344 }
1345
1346 ret = panthor_fw_load(ptdev);
1347 if (ret)
1348 goto err_unplug_fw;
1349
1350 ret = panthor_vm_active(fw->vm);
1351 if (ret)
1352 goto err_unplug_fw;
1353
1354 ret = panthor_fw_start(ptdev);
1355 if (ret)
1356 goto err_unplug_fw;
1357
1358 ret = panthor_fw_init_ifaces(ptdev);
1359 if (ret)
1360 goto err_unplug_fw;
1361
1362 panthor_fw_init_global_iface(ptdev);
1363 return 0;
1364
1365 err_unplug_fw:
1366 panthor_fw_unplug(ptdev);
1367 return ret;
1368 }
1369
/* Firmware image declared for this module; the file name matches CSF_FW_NAME. */
MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
1371