1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
/* @file
 *
 * v3d driver code interacting with the v3dv3 simulator/FPGA library.
 *
 * This is compiled once per V3D version we support, since the register
 * definitions conflict.
 */
31 
32 #include <errno.h>
33 #include <stdbool.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <sys/mman.h>
37 #include "util/macros.h"
38 #include "util/u_mm.h"
39 #include "broadcom/common/v3d_macros.h"
40 #include "v3d_simulator_wrapper.h"
41 #include "drm-shim/drm_shim.h"
42 #include "drm-uapi/v3d_drm.h"
43 #include "v3d.h"
44 
45 #define HW_REGISTER_RO(x) (x)
46 #define HW_REGISTER_RW(x) (x)
47 #if V3D_VERSION >= 41
48 #include "libs/core/v3d/registers/4.1.34.0/v3d.h"
49 #else
50 #include "libs/core/v3d/registers/3.3.0.0/v3d.h"
51 #endif
52 
53 #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d.hw, reg, val)
54 #define V3D_READ(reg) v3d_hw_read_reg(v3d.hw, reg)
55 
56 static void
v3d_flush_l3()57 v3d_flush_l3()
58 {
59         if (!v3d_hw_has_gca(v3d.hw))
60                 return;
61 
62 #if V3D_VERSION < 40
63         uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
64 
65         V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
66         V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
67 #endif
68 }
69 
70 /* Invalidates the L2 cache.  This is a read-only cache. */
71 static void
v3d_flush_l2(void)72 v3d_flush_l2(void)
73 {
74         V3D_WRITE(V3D_CTL_0_L2CACTL,
75                   V3D_CTL_0_L2CACTL_L2CCLR_SET |
76                   V3D_CTL_0_L2CACTL_L2CENA_SET);
77 }
78 
79 /* Invalidates texture L2 cachelines */
80 static void
v3d_flush_l2t(void)81 v3d_flush_l2t(void)
82 {
83         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
84         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
85         V3D_WRITE(V3D_CTL_0_L2TCACTL,
86                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
87                   (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
88 }
89 
90 /* Invalidates the slice caches.  These are read-only caches. */
91 static void
v3d_flush_slices(void)92 v3d_flush_slices(void)
93 {
94         V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
95 }
96 
97 static void
v3d_flush_caches(void)98 v3d_flush_caches(void)
99 {
100         v3d_flush_l3();
101         v3d_flush_l2();
102         v3d_flush_l2t();
103         v3d_flush_slices();
104 }
105 
106 static void
v3d_simulator_copy_in_handle(struct shim_fd * shim_fd,int handle)107 v3d_simulator_copy_in_handle(struct shim_fd *shim_fd, int handle)
108 {
109         if (!handle)
110                 return;
111 
112         struct v3d_bo *bo = v3d_bo_lookup(shim_fd, handle);
113 
114         memcpy(bo->sim_vaddr, bo->gem_vaddr, bo->base.size);
115 }
116 
117 static void
v3d_simulator_copy_out_handle(struct shim_fd * shim_fd,int handle)118 v3d_simulator_copy_out_handle(struct shim_fd *shim_fd, int handle)
119 {
120         if (!handle)
121                 return;
122 
123         struct v3d_bo *bo = v3d_bo_lookup(shim_fd, handle);
124 
125         memcpy(bo->gem_vaddr, bo->sim_vaddr, bo->base.size);
126 }
127 
128 static int
v3dX(v3d_ioctl_submit_cl)129 v3dX(v3d_ioctl_submit_cl)(int fd, unsigned long request, void *arg)
130 {
131         struct shim_fd *shim_fd = drm_shim_fd_lookup(fd);
132         struct drm_v3d_submit_cl *submit = arg;
133         uint32_t *bo_handles = (uint32_t *)(uintptr_t)submit->bo_handles;
134 
135         for (int i = 0; i < submit->bo_handle_count; i++)
136                 v3d_simulator_copy_in_handle(shim_fd, bo_handles[i]);
137 
138         v3d_flush_caches();
139 
140         if (submit->qma) {
141                 V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
142                 V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
143         }
144 #if V3D_VERSION >= 41
145         if (submit->qts) {
146                 V3D_WRITE(V3D_CLE_0_CT0QTS,
147                           V3D_CLE_0_CT0QTS_CTQTSEN_SET |
148                           submit->qts);
149         }
150 #endif
151 
152         fprintf(stderr, "submit %x..%x!\n", submit->bcl_start, submit->bcl_end);
153 
154         V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
155         V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
156 
157         /* Wait for bin to complete before firing render, as it seems the
158          * simulator doesn't implement the semaphores.
159          */
160         while (V3D_READ(V3D_CLE_0_CT0CA) !=
161                V3D_READ(V3D_CLE_0_CT0EA)) {
162                 v3d_hw_tick(v3d.hw);
163         }
164 
165         fprintf(stderr, "submit %x..%x!\n", submit->rcl_start, submit->rcl_end);
166 
167         v3d_flush_caches();
168 
169         V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
170         V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);
171 
172         while (V3D_READ(V3D_CLE_0_CT1CA) !=
173                V3D_READ(V3D_CLE_0_CT1EA)) {
174                 v3d_hw_tick(v3d.hw);
175         }
176 
177         for (int i = 0; i < submit->bo_handle_count; i++)
178                 v3d_simulator_copy_out_handle(shim_fd, bo_handles[i]);
179 
180         return 0;
181 }
182 
183 static int
v3dX(v3d_ioctl_submit_tfu)184 v3dX(v3d_ioctl_submit_tfu)(int fd, unsigned long request, void *arg)
185 {
186         struct shim_fd *shim_fd = drm_shim_fd_lookup(fd);
187         struct drm_v3d_submit_tfu *submit = arg;
188 
189         v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[0]);
190         v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[1]);
191         v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[2]);
192         v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[3]);
193 
194         int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;
195 
196         V3D_WRITE(V3D_TFU_IIA, submit->iia);
197         V3D_WRITE(V3D_TFU_IIS, submit->iis);
198         V3D_WRITE(V3D_TFU_ICA, submit->ica);
199         V3D_WRITE(V3D_TFU_IUA, submit->iua);
200         V3D_WRITE(V3D_TFU_IOA, submit->ioa);
201         V3D_WRITE(V3D_TFU_IOS, submit->ios);
202         V3D_WRITE(V3D_TFU_COEF0, submit->coef[0]);
203         V3D_WRITE(V3D_TFU_COEF1, submit->coef[1]);
204         V3D_WRITE(V3D_TFU_COEF2, submit->coef[2]);
205         V3D_WRITE(V3D_TFU_COEF3, submit->coef[3]);
206 
207         V3D_WRITE(V3D_TFU_ICFG, submit->icfg);
208 
209         while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
210                 v3d_hw_tick(v3d.hw);
211         }
212 
213         v3d_simulator_copy_out_handle(shim_fd, submit->bo_handles[0]);
214 
215         return 0;
216 }
217 
218 static int
v3dX(v3d_ioctl_create_bo)219 v3dX(v3d_ioctl_create_bo)(int fd, unsigned long request, void *arg)
220 {
221         struct shim_fd *shim_fd = drm_shim_fd_lookup(fd);
222         struct drm_v3d_create_bo *create = arg;
223         struct v3d_bo *bo = calloc(1, sizeof(*bo));
224 
225         drm_shim_bo_init(&bo->base, create->size);
226         bo->offset = util_vma_heap_alloc(&v3d.heap, create->size, 4096);
227         if (bo->offset == 0)
228                 return -ENOMEM;
229 
230         bo->sim_vaddr = v3d.mem + bo->offset - v3d.mem_base;
231 #if 0
232         /* Place a mapping of the BO inside of the simulator's address space
233          * for V3D memory.  This lets us avoid copy in/out for simpenrose, but
234          * I'm betting we'll need something else for FPGA.
235          */
236         void *sim_addr = v3d.mem + bo->block->ofs;
237         void *mmap_ret = mmap(sim_addr, create->size, PROT_READ | PROT_WRITE,
238                               MAP_SHARED | MAP_FIXED, bo->base.fd, 0);
239         assert(mmap_ret == sim_addr);
240 #else
241         /* Make a simulator-private mapping of the shim GEM object. */
242         bo->gem_vaddr = mmap(NULL, bo->base.size,
243                              PROT_READ | PROT_WRITE,
244                              MAP_SHARED,
245                              bo->base.fd, 0);
246         if (bo->gem_vaddr == MAP_FAILED) {
247                 fprintf(stderr, "v3d: mmap of shim bo failed\n");
248                 abort();
249         }
250 #endif
251 
252         create->offset = bo->offset;
253         create->handle = drm_shim_bo_get_handle(shim_fd, &bo->base);
254 
255         drm_shim_bo_put(&bo->base);
256 
257         return 0;
258 }
259 
260 static int
v3dX(v3d_ioctl_get_param)261 v3dX(v3d_ioctl_get_param)(int fd, unsigned long request, void *arg)
262 {
263         struct drm_v3d_get_param *gp = arg;
264         static const uint32_t reg_map[] = {
265                 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
266                 [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
267                 [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
268                 [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
269                 [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
270                 [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
271                 [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
272         };
273 
274         switch (gp->param) {
275         case DRM_V3D_PARAM_SUPPORTS_TFU:
276                 gp->value = 1;
277                 return 0;
278         }
279 
280         if (gp->param < ARRAY_SIZE(reg_map) && reg_map[gp->param]) {
281                 gp->value = V3D_READ(reg_map[gp->param]);
282                 return 0;
283         }
284 
285         fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM %d\n", gp->param);
286         return -1;
287 }
288 
289 static ioctl_fn_t driver_ioctls[] = {
290         [DRM_V3D_SUBMIT_CL] = v3dX(v3d_ioctl_submit_cl),
291         [DRM_V3D_SUBMIT_TFU] = v3dX(v3d_ioctl_submit_tfu),
292         [DRM_V3D_WAIT_BO] = v3d_ioctl_wait_bo,
293         [DRM_V3D_CREATE_BO] = v3dX(v3d_ioctl_create_bo),
294         [DRM_V3D_GET_PARAM] = v3dX(v3d_ioctl_get_param),
295         [DRM_V3D_MMAP_BO] = v3d_ioctl_mmap_bo,
296         [DRM_V3D_GET_BO_OFFSET] = v3d_ioctl_get_bo_offset,
297 };
298 
299 static void
v3d_isr(uint32_t hub_status)300 v3d_isr(uint32_t hub_status)
301 {
302         /* Check the per-core bits */
303         if (hub_status & (1 << 0)) {
304                 uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
305 
306                 if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
307                         fprintf(stderr, "GMP violation at 0x%08x\n",
308                                 V3D_READ(V3D_GMP_0_VIO_ADDR));
309                         abort();
310                 } else {
311                         fprintf(stderr,
312                                 "Unexpected ISR with core status 0x%08x\n",
313                                 core_status);
314                 }
315                 abort();
316         }
317 
318         return;
319 }
320 
321 static void
v3dX(simulator_init_regs)322 v3dX(simulator_init_regs)(void)
323 {
324 #if V3D_VERSION == 33
325         /* Set OVRTMUOUT to match kernel behavior.
326          *
327          * This means that the texture sampler uniform configuration's tmu
328          * output type field is used, instead of using the hardware default
329          * behavior based on the texture type.  If you want the default
330          * behavior, you can still put "2" in the indirect texture state's
331          * output_type field.
332          */
333         V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
334 #endif
335 
336         uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_GMPV_SET;
337         V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
338         V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
339 
340         v3d_hw_set_isr(v3d.hw, v3d_isr);
341 }
342 
343 static void
v3d_bo_free(struct shim_bo * shim_bo)344 v3d_bo_free(struct shim_bo *shim_bo)
345 {
346         struct v3d_bo *bo = v3d_bo(shim_bo);
347 
348         if (bo->gem_vaddr)
349                 munmap(bo->gem_vaddr, shim_bo->size);
350 
351         util_vma_heap_free(&v3d.heap, bo->offset, bo->base.size);
352 }
353 
354 void
v3dX(drm_shim_driver_init)355 v3dX(drm_shim_driver_init)(void)
356 {
357         shim_device.driver_ioctls = driver_ioctls;
358         shim_device.driver_ioctl_count = ARRAY_SIZE(driver_ioctls);
359 
360         shim_device.driver_bo_free = v3d_bo_free;
361 
362         /* Allocate a gig of memory to play in. */
363         v3d_hw_alloc_mem(v3d.hw, 1024 * 1024 * 1024);
364         v3d.mem_base =
365                 v3d_hw_get_mem(v3d.hw, &v3d.mem_size,
366                                &v3d.mem);
367         util_vma_heap_init(&v3d.heap, 4096, v3d.mem_size - 4096);
368 
369         v3dX(simulator_init_regs)();
370 }
371