/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <assert.h>

#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_context.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/format/u_format.h"
#include "util/u_pack_color.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "pan_util.h"
#include "decode.h"
#include "panfrost-quirks.h"

#define foreach_batch(ctx, idx) \
        BITSET_FOREACH_SET(idx, ctx->batches.active, PAN_MAX_BATCHES)

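/* Map a batch back to its index in the context's fixed array of batch slots */
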
static unsigned
panfrost_batch_idx(struct panfrost_batch *batch)
{
        return batch - batch->ctx->batches.slots;
}

/* Adds the BO backing surface to a batch if the surface is non-null */

static void
panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf)
{
        if (surf) {
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
                panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
        }
}

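/* Initialize a freshly selected batch slot, keying it to the given framebuffer
 * state and attaching the framebuffer's BOs */
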
static void
panfrost_batch_init(struct panfrost_context *ctx,
                    const struct pipe_framebuffer_state *key,
                    struct panfrost_batch *batch)
{
        struct pipe_screen *pscreen = ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = &screen->dev;

        batch->ctx = ctx;

        batch->seqnum = ++ctx->batches.seqnum;

        batch->first_bo = INT32_MAX;
        batch->last_bo = INT32_MIN;
        util_sparse_array_init(&batch->bos, sizeof(uint32_t), 64);

        batch->minx = batch->miny = ~0;
        batch->maxx = batch->maxy = 0;

        util_copy_framebuffer_state(&batch->key, key);
        batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        /* Preallocate the main pool, since every batch has at least one job
         * structure so it will be used */
        panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, true);

        /* Don't preallocate the invisible pool, since not every batch will use
         * the pre-allocation, particularly if the varyings are larger than the
         * preallocation and a reallocation is needed after anyway. */
        panfrost_pool_init(&batch->invisible_pool, NULL, dev,
                        PAN_BO_INVISIBLE, 65536, "Varyings", false, true);

        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
                panfrost_batch_add_surface(batch, batch->key.cbufs[i]);

        panfrost_batch_add_surface(batch, batch->key.zsbuf);

        screen->vtbl.init_batch(batch);
}

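/* Tear down a batch: drop BO and resource references, free its pools, and
 * release its slot in the context */
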
static void
panfrost_batch_cleanup(struct panfrost_context *ctx, struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        assert(batch->seqnum);

        if (ctx->batch == batch)
                ctx->batch = NULL;

        unsigned batch_idx = panfrost_batch_idx(batch);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                struct panfrost_bo *bo = pan_lookup_bo(dev, i);
                panfrost_bo_unreference(bo);
        }

        set_foreach_remove(batch->resources, entry) {
                struct panfrost_resource *rsrc = (void *) entry->key;

                if (_mesa_hash_table_search(ctx->writers, rsrc)) {
                        _mesa_hash_table_remove_key(ctx->writers, rsrc);
                        rsrc->track.nr_writers--;
                }

                rsrc->track.nr_users--;

                pipe_resource_reference((struct pipe_resource **) &rsrc, NULL);
        }

        _mesa_set_destroy(batch->resources, NULL);
        panfrost_pool_cleanup(&batch->pool);
        panfrost_pool_cleanup(&batch->invisible_pool);

        util_unreference_framebuffer_state(&batch->key);

        util_sparse_array_finish(&batch->bos);

        memset(batch, 0, sizeof(*batch));
        BITSET_CLEAR(ctx->batches.active, batch_idx);
}

static void
panfrost_batch_submit(struct panfrost_context *ctx,
                      struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync);

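/* Find a batch slot whose framebuffer key matches, or evict the least
 * recently used slot (flushing it first) and reinitialize it */
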
static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
                   const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = NULL;

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum &&
                    util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) {
                        /* We found a match, increase the seqnum for the LRU
                         * eviction logic.
                         */
                        ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum;
                        return &ctx->batches.slots[i];
                }

                if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum)
                        batch = &ctx->batches.slots[i];
        }

        assert(batch);

        /* The selected slot is used, we need to flush the batch */
        if (batch->seqnum)
                panfrost_batch_submit(ctx, batch, 0, 0);

        panfrost_batch_init(ctx, key, batch);

        unsigned batch_idx = panfrost_batch_idx(batch);
        BITSET_SET(ctx->batches.active, batch_idx);

        return batch;
}

/* Get the job corresponding to the FBO we're currently rendering into */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
{
        /* If we already began rendering, use that */

        if (ctx->batch) {
                assert(util_framebuffer_state_equal(&ctx->batch->key,
                                                    &ctx->pipe_framebuffer));
                return ctx->batch;
        }

        /* If not, look up the job */
        struct panfrost_batch *batch = panfrost_get_batch(ctx,
                                                          &ctx->pipe_framebuffer);

        /* Set this job as the current FBO job. Will be reset when updating the
         * FB state and when submitting or releasing a job.
         */
        ctx->batch = batch;
        panfrost_dirty_state_all(ctx);
        return batch;
}

struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason)
{
        struct panfrost_batch *batch;

        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        panfrost_dirty_state_all(ctx);

        /* We only need to submit and get a fresh batch if there is a
         * draw/clear already queued. Otherwise we may reuse the batch. */

        if (batch->scoreboard.first_job) {
                perf_debug_ctx(ctx, "Flushing the current FBO due to: %s", reason);
                panfrost_batch_submit(ctx, batch, 0, 0);
                batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        }

        ctx->batch = batch;
        return batch;
}

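/* Record that a batch reads or writes a resource, flushing any other batches
 * whose accesses would conflict with this one */
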
static void
panfrost_batch_update_access(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc, bool writes)
{
        struct panfrost_context *ctx = batch->ctx;
        uint32_t batch_idx = panfrost_batch_idx(batch);
        struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);
        struct panfrost_batch *writer = entry ? entry->data : NULL;
        bool found = false;

        _mesa_set_search_or_add(batch->resources, rsrc, &found);

        if (!found) {
                /* Cache number of batches accessing a resource */
                rsrc->track.nr_users++;

                /* Reference the resource on the batch */
                pipe_reference(NULL, &rsrc->base.reference);
        }

        /* Flush users if required */
        if (writes || ((writer != NULL) && (writer != batch))) {
                unsigned i;
                foreach_batch(ctx, i) {
                        struct panfrost_batch *batch = &ctx->batches.slots[i];

                        /* Skip the entry if this is our batch. */
                        if (i == batch_idx)
                                continue;

                        /* Submit if it's a user */
                        if (_mesa_set_search(batch->resources, rsrc))
                                panfrost_batch_submit(ctx, batch, 0, 0);
                }
        }

        if (writes) {
                _mesa_hash_table_insert(ctx->writers, rsrc, batch);
                rsrc->track.nr_writers++;
        }
}

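/* Reference a BO from the batch, recording (and merging) its access flags */
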
static void
panfrost_batch_add_bo_old(struct panfrost_batch *batch,
                struct panfrost_bo *bo, uint32_t flags)
{
        if (!bo)
                return;

        uint32_t *entry = util_sparse_array_get(&batch->bos, bo->gem_handle);
        uint32_t old_flags = *entry;

        if (!old_flags) {
                batch->num_bos++;
                batch->first_bo = MIN2(batch->first_bo, bo->gem_handle);
                batch->last_bo = MAX2(batch->last_bo, bo->gem_handle);
                panfrost_bo_reference(bo);
        }

        if (old_flags == flags)
                return;

        flags |= old_flags;
        *entry = flags;
}

static uint32_t
panfrost_access_for_stage(enum pipe_shader_type stage)
{
        return (stage == PIPE_SHADER_FRAGMENT) ?
                PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER;
}

void
panfrost_batch_add_bo(struct panfrost_batch *batch,
                struct panfrost_bo *bo, enum pipe_shader_type stage)
{
        panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ |
                        panfrost_access_for_stage(stage));
}

void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_READ |
                panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, false);
}

void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_WRITE |
                panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, true);
}

struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label)
{
        struct panfrost_bo *bo;

        bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size,
                                create_flags, label);
        panfrost_batch_add_bo(batch, bo, stage);

        /* panfrost_batch_add_bo() has retained a reference and
         * panfrost_bo_create() initializes the refcnt to 1, so let's
         * unreference the BO here so it gets released when the batch is
         * destroyed (unless it's retained by someone else in the meantime).
         */
        panfrost_bo_unreference(bo);
        return bo;
}

struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                unsigned size_per_thread,
                unsigned thread_tls_alloc,
                unsigned core_count)
{
        unsigned size = panfrost_get_total_stack_size(size_per_thread,
                        thread_tls_alloc,
                        core_count);

        if (batch->scratchpad) {
                assert(batch->scratchpad->size >= size);
        } else {
                batch->scratchpad = panfrost_batch_create_bo(batch, size,
                                             PAN_BO_INVISIBLE,
                                             PIPE_SHADER_VERTEX,
                                             "Thread local storage");

                panfrost_batch_add_bo(batch, batch->scratchpad,
                                PIPE_SHADER_FRAGMENT);
        }

        return batch->scratchpad;
}

struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
                unsigned size,
                unsigned workgroup_count)
{
        if (batch->shared_memory) {
                assert(batch->shared_memory->size >= size);
        } else {
                batch->shared_memory = panfrost_batch_create_bo(batch, size,
                                             PAN_BO_INVISIBLE,
                                             PIPE_SHADER_VERTEX,
                                             "Workgroup shared memory");
        }

        return batch->shared_memory;
}

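/* Translate a batch's framebuffer key, clear state and resolve/discard masks
 * into the pan_fb_info consumed by the common framebuffer emission code */
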
static void
panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
                          struct pan_fb_info *fb,
                          struct pan_image_view *rts,
                          struct pan_image_view *zs,
                          struct pan_image_view *s,
                          bool reserve)
{
        memset(fb, 0, sizeof(*fb));
        memset(rts, 0, sizeof(*rts) * 8);
        memset(zs, 0, sizeof(*zs));
        memset(s, 0, sizeof(*s));

        fb->width = batch->key.width;
        fb->height = batch->key.height;
        fb->extent.minx = batch->minx;
        fb->extent.miny = batch->miny;
        fb->extent.maxx = batch->maxx - 1;
        fb->extent.maxy = batch->maxy - 1;
        fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
        fb->rt_count = batch->key.nr_cbufs;

        static const unsigned char id_swz[] = {
                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
        };

        for (unsigned i = 0; i < fb->rt_count; i++) {
                struct pipe_surface *surf = batch->key.cbufs[i];

                if (!surf)
                        continue;

                struct panfrost_resource *prsrc = pan_resource(surf->texture);
                unsigned mask = PIPE_CLEAR_COLOR0 << i;

                if (batch->clear & mask) {
                        fb->rts[i].clear = true;
                        memcpy(fb->rts[i].clear_value, batch->clear_color[i],
                               sizeof((fb->rts[i].clear_value)));
                }

                fb->rts[i].discard = !reserve && !(batch->resolve & mask);

                rts[i].format = surf->format;
                rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
                rts[i].last_level = rts[i].first_level = surf->u.tex.level;
                rts[i].first_layer = surf->u.tex.first_layer;
                rts[i].last_layer = surf->u.tex.last_layer;
                rts[i].image = &prsrc->image;
                rts[i].nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
                fb->rts[i].crc_valid = &prsrc->valid.crc;
                fb->rts[i].view = &rts[i];

                /* Preload if the RT is read or updated */
                if (!(batch->clear & mask) &&
                    ((batch->read & mask) ||
                     ((batch->draws & mask) &&
                      BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level))))
                        fb->rts[i].preload = true;
        }

        const struct pan_image_view *s_view = NULL, *z_view = NULL;
        struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL;

        if (batch->key.zsbuf) {
                struct pipe_surface *surf = batch->key.zsbuf;
                z_rsrc = pan_resource(surf->texture);

                zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ?
                             PIPE_FORMAT_Z32_FLOAT : surf->format;
                zs->dim = MALI_TEXTURE_DIMENSION_2D;
                zs->last_level = zs->first_level = surf->u.tex.level;
                zs->first_layer = surf->u.tex.first_layer;
                zs->last_layer = surf->u.tex.last_layer;
                zs->image = &z_rsrc->image;
                zs->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle));
                fb->zs.view.zs = zs;
                z_view = zs;
                if (util_format_is_depth_and_stencil(zs->format)) {
                        s_view = zs;
                        s_rsrc = z_rsrc;
                }

                if (z_rsrc->separate_stencil) {
                        s_rsrc = z_rsrc->separate_stencil;
                        s->format = PIPE_FORMAT_S8_UINT;
                        s->dim = MALI_TEXTURE_DIMENSION_2D;
                        s->last_level = s->first_level = surf->u.tex.level;
                        s->first_layer = surf->u.tex.first_layer;
                        s->last_layer = surf->u.tex.last_layer;
                        s->image = &s_rsrc->image;
                        s->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                        memcpy(s->swizzle, id_swz, sizeof(s->swizzle));
                        fb->zs.view.s = s;
                        s_view = s;
                }
        }

        if (batch->clear & PIPE_CLEAR_DEPTH) {
                fb->zs.clear.z = true;
                fb->zs.clear_value.depth = batch->clear_depth;
        }

        if (batch->clear & PIPE_CLEAR_STENCIL) {
                fb->zs.clear.s = true;
                fb->zs.clear_value.stencil = batch->clear_stencil;
        }

        fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH);
        fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL);

        if (!fb->zs.clear.z &&
            ((batch->read & PIPE_CLEAR_DEPTH) ||
             ((batch->draws & PIPE_CLEAR_DEPTH) &&
              z_rsrc && BITSET_TEST(z_rsrc->valid.data, z_view->first_level))))
                fb->zs.preload.z = true;

        if (!fb->zs.clear.s &&
            ((batch->read & PIPE_CLEAR_STENCIL) ||
             ((batch->draws & PIPE_CLEAR_STENCIL) &&
              s_rsrc && BITSET_TEST(s_rsrc->valid.data, s_view->first_level))))
                fb->zs.preload.s = true;

        /* Preserve both components if we have a combined ZS view and
         * one component needs to be preserved.
         */
        if (s_view == z_view && fb->zs.discard.z != fb->zs.discard.s) {
                bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level);

                fb->zs.discard.z = false;
                fb->zs.discard.s = false;
                fb->zs.preload.z = !fb->zs.clear.z && valid;
                fb->zs.preload.s = !fb->zs.clear.s && valid;
        }
}

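/* Submit a single job chain (vertex/tiler or fragment) to the kernel via
 * DRM_IOCTL_PANFROST_SUBMIT, along with every BO the batch references */
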
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc,
                            uint32_t reqs,
                            uint32_t in_sync,
                            uint32_t out_sync)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_device *dev = pan_device(gallium->screen);
        struct drm_panfrost_submit submit = {0,};
        uint32_t *bo_handles;
        int ret;

        /* If we trace, we always need a syncobj to wait on, so fall back to
         * the context's syncobj if we weren't given one to use. */

        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
                out_sync = ctx->syncobj;

        submit.out_sync = out_sync;
        submit.jc = first_job_desc;
        submit.requirements = reqs;
        if (in_sync) {
                submit.in_syncs = (u64)(uintptr_t)(&in_sync);
                submit.in_sync_count = 1;
        }

        bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                            panfrost_pool_num_bos(&batch->invisible_pool) +
                            batch->num_bos + 2,
                            sizeof(*bo_handles));
        assert(bo_handles);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                assert(submit.bo_handle_count < batch->num_bos);
                bo_handles[submit.bo_handle_count++] = i;

                /* Update the BO access flags so that panfrost_bo_wait() knows
                 * about all pending accesses.
                 * We only keep the READ/WRITE info since this is all the BO
                 * wait logic cares about.
                 * We also preserve existing flags as this batch might not
                 * be the first one to access the BO.
                 */
                struct panfrost_bo *bo = pan_lookup_bo(dev, i);

                bo->gpu_access |= *flags & (PAN_BO_ACCESS_RW);
        }

        panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
        panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

        /* Add the tiler heap to the list of accessed BOs if the batch has at
         * least one tiler job. The tiler heap is written by tiler jobs and read
         * by fragment jobs (the polygon list is coming from this heap).
         */
        if (batch->scoreboard.first_tiler)
                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

        /* Always used on Bifrost, occasionally used on Midgard */
        bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;

        submit.bo_handles = (u64) (uintptr_t) bo_handles;
        if (ctx->is_noop)
                ret = 0;
        else
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
        free(bo_handles);

        if (ret)
                return errno;

        /* Trace the job if we're doing that */
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                /* Wait so we can get errors reported back */
                drmSyncobjWait(dev->fd, &out_sync, 1,
                               INT64_MAX, 0, NULL);

                if (dev->debug & PAN_DBG_TRACE)
                        pandecode_jc(submit.jc, dev->gpu_id);

                /* Jobs won't be complete if blackhole rendering, that's ok */
                if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
                        pandecode_abort_on_fault(submit.jc, dev->gpu_id);
        }

        return 0;
}

/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * out_sync corresponding to the later of the two (since there will be an
 * implicit dep between them) */

static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch,
                           const struct pan_fb_info *fb,
                           uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        bool has_draws = batch->scoreboard.first_job;
        bool has_tiler = batch->scoreboard.first_tiler;
        bool has_frag = has_tiler || batch->clear;
        int ret = 0;

        /* Take the submit lock to make sure no tiler jobs from other contexts
         * are inserted between our tiler and fragment jobs; failing to do that
         * might result in tiler heap corruption.
         */
        if (has_tiler)
                pthread_mutex_lock(&dev->submit_lock);

        if (has_draws) {
                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
                                                  0, in_sync, has_frag ? 0 : out_sync);

                if (ret)
                        goto done;
        }

        if (has_frag) {
                /* Whether we program the fragment job for draws or not depends
                 * on whether there is any *tiler* activity (so fragment
                 * shaders). If there are draws but entirely RASTERIZER_DISCARD
                 * (say, for transform feedback), we want a fragment job that
                 * *only* clears, since otherwise the tiler structures will be
                 * uninitialized, leading to faults (or state leaks). */

                mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb);
                ret = panfrost_batch_submit_ioctl(batch, fragjob,
                                                  PANFROST_JD_REQ_FS, 0,
                                                  out_sync);
                if (ret)
                        goto done;
        }

done:
        if (has_tiler)
                pthread_mutex_unlock(&dev->submit_lock);

        return ret;
}

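/* If the first color buffer tracks a damage tile map, upload it and attach it
 * to the FB info for this batch */
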
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0])
                return;

        struct pipe_surface *surf = batch->key.cbufs[0];
        struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL;

        if (pres && pres->damage.tile_map.enable) {
                fb->tile_map.base =
                        pan_pool_upload_aligned(&batch->pool.base,
                                                pres->damage.tile_map.data,
                                                pres->damage.tile_map.size,
                                                64);
                fb->tile_map.stride = pres->damage.tile_map.stride;
        }
}

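/* Finalize a batch: emit its framebuffer and TLS descriptors, submit the jobs
 * to the kernel, reset render-target damage, and release the batch */
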
static void
panfrost_batch_submit(struct panfrost_context *ctx,
                      struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        int ret;

        /* Nothing to do! */
        if (!batch->scoreboard.first_job && !batch->clear)
                goto out;

        struct pan_fb_info fb;
        struct pan_image_view rts[8], zs, s;

        panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);

        screen->vtbl.preload(batch, &fb);
        screen->vtbl.init_polygon_list(batch);

        /* Now that all draws are in, we can finally prepare the
         * FBD for the batch (if there is one). */

        screen->vtbl.emit_tls(batch);
        panfrost_emit_tile_map(batch, &fb);

        if (batch->scoreboard.first_tiler || batch->clear)
                screen->vtbl.emit_fbd(batch, &fb);

        ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync);

        if (ret)
                fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

        /* We must reset the damage info of our render targets here even
         * though a damage reset normally happens when the DRI layer swaps
         * buffers. That's because there can be implicit flushes the GL
         * app is not aware of, and those might impact the damage region: if
         * part of the damaged portion is drawn during those implicit flushes,
         * you have to reload those areas before next draws are pushed, and
         * since the driver can't easily know what's been modified by the draws
         * it flushed, the easiest solution is to reload everything.
         */
        for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
                if (!batch->key.cbufs[i])
                        continue;

                panfrost_resource_set_damage_region(ctx->base.screen,
                                                    batch->key.cbufs[i]->texture,
                                                    0, NULL);
        }

out:
        panfrost_batch_cleanup(ctx, batch);
}

/* Submit all batches, applying the out_sync to the currently bound batch */

void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason)
{
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_submit(ctx, batch, ctx->syncobj, ctx->syncobj);

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum) {
                        if (reason)
                                perf_debug_ctx(ctx, "Flushing everything due to: %s", reason);

                        panfrost_batch_submit(ctx, &ctx->batches.slots[i],
                                              ctx->syncobj, ctx->syncobj);
                }
        }
}

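/* Flush the batch currently writing a resource, if any */
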
void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc,
                      const char *reason)
{
        struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);

        if (entry) {
                perf_debug_ctx(ctx, "Flushing writer due to: %s", reason);
                panfrost_batch_submit(ctx, entry->data, ctx->syncobj, ctx->syncobj);
        }
}

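/* Flush every batch that reads or writes a resource */
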
void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc,
                                      const char *reason)
{
        unsigned i;
        foreach_batch(ctx, i) {
                struct panfrost_batch *batch = &ctx->batches.slots[i];

                if (!_mesa_set_search(batch->resources, rsrc))
                        continue;

                perf_debug_ctx(ctx, "Flushing user due to: %s", reason);
                panfrost_batch_submit(ctx, batch, ctx->syncobj, ctx->syncobj);
        }
}

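/* Grow the batch's thread-local storage requirement to cover the TLS needs of
 * every currently bound shader stage */
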
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;

        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                struct panfrost_shader_state *ss;

                ss = panfrost_get_shader_state(ctx, i);
                if (!ss)
                        continue;

                batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
        }
}

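/* Record a clear on the batch: pack the clear colours, stash depth/stencil
 * values, and extend the batch's bounding rectangle to the full framebuffer */
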
void
panfrost_batch_clear(struct panfrost_batch *batch,
                     unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
        struct panfrost_context *ctx = batch->ctx;

        if (buffers & PIPE_CLEAR_COLOR) {
                for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
                        if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
                                continue;

                        enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                        pan_pack_color(batch->clear_color[i], color, format, false);
                }
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                batch->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                batch->clear_stencil = stencil;
        }

        batch->clear |= buffers;
        batch->resolve |= buffers;

        /* Clearing affects the entire framebuffer (by definition -- this is
         * the Gallium clear callback, which clears the whole framebuffer). If
         * the scissor test were enabled from the GL side, the gallium frontend
         * would emit a quad instead and we wouldn't go down this code path. */

        panfrost_batch_union_scissor(batch, 0, 0,
                                     ctx->pipe_framebuffer.width,
                                     ctx->pipe_framebuffer.height);
}

/* Given a new bounding rectangle (scissor), let the job cover the union of the
 * new and old bounding rectangles */

void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
                             unsigned minx, unsigned miny,
                             unsigned maxx, unsigned maxy)
{
        batch->minx = MIN2(batch->minx, minx);
        batch->miny = MIN2(batch->miny, miny);
        batch->maxx = MAX2(batch->maxx, maxx);
        batch->maxy = MAX2(batch->maxy, maxy);
}