/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "drm-uapi/drm_fourcc.h"
#include "si_pipe.h"
#include "si_query.h"
#include "sid.h"
#include "frontend/drm_driver.h"
#include "util/format/u_format.h"
#include "util/os_time.h"
#include "util/u_log.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_resource.h"
#include "util/u_surface.h"
#include "util/u_transfer.h"

#include <errno.h>
#include <inttypes.h>

#include "amd/addrlib/inc/addrinterface.h"

static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
                                              const struct pipe_resource *templ,
                                              bool tc_compatible_htile);

static bool si_texture_is_aux_plane(const struct pipe_resource *resource);

/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
static void si_copy_region_with_blit(struct pipe_context *pipe, struct pipe_resource *dst,
                                     unsigned dst_level, unsigned dstx, unsigned dsty,
                                     unsigned dstz, struct pipe_resource *src, unsigned src_level,
                                     const struct pipe_box *src_box)
{
   struct pipe_blit_info blit;

   memset(&blit, 0, sizeof(blit));
   blit.src.resource = src;
   blit.src.format = src->format;
   blit.src.level = src_level;
   blit.src.box = *src_box;
   blit.dst.resource = dst;
   blit.dst.format = dst->format;
   blit.dst.level = dst_level;
   blit.dst.box.x = dstx;
   blit.dst.box.y = dsty;
   blit.dst.box.z = dstz;
   blit.dst.box.width = src_box->width;
   blit.dst.box.height = src_box->height;
   blit.dst.box.depth = src_box->depth;
   blit.mask = util_format_get_mask(dst->format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;

   if (blit.mask) {
      pipe->blit(pipe, &blit);
   }
}

/* Copy from a full GPU texture to a transfer's staging one. */
static void si_copy_to_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
{
   struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
   struct pipe_resource *dst = &stransfer->staging->b.b;
   struct pipe_resource *src = transfer->resource;

   if (src->nr_samples > 1 || ((struct si_texture *)src)->is_depth) {
      si_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, src, transfer->level, &transfer->box);
      return;
   }

   si_resource_copy_region(ctx, dst, 0, 0, 0, 0, src, transfer->level, &transfer->box);
}

/* Copy from a transfer's staging texture to a full GPU one. */
static void si_copy_from_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
{
   struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
   struct pipe_resource *dst = transfer->resource;
   struct pipe_resource *src = &stransfer->staging->b.b;
   struct pipe_box sbox;

   u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);

   if (dst->nr_samples > 1 || ((struct si_texture *)dst)->is_depth) {
      si_copy_region_with_blit(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
                               transfer->box.z, src, 0, &sbox);
      return;
   }
   if (util_format_is_compressed(dst->format)) {
      /* Convert the box to block units; height must use the block height. */
      sbox.width = util_format_get_nblocksx(dst->format, sbox.width);
      sbox.height = util_format_get_nblocksy(dst->format, sbox.height);
   }

   si_resource_copy_region(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
                           transfer->box.z, src, 0, &sbox);
}

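/* Return the byte offset of "box" within the given mip level, and the row
 * and layer strides of that level. With box == NULL, only the strides (and,
 * on pre-GFX9 chips, the level's base offset) are returned.
 */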
static unsigned si_texture_get_offset(struct si_screen *sscreen, struct si_texture *tex,
                                      unsigned level, const struct pipe_box *box, unsigned *stride,
                                      unsigned *layer_stride)
{
   if (sscreen->info.chip_class >= GFX9) {
      unsigned pitch;
      if (tex->surface.is_linear) {
         pitch = tex->surface.u.gfx9.pitch[level];
      } else {
         pitch = tex->surface.u.gfx9.surf_pitch;
      }

      *stride = pitch * tex->surface.bpe;
      *layer_stride = tex->surface.u.gfx9.surf_slice_size;

      if (!box)
         return 0;

      /* Each texture is an array of slices. Each slice is an array
       * of mipmap levels. */
      return tex->surface.u.gfx9.surf_offset + box->z * tex->surface.u.gfx9.surf_slice_size +
             tex->surface.u.gfx9.offset[level] +
             (box->y / tex->surface.blk_h * pitch + box->x / tex->surface.blk_w) *
             tex->surface.bpe;
   } else {
      *stride = tex->surface.u.legacy.level[level].nblk_x * tex->surface.bpe;
      assert((uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
      *layer_stride = (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4;

      if (!box)
         return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256;

      /* Each texture is an array of mipmap levels. Each level is
       * an array of slices. */
      return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256 +
             box->z * (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 +
             (box->y / tex->surface.blk_h * tex->surface.u.legacy.level[level].nblk_x +
              box->x / tex->surface.blk_w) *
                tex->surface.bpe;
   }
}

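/* Fill in radeon_surf flags from a pipe_resource template and compute the
 * surface layout through the winsys. This is also the central place where
 * DCC and HTILE are disabled for cases the hardware or driver can't handle.
 */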
static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surface,
                           const struct pipe_resource *ptex, enum radeon_surf_mode array_mode,
                           uint64_t modifier, bool is_imported, bool is_scanout,
                           bool is_flushed_depth, bool tc_compatible_htile)
{
   const struct util_format_description *desc = util_format_description(ptex->format);
   bool is_depth, is_stencil;
   int r;
   unsigned bpe;
   uint64_t flags = 0;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   if (!is_flushed_depth && ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
      bpe = 4; /* stencil is allocated separately */
   } else {
      bpe = util_format_get_blocksize(ptex->format);
      assert(util_is_power_of_two_or_zero(bpe));
   }

   if (!is_flushed_depth && is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if ((sscreen->debug_flags & DBG(NO_HYPERZ)) ||
          (ptex->bind & PIPE_BIND_SHARED) || is_imported) {
         flags |= RADEON_SURF_NO_HTILE;
      } else if (tc_compatible_htile &&
                 (sscreen->info.chip_class >= GFX9 || array_mode == RADEON_SURF_MODE_2D)) {
         /* TC-compatible HTILE only supports Z32_FLOAT.
          * GFX9 also supports Z16_UNORM.
          * On GFX8, promote Z16 to Z32. DB->CB copies will convert
          * the format for transfers.
          */
         if (sscreen->info.chip_class == GFX8)
            bpe = 4;

         flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      }

      if (is_stencil)
         flags |= RADEON_SURF_SBUFFER;
   }

   /* Disable DCC? */
   if (sscreen->info.chip_class >= GFX8) {
      /* Global options that disable DCC. */
      if (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC)
         flags |= RADEON_SURF_DISABLE_DCC;

      if (ptex->nr_samples >= 2 && sscreen->debug_flags & DBG(NO_DCC_MSAA))
         flags |= RADEON_SURF_DISABLE_DCC;

      /* Shared textures must always set up DCC. If it's not present, it will be disabled by
       * si_get_opaque_metadata later.
       */
      if (!is_imported &&
          (sscreen->debug_flags & DBG(NO_DCC) ||
           (ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC))))
         flags |= RADEON_SURF_DISABLE_DCC;

      /* R9G9B9E5 isn't supported for rendering by older generations. */
      if (sscreen->info.chip_class < GFX10_3 &&
          ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
         flags |= RADEON_SURF_DISABLE_DCC;

      switch (sscreen->info.chip_class) {
      case GFX8:
         /* Stoney: 128bpp MSAA textures randomly fail piglit tests with DCC. */
         if (sscreen->info.family == CHIP_STONEY && bpe == 16 && ptex->nr_samples >= 2)
            flags |= RADEON_SURF_DISABLE_DCC;

         /* DCC clear for 4x and 8x MSAA array textures unimplemented. */
         if (ptex->nr_storage_samples >= 4 && ptex->array_size > 1)
            flags |= RADEON_SURF_DISABLE_DCC;
         break;

      case GFX9:
         /* DCC MSAA fails this on Raven:
          *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.2_samples.html
          * and this on Picasso:
          *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.4_samples.html
          */
         if (sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4)
            flags |= RADEON_SURF_DISABLE_DCC;
         break;

      case GFX10:
      case GFX10_3:
         /* DCC causes corruption with MSAA. */
         if (ptex->nr_storage_samples >= 2)
            flags |= RADEON_SURF_DISABLE_DCC;
         break;

      default:
         assert(0);
      }
   }

   if (is_scanout) {
      /* This should catch bugs in gallium users setting incorrect flags. */
      assert(ptex->nr_samples <= 1 && ptex->array_size == 1 && ptex->depth0 == 1 &&
             ptex->last_level == 0 && !(flags & RADEON_SURF_Z_OR_SBUFFER));

      flags |= RADEON_SURF_SCANOUT;
   }

   if (ptex->bind & PIPE_BIND_SHARED)
      flags |= RADEON_SURF_SHAREABLE;
   if (is_imported)
      flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
   if (sscreen->debug_flags & DBG(NO_FMASK))
      flags |= RADEON_SURF_NO_FMASK;

   if (sscreen->info.chip_class == GFX9 && (ptex->flags & SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE)) {
      flags |= RADEON_SURF_FORCE_MICRO_TILE_MODE;
      surface->micro_tile_mode = SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(ptex->flags);
   }

   if (ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING) {
      flags |= RADEON_SURF_FORCE_SWIZZLE_MODE;

      if (sscreen->info.chip_class >= GFX10)
         surface->u.gfx9.swizzle_mode = ADDR_SW_64KB_R_X;
   }

   surface->modifier = modifier;

   r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, array_mode, surface);
   if (r) {
      return r;
   }

   return 0;
}

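/* Decompress pending fast color clears via flush_resource, flushing the
 * context only if a fast clear elimination actually took place.
 * *ctx_flushed tells the caller whether that flush happened.
 */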
void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex,
                                   bool *ctx_flushed)
{
   struct si_screen *sscreen = sctx->screen;
   struct pipe_context *ctx = &sctx->b;

   if (ctx == sscreen->aux_context)
      simple_mtx_lock(&sscreen->aux_context_lock);

   unsigned n = sctx->num_decompress_calls;
   ctx->flush_resource(ctx, &tex->buffer.b.b);

   /* Flush only if any fast clear elimination took place. */
   bool flushed = false;
   if (n != sctx->num_decompress_calls) {
      ctx->flush(ctx, NULL, 0);
      flushed = true;
   }
   if (ctx_flushed)
      *ctx_flushed = flushed;

   if (ctx == sscreen->aux_context)
      simple_mtx_unlock(&sscreen->aux_context_lock);
}

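/* Disable CMASK for the texture without decompressing it. Callers are
 * expected to have eliminated any fast-cleared (compressed) tiles first,
 * e.g. via si_eliminate_fast_color_clear.
 */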
void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex)
{
   if (!tex->cmask_buffer)
      return;

   assert(tex->buffer.b.b.nr_samples <= 1);

   /* Disable CMASK. */
   tex->cmask_base_address_reg = tex->buffer.gpu_address >> 8;
   tex->dirty_level_mask = 0;

   tex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);

   if (tex->cmask_buffer != &tex->buffer)
      si_resource_reference(&tex->cmask_buffer, NULL);

   tex->cmask_buffer = NULL;

   /* Notify all contexts about the change. */
   p_atomic_inc(&sscreen->dirty_tex_counter);
   p_atomic_inc(&sscreen->compressed_colortex_counter);
}

static bool si_can_disable_dcc(struct si_texture *tex)
{
   /* We can't disable DCC if it can be written by another process. */
   return !tex->is_depth &&
          tex->surface.meta_offset &&
          (!tex->buffer.b.is_shared ||
           !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)) &&
          !ac_modifier_has_dcc(tex->surface.modifier);
}

static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture *tex)
{
   if (!si_can_disable_dcc(tex))
      return false;

   /* Disable DCC. */
   ac_surface_zero_dcc_fields(&tex->surface);

   /* Notify all contexts about the change. */
   p_atomic_inc(&sscreen->dirty_tex_counter);
   return true;
}

/**
 * Disable DCC for the texture (first decompress, then discard metadata).
 *
 * There is an unresolved multi-context synchronization issue between
 * screen::aux_context and the current context. If applications do this with
 * multiple contexts, it's already undefined behavior for them and we don't
 * have to worry about that. The scenario is:
 *
 * If context 1 disables DCC and context 2 has queued commands that write
 * to the texture via CB with DCC enabled, and the order of operations is
 * as follows:
 *   context 2 queues draw calls rendering to the texture, but doesn't flush
 *   context 1 disables DCC and flushes
 *   context 1 & 2 reset descriptors and FB state
 *   context 2 flushes (new compressed tiles written by the draw calls)
 *   context 1 & 2 read garbage, because DCC is disabled, yet there are
 *   compressed tiles
 *
 * \param sctx  the current context if you have one, or sscreen->aux_context
 *              if you don't.
 */
bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex)
{
   struct si_screen *sscreen = sctx->screen;

   if (!sctx->has_graphics)
      return si_texture_discard_dcc(sscreen, tex);

   if (!si_can_disable_dcc(tex))
      return false;

   if (&sctx->b == sscreen->aux_context)
      simple_mtx_lock(&sscreen->aux_context_lock);

   /* Decompress DCC. */
   si_decompress_dcc(sctx, tex);
   sctx->b.flush(&sctx->b, NULL, 0);

   if (&sctx->b == sscreen->aux_context)
      simple_mtx_unlock(&sscreen->aux_context_lock);

   return si_texture_discard_dcc(sscreen, tex);
}

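/* Reallocate the texture with new bind flags and swap the new storage into
 * "tex" in place, so that existing pipe_resource pointers stay valid. Used
 * to add PIPE_BIND_SHARED or PIPE_BIND_LINEAR after creation.
 */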
static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_texture *tex,
                                          unsigned new_bind_flag, bool invalidate_storage)
{
   struct pipe_screen *screen = sctx->b.screen;
   struct si_texture *new_tex;
   struct pipe_resource templ = tex->buffer.b.b;
   unsigned i;

   templ.bind |= new_bind_flag;

   if (tex->buffer.b.is_shared || tex->num_planes > 1)
      return;

   if (new_bind_flag == PIPE_BIND_LINEAR) {
      if (tex->surface.is_linear)
         return;

      /* This fails with MSAA, depth, and compressed textures. */
      if (si_choose_tiling(sctx->screen, &templ, false) != RADEON_SURF_MODE_LINEAR_ALIGNED)
         return;
   }

   new_tex = (struct si_texture *)screen->resource_create(screen, &templ);
   if (!new_tex)
      return;

   /* Copy the pixels to the new texture. */
   if (!invalidate_storage) {
      for (i = 0; i <= templ.last_level; i++) {
         struct pipe_box box;

         u_box_3d(0, 0, 0, u_minify(templ.width0, i), u_minify(templ.height0, i),
                  util_num_layers(&templ, i), &box);

         si_resource_copy_region(&sctx->b, &new_tex->buffer.b.b,
                                 i, 0, 0, 0, &tex->buffer.b.b, i, &box);
      }
   }

   if (new_bind_flag == PIPE_BIND_LINEAR) {
      si_texture_discard_cmask(sctx->screen, tex);
      si_texture_discard_dcc(sctx->screen, tex);
   }

   /* Replace the structure fields of tex. */
   tex->buffer.b.b.bind = templ.bind;
   radeon_bo_reference(sctx->screen->ws, &tex->buffer.buf, new_tex->buffer.buf);
   tex->buffer.gpu_address = new_tex->buffer.gpu_address;
   tex->buffer.memory_usage_kb = new_tex->buffer.memory_usage_kb;
   tex->buffer.bo_size = new_tex->buffer.bo_size;
   tex->buffer.bo_alignment_log2 = new_tex->buffer.bo_alignment_log2;
   tex->buffer.domains = new_tex->buffer.domains;
   tex->buffer.flags = new_tex->buffer.flags;

   tex->surface = new_tex->surface;
   si_texture_reference(&tex->flushed_depth_texture, new_tex->flushed_depth_texture);

   tex->surface.fmask_offset = new_tex->surface.fmask_offset;
   tex->surface.cmask_offset = new_tex->surface.cmask_offset;
   tex->cmask_base_address_reg = new_tex->cmask_base_address_reg;

   if (tex->cmask_buffer == &tex->buffer)
      tex->cmask_buffer = NULL;
   else
      si_resource_reference(&tex->cmask_buffer, NULL);

   if (new_tex->cmask_buffer == &new_tex->buffer)
      tex->cmask_buffer = &tex->buffer;
   else
      si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer);

   tex->surface.meta_offset = new_tex->surface.meta_offset;
   tex->cb_color_info = new_tex->cb_color_info;
   memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value));
   tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;

   memcpy(tex->depth_clear_value, new_tex->depth_clear_value, sizeof(tex->depth_clear_value));
   tex->dirty_level_mask = new_tex->dirty_level_mask;
   tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask;
   tex->db_render_format = new_tex->db_render_format;
   memcpy(tex->stencil_clear_value, new_tex->stencil_clear_value, sizeof(tex->stencil_clear_value));
   tex->tc_compatible_htile = new_tex->tc_compatible_htile;
   tex->depth_cleared_level_mask_once = new_tex->depth_cleared_level_mask_once;
   tex->stencil_cleared_level_mask = new_tex->stencil_cleared_level_mask;
   tex->upgraded_depth = new_tex->upgraded_depth;
   tex->db_compatible = new_tex->db_compatible;
   tex->can_sample_z = new_tex->can_sample_z;
   tex->can_sample_s = new_tex->can_sample_s;

   tex->displayable_dcc_dirty = new_tex->displayable_dcc_dirty;

   if (new_bind_flag == PIPE_BIND_LINEAR) {
      assert(!tex->surface.meta_offset);
      assert(!tex->cmask_buffer);
      assert(!tex->surface.fmask_size);
      assert(!tex->is_depth);
   }

   si_texture_reference(&new_tex, NULL);

   p_atomic_inc(&sctx->screen->dirty_tex_counter);
}

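/* Write tiling/format metadata into the buffer object, so that other
 * processes importing the BO can reconstruct matching texture descriptors.
 */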
static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex)
{
   struct pipe_resource *res = &tex->buffer.b.b;
   struct radeon_bo_metadata md;

   memset(&md, 0, sizeof(md));

   assert(tex->surface.fmask_size == 0);

   static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                                           PIPE_SWIZZLE_W};
   bool is_array = util_texture_is_array(res->target);
   uint32_t desc[8];

   sscreen->make_texture_descriptor(sscreen, tex, true, res->target, res->format, swizzle, 0,
                                    res->last_level, 0, is_array ? res->array_size - 1 : 0,
                                    res->width0, res->height0, res->depth0, desc, NULL);
   si_set_mutable_tex_desc_fields(sscreen, tex, &tex->surface.u.legacy.level[0], 0, 0,
                                  tex->surface.blk_w, false, 0, desc);

   ac_surface_get_umd_metadata(&sscreen->info, &tex->surface,
                               tex->buffer.b.b.last_level + 1,
                               desc, &md.size_metadata, md.metadata);
   sscreen->ws->buffer_set_metadata(sscreen->ws, tex->buffer.buf, &md, &tex->surface);
}

static bool si_displayable_dcc_needs_explicit_flush(struct si_texture *tex)
{
   struct si_screen *sscreen = (struct si_screen *)tex->buffer.b.b.screen;

   if (sscreen->info.chip_class <= GFX8)
      return false;

   /* With modifiers and more than one plane, applications know that they
    * cannot do frontbuffer rendering with the texture. */
   if (ac_surface_get_nplanes(&tex->surface) > 1)
      return false;

   return tex->surface.is_displayable && tex->surface.meta_offset;
}

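/* Implementation of pipe_screen::resource_get_param. Reports per-plane
 * layout parameters (plane count, stride, offset, modifier) and can also
 * export winsys handles.
 */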
static bool si_resource_get_param(struct pipe_screen *screen, struct pipe_context *context,
                                  struct pipe_resource *resource, unsigned plane, unsigned layer,
                                  unsigned level,
                                  enum pipe_resource_param param, unsigned handle_usage,
                                  uint64_t *value)
{
   while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
      --plane;
      resource = resource->next;
   }

   struct si_screen *sscreen = (struct si_screen *)screen;
   struct si_texture *tex = (struct si_texture *)resource;
   struct winsys_handle whandle;

   switch (param) {
   case PIPE_RESOURCE_PARAM_NPLANES:
      if (resource->target == PIPE_BUFFER)
         *value = 1;
      else if (tex->num_planes > 1)
         *value = tex->num_planes;
      else
         *value = ac_surface_get_nplanes(&tex->surface);
      return true;

   case PIPE_RESOURCE_PARAM_STRIDE:
      if (resource->target == PIPE_BUFFER)
         *value = 0;
      else
         *value = ac_surface_get_plane_stride(sscreen->info.chip_class,
                                              &tex->surface, plane);
      return true;

   case PIPE_RESOURCE_PARAM_OFFSET:
      if (resource->target == PIPE_BUFFER)
         *value = 0;
      else
         *value = ac_surface_get_plane_offset(sscreen->info.chip_class,
                                              &tex->surface, plane, layer);
      return true;

   case PIPE_RESOURCE_PARAM_MODIFIER:
      *value = tex->surface.modifier;
      return true;

   case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
   case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS:
   case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD:
      memset(&whandle, 0, sizeof(whandle));

      if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED)
         whandle.type = WINSYS_HANDLE_TYPE_SHARED;
      else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
         whandle.type = WINSYS_HANDLE_TYPE_KMS;
      else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD)
         whandle.type = WINSYS_HANDLE_TYPE_FD;

      if (!screen->resource_get_handle(screen, context, resource, &whandle, handle_usage))
         return false;

      *value = whandle.handle;
      return true;
   case PIPE_RESOURCE_PARAM_LAYER_STRIDE:
      break;
   }
   return false;
}

static void si_texture_get_info(struct pipe_screen *screen, struct pipe_resource *resource,
                                unsigned *pstride, unsigned *poffset)
{
   uint64_t value;

   if (pstride) {
      si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_STRIDE, 0, &value);
      *pstride = value;
   }

   if (poffset) {
      si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_OFFSET, 0, &value);
      *poffset = value;
   }
}

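/* Implementation of pipe_screen::resource_get_handle. Exports a texture or
 * buffer through a winsys handle, first making it shareable: suballocated
 * storage is reallocated, DCC/CMASK are disabled where external users can't
 * handle them, and BO metadata is updated.
 */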
static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_context *ctx,
                                  struct pipe_resource *resource, struct winsys_handle *whandle,
                                  unsigned usage)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct si_context *sctx;
   struct si_resource *res = si_resource(resource);
   struct si_texture *tex = (struct si_texture *)resource;
   bool update_metadata = false;
   unsigned stride, offset, slice_size;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   bool flush = false;

   ctx = threaded_context_unwrap_sync(ctx);
   sctx = (struct si_context *)(ctx ? ctx : sscreen->aux_context);

   if (resource->target != PIPE_BUFFER) {
      unsigned plane = whandle->plane;

      /* Individual planes are chained pipe_resource instances. */
      while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
         resource = resource->next;
         --plane;
      }

      res = si_resource(resource);
      tex = (struct si_texture *)resource;

      /* This is not supported now, but it might be required for OpenCL
       * interop in the future.
       */
      if (resource->nr_samples > 1 || tex->is_depth)
         return false;

      if (plane) {
         whandle->offset = ac_surface_get_plane_offset(sscreen->info.chip_class,
                                                       &tex->surface, plane, 0);
         whandle->stride = ac_surface_get_plane_stride(sscreen->info.chip_class,
                                                       &tex->surface, plane);
         whandle->modifier = tex->surface.modifier;
         return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
      }

      /* Move a suballocated texture into a non-suballocated allocation. */
      if (sscreen->ws->buffer_is_suballocated(res->buf) || tex->surface.tile_swizzle ||
          (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
           sscreen->info.has_local_buffers)) {
         assert(!res->b.is_shared);
         si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_SHARED, false);
         flush = true;
         assert(res->b.b.bind & PIPE_BIND_SHARED);
         assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
         assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
         assert(tex->surface.tile_swizzle == 0);
      }

      /* Since shader image stores don't support DCC on GFX8,
       * disable it for external clients that want write
       * access.
       */
      if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && !tex->is_depth && tex->surface.meta_offset) ||
          /* Displayable DCC requires an explicit flush. */
          (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
           si_displayable_dcc_needs_explicit_flush(tex))) {
         if (si_texture_disable_dcc(sctx, tex)) {
            update_metadata = true;
            /* si_texture_disable_dcc flushes the context */
            flush = false;
         }
      }

      if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
          (tex->cmask_buffer || (!tex->is_depth && tex->surface.meta_offset))) {
         /* Eliminate fast clear (both CMASK and DCC) */
         bool flushed;
         si_eliminate_fast_color_clear(sctx, tex, &flushed);
         /* eliminate_fast_color_clear sometimes flushes the context */
         if (flushed)
            flush = false;

         /* Disable CMASK if flush_resource isn't going
          * to be called.
          */
         if (tex->cmask_buffer)
            si_texture_discard_cmask(sscreen, tex);
      }

      /* Set metadata. */
      if ((!res->b.is_shared || update_metadata) && whandle->offset == 0)
         si_set_tex_bo_metadata(sscreen, tex);

      if (sscreen->info.chip_class >= GFX9) {
         slice_size = tex->surface.u.gfx9.surf_slice_size;
      } else {
         slice_size = (uint64_t)tex->surface.u.legacy.level[0].slice_size_dw * 4;
      }

      modifier = tex->surface.modifier;
   } else {
      /* Buffer exports are for the OpenCL interop. */
      /* Move a suballocated buffer into a non-suballocated allocation. */
      if (sscreen->ws->buffer_is_suballocated(res->buf) ||
          /* A DMABUF export always fails if the BO is local. */
          (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
           sscreen->info.has_local_buffers)) {
         assert(!res->b.is_shared);

         /* Allocate a new buffer with PIPE_BIND_SHARED. */
         struct pipe_resource templ = res->b.b;
         templ.bind |= PIPE_BIND_SHARED;

         struct pipe_resource *newb = screen->resource_create(screen, &templ);
         if (!newb)
            return false;

         /* Copy the old buffer contents to the new one. */
         struct pipe_box box;
         u_box_1d(0, newb->width0, &box);
         sctx->b.resource_copy_region(&sctx->b, newb, 0, 0, 0, 0, &res->b.b, 0, &box);
         flush = true;
         /* Move the new buffer storage to the old pipe_resource. */
         si_replace_buffer_storage(&sctx->b, &res->b.b, newb, 0, 0, 0);
         pipe_resource_reference(&newb, NULL);

         assert(res->b.b.bind & PIPE_BIND_SHARED);
         assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
      }

      /* Buffers */
      slice_size = 0;
   }

   si_texture_get_info(screen, resource, &stride, &offset);

   if (res->b.is_shared) {
      /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
       * doesn't set it.
       */
      res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
      if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
         res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
   } else {
      res->b.is_shared = true;
      res->external_usage = usage;
   }

   if (flush)
      sctx->b.flush(&sctx->b, NULL, 0);

   whandle->stride = stride;
   whandle->offset = offset + slice_size * whandle->layer;
   whandle->modifier = modifier;

   return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
}

void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex,
                           struct u_log_context *log)
{
   int i;
   FILE *f;
   char *surf_info = NULL;
   size_t surf_info_size;

   /* Common parameters. */
   u_log_printf(log,
                "  Info: npix_x=%u, npix_y=%u, npix_z=%u, "
                "array_size=%u, last_level=%u, nsamples=%u",
                tex->buffer.b.b.width0, tex->buffer.b.b.height0,
                tex->buffer.b.b.depth0, tex->buffer.b.b.array_size,
                tex->buffer.b.b.last_level, tex->buffer.b.b.nr_samples);

   if (tex->is_depth && tex->surface.meta_offset)
      u_log_printf(log, ", tc_compatible_htile=%u", tex->tc_compatible_htile);

   u_log_printf(log, ", %s\n",
                util_format_short_name(tex->buffer.b.b.format));

   f = open_memstream(&surf_info, &surf_info_size);
   if (!f)
      return;
   ac_surface_print_info(f, &sscreen->info, &tex->surface);
   fclose(f);
   u_log_printf(log, "%s", surf_info);
   free(surf_info);

   if (sscreen->info.chip_class >= GFX9) {
      return;
   }

   if (!tex->is_depth && tex->surface.meta_offset) {
      for (i = 0; i <= tex->buffer.b.b.last_level; i++)
         u_log_printf(log,
                      "    DCCLevel[%i]: enabled=%u, offset=%u, "
                      "fast_clear_size=%u\n",
                      i, i < tex->surface.num_meta_levels, tex->surface.u.legacy.color.dcc_level[i].dcc_offset,
                      tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size);
   }

   for (i = 0; i <= tex->buffer.b.b.last_level; i++)
      u_log_printf(log,
                   "    Level[%i]: offset=%" PRIu64 ", slice_size=%" PRIu64 ", "
                   "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                   "mode=%u, tiling_index = %u\n",
                   i, (uint64_t)tex->surface.u.legacy.level[i].offset_256B * 256,
                   (uint64_t)tex->surface.u.legacy.level[i].slice_size_dw * 4,
                   u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
                   u_minify(tex->buffer.b.b.depth0, i), tex->surface.u.legacy.level[i].nblk_x,
                   tex->surface.u.legacy.level[i].nblk_y, tex->surface.u.legacy.level[i].mode,
                   tex->surface.u.legacy.tiling_index[i]);

   if (tex->surface.has_stencil) {
      for (i = 0; i <= tex->buffer.b.b.last_level; i++) {
         u_log_printf(log,
                      "    StencilLevel[%i]: offset=%" PRIu64 ", "
                      "slice_size=%" PRIu64 ", npix_x=%u, "
                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                      "mode=%u, tiling_index = %u\n",
                      i, (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].offset_256B * 256,
                      (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].slice_size_dw * 4,
                      u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
                      u_minify(tex->buffer.b.b.depth0, i),
                      tex->surface.u.legacy.zs.stencil_level[i].nblk_x,
                      tex->surface.u.legacy.zs.stencil_level[i].nblk_y,
                      tex->surface.u.legacy.zs.stencil_level[i].mode,
                      tex->surface.u.legacy.zs.stencil_tiling_index[i]);
      }
   }
}

/**
 * Common function for si_texture_create and si_texture_from_handle.
 *
 * \param screen       screen
 * \param base         resource template
 * \param surface      radeon_surf
 * \param plane0       if a non-zero plane is being created, this is the first plane
 * \param imported_buf from si_texture_from_handle
 * \param offset       offset for non-zero planes or imported buffers
 * \param alloc_size   the size to allocate if plane0 != NULL
 * \param alignment    alignment for the allocation
 */
static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
                                                   const struct pipe_resource *base,
                                                   const struct radeon_surf *surface,
                                                   const struct si_texture *plane0,
                                                   struct pb_buffer *imported_buf,
                                                   uint64_t offset, unsigned pitch_in_bytes,
                                                   uint64_t alloc_size, unsigned alignment)
{
   struct si_texture *tex;
   struct si_resource *resource;
   struct si_screen *sscreen = (struct si_screen *)screen;

   if (!sscreen->info.has_3d_cube_border_color_mipmap &&
       (base->last_level > 0 ||
        base->target == PIPE_TEXTURE_3D ||
        base->target == PIPE_TEXTURE_CUBE)) {
      assert(0);
      return NULL;
   }

   tex = CALLOC_STRUCT_CL(si_texture);
   if (!tex)
      goto error;

   resource = &tex->buffer;
   resource->b.b = *base;
   pipe_reference_init(&resource->b.b.reference, 1);
   resource->b.b.screen = screen;

   /* don't include stencil-only formats which we don't support for rendering */
   tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
   tex->surface = *surface;

   /* Use 1.0 as the default clear value to get optimal ZRANGE_PRECISION if we don't
    * get a fast clear.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(tex->depth_clear_value); i++)
      tex->depth_clear_value[i] = 1.0;

   /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE
    * setting, so we have to enable it if we enabled it at allocation.
    *
    * GFX9 and later use the same tiling for both, so TC-compatible HTILE can be
    * enabled on demand.
    */
   tex->tc_compatible_htile = (sscreen->info.chip_class == GFX8 &&
                               tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ||
                              /* Mipmapping always starts TC-compatible. */
                              (sscreen->info.chip_class >= GFX8 &&
                               tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE &&
                               tex->buffer.b.b.last_level > 0);

   /* TC-compatible HTILE:
    * - GFX8 only supports Z32_FLOAT.
    * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
   if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
      if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
         tex->db_render_format = base->format;
      else {
         tex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
         tex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
                               base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
      }
   } else {
      tex->db_render_format = base->format;
   }

   /* Applies to GCN. */
   tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode;

   if (!ac_surface_override_offset_stride(&sscreen->info, &tex->surface,
                                          tex->buffer.b.b.last_level + 1,
                                          offset, pitch_in_bytes / tex->surface.bpe))
      goto error;

   if (tex->is_depth) {
      tex->htile_stencil_disabled = !tex->surface.has_stencil;

      if (sscreen->info.chip_class >= GFX9) {
         tex->can_sample_z = true;
         tex->can_sample_s = true;

         /* Stencil texturing with HTILE doesn't work
          * with mipmapping on Navi10-14. */
         if (sscreen->info.chip_class == GFX10 && base->last_level > 0)
            tex->htile_stencil_disabled = true;
      } else {
         tex->can_sample_z = !tex->surface.u.legacy.depth_adjusted;
         tex->can_sample_s = !tex->surface.u.legacy.stencil_adjusted;

         /* GFX8 must keep stencil enabled because it can't use Z-only TC-compatible
          * HTILE because of a hw bug. This has only a small effect on performance
          * because we lose a little bit of Z precision in order to make space for
          * stencil in HTILE.
          */
         if (sscreen->info.chip_class == GFX8 &&
             tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
            tex->htile_stencil_disabled = false;
      }

      tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER;
   } else {
      if (tex->surface.cmask_offset) {
         tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
         tex->cmask_buffer = &tex->buffer;
      }
   }

   if (plane0) {
      /* The buffer is shared with the first plane. */
      resource->bo_size = plane0->buffer.bo_size;
      resource->bo_alignment_log2 = plane0->buffer.bo_alignment_log2;
      resource->flags = plane0->buffer.flags;
      resource->domains = plane0->buffer.domains;
      resource->memory_usage_kb = plane0->buffer.memory_usage_kb;

      radeon_bo_reference(sscreen->ws, &resource->buf, plane0->buffer.buf);
      resource->gpu_address = plane0->buffer.gpu_address;
   } else if (!(surface->flags & RADEON_SURF_IMPORTED)) {
      /* Create the backing buffer. */
      si_init_resource_fields(sscreen, resource, alloc_size, alignment);

      if (!si_alloc_resource(sscreen, resource))
         goto error;
   } else {
      resource->buf = imported_buf;
      resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf);
      resource->bo_size = imported_buf->size;
      resource->bo_alignment_log2 = imported_buf->alignment_log2;
      resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
      resource->memory_usage_kb = MAX2(1, resource->bo_size / 1024);
      if (sscreen->ws->buffer_get_flags)
         resource->flags = sscreen->ws->buffer_get_flags(resource->buf);
   }

   /* Prepare metadata clears. */
   struct si_clear_info clears[4];
   unsigned num_clears = 0;

   if (tex->cmask_buffer) {
      /* Initialize the cmask to 0xCC (= compressed state). */
      assert(num_clears < ARRAY_SIZE(clears));
      si_init_buffer_clear(&clears[num_clears++], &tex->cmask_buffer->b.b,
                           tex->surface.cmask_offset, tex->surface.cmask_size,
                           0xCCCCCCCC);
   }
   if (tex->is_depth && tex->surface.meta_offset) {
      uint32_t clear_value = 0;

      if (sscreen->info.chip_class >= GFX9 || tex->tc_compatible_htile)
         clear_value = 0x0000030F;

      assert(num_clears < ARRAY_SIZE(clears));
      si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                           tex->surface.meta_size, clear_value);
   }

   /* Initialize DCC only if the texture is not being imported. */
   if (!(surface->flags & RADEON_SURF_IMPORTED) && !tex->is_depth && tex->surface.meta_offset) {
      /* Clear DCC to black for all tiles with DCC enabled.
       *
       * This fixes corruption in 3DMark Slingshot Extreme, which
       * uses uninitialized textures, causing corruption.
       */
      if (tex->surface.num_meta_levels == tex->buffer.b.b.last_level + 1 &&
          tex->buffer.b.b.nr_samples <= 2) {
         /* Simple case - all tiles have DCC enabled. */
         assert(num_clears < ARRAY_SIZE(clears));
         si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                              tex->surface.meta_size, DCC_CLEAR_COLOR_0000);
      } else if (sscreen->info.chip_class >= GFX9) {
         /* Clear to uncompressed. Clearing this to black is complicated. */
         assert(num_clears < ARRAY_SIZE(clears));
         si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                              tex->surface.meta_size, DCC_UNCOMPRESSED);
      } else {
         /* GFX8: Initialize mipmap levels and multisamples separately. */
         if (tex->buffer.b.b.nr_samples >= 2) {
            /* Clearing this to black is complicated. */
            assert(num_clears < ARRAY_SIZE(clears));
            si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                                 tex->surface.meta_size, DCC_UNCOMPRESSED);
         } else {
            /* Clear the enabled mipmap levels to black. */
            unsigned size = 0;

            for (unsigned i = 0; i < tex->surface.num_meta_levels; i++) {
               if (!tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size)
                  break;

               size = tex->surface.u.legacy.color.dcc_level[i].dcc_offset +
                      tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size;
            }

            /* Mipmap levels with DCC. */
            if (size) {
               assert(num_clears < ARRAY_SIZE(clears));
               si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, size,
                                    DCC_CLEAR_COLOR_0000);
            }
            /* Mipmap levels without DCC. */
            if (size != tex->surface.meta_size) {
               assert(num_clears < ARRAY_SIZE(clears));
               si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset + size,
                                    tex->surface.meta_size - size, DCC_UNCOMPRESSED);
            }
         }
      }
   }

   /* Initialize displayable DCC that requires the retile blit. */
   if (tex->surface.display_dcc_offset && !(surface->flags & RADEON_SURF_IMPORTED)) {
      /* Uninitialized DCC can hang the display hw.
       * Clear to white to indicate that. */
      assert(num_clears < ARRAY_SIZE(clears));
      si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.display_dcc_offset,
                           tex->surface.u.gfx9.color.display_dcc_size, DCC_CLEAR_COLOR_1111);
   }

   /* Execute the clears. */
   if (num_clears) {
      simple_mtx_lock(&sscreen->aux_context_lock);
      si_execute_clears((struct si_context *)sscreen->aux_context,
                        clears, num_clears, 0);
      sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
      simple_mtx_unlock(&sscreen->aux_context_lock);
   }

   /* Initialize the CMASK base register value. */
   tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;

   if (sscreen->debug_flags & DBG(VM)) {
      fprintf(stderr,
              "VM start=0x%" PRIX64 "  end=0x%" PRIX64
              " | Texture %ix%ix%i, %i levels, %i samples, %s\n",
              tex->buffer.gpu_address, tex->buffer.gpu_address + tex->buffer.buf->size,
              base->width0, base->height0, util_num_layers(base, 0), base->last_level + 1,
              base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
   }

   if (sscreen->debug_flags & DBG(TEX)) {
      puts("Texture:");
      struct u_log_context log;
      u_log_context_init(&log);
      si_print_texture_info(sscreen, tex, &log);
      u_log_new_page_print(&log, stdout);
      fflush(stdout);
      u_log_context_destroy(&log);
   }

   return tex;

error:
   FREE_CL(tex);
   return NULL;
}

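/* Choose between linear, 1D-tiled, and 2D-tiled layouts for a resource
 * template. The heuristics below broadly trade GPU sampling performance
 * against CPU mapping cost.
 */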
static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
                                              const struct pipe_resource *templ,
                                              bool tc_compatible_htile)
{
   const struct util_format_description *desc = util_format_description(templ->format);
   bool force_tiling = templ->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING;
   bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) &&
                           !(templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH);

   /* MSAA resources must be 2D tiled. */
   if (templ->nr_samples > 1)
      return RADEON_SURF_MODE_2D;

   /* Transfer resources should be linear. */
   if (templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR)
      return RADEON_SURF_MODE_LINEAR_ALIGNED;

   /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8,
    * which requires 2D tiling.
    */
   if (sscreen->info.chip_class == GFX8 && tc_compatible_htile)
      return RADEON_SURF_MODE_2D;

   /* Handle common candidates for the linear mode.
    * Compressed textures and DB surfaces must always be tiled.
    */
   if (!force_tiling && !is_depth_stencil && !util_format_is_compressed(templ->format)) {
      if (sscreen->debug_flags & DBG(NO_TILING) ||
          (templ->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_TILING)))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;

      /* Tiling doesn't work with the 422 (SUBSAMPLED) formats. */
      if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;

      /* Cursors are linear on AMD GCN.
       * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
      if (templ->bind & PIPE_BIND_CURSOR)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;

      if (templ->bind & PIPE_BIND_LINEAR)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;

      /* Textures with a very small height are recommended to be linear. */
      if (templ->target == PIPE_TEXTURE_1D || templ->target == PIPE_TEXTURE_1D_ARRAY ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          templ->height0 <= 2)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;

      /* Textures likely to be mapped often. */
      if (templ->usage == PIPE_USAGE_STAGING || templ->usage == PIPE_USAGE_STREAM)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* Make small textures 1D tiled. */
   if (templ->width0 <= 16 || templ->height0 <= 16 || (sscreen->debug_flags & DBG(NO_2D_TILING)))
      return RADEON_SURF_MODE_1D;

   /* The allocator will switch to 1D if needed. */
   return RADEON_SURF_MODE_2D;
}

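/* Create a texture with the given DRM format modifier. Multi-planar formats
 * such as NV12 are allocated as one buffer; the planes become chained
 * pipe_resources linked through pipe_resource::next.
 */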
1205 static struct pipe_resource *
si_texture_create_with_modifier(struct pipe_screen * screen,const struct pipe_resource * templ,uint64_t modifier)1206 si_texture_create_with_modifier(struct pipe_screen *screen,
1207                                 const struct pipe_resource *templ,
1208                                 uint64_t modifier)
1209 {
1210    struct si_screen *sscreen = (struct si_screen *)screen;
1211    bool is_zs = util_format_is_depth_or_stencil(templ->format);
1212 
1213    if (templ->nr_samples >= 2) {
1214       /* This is hackish (overwriting the const pipe_resource template),
1215        * but should be harmless and gallium frontends can also see
1216        * the overriden number of samples in the created pipe_resource.
1217        */
1218       if (is_zs && sscreen->eqaa_force_z_samples) {
1219          ((struct pipe_resource *)templ)->nr_samples =
1220             ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_z_samples;
1221       } else if (!is_zs && sscreen->eqaa_force_color_samples) {
1222          ((struct pipe_resource *)templ)->nr_samples = sscreen->eqaa_force_coverage_samples;
1223          ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_color_samples;
1224       }
1225    }
1226 
1227    bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH ||
1228                            templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR;
1229    bool tc_compatible_htile =
1230       sscreen->info.chip_class >= GFX8 &&
1231       /* There are issues with TC-compatible HTILE on Tonga (and
1232        * Iceland is the same design), and documented bug workarounds
1233        * don't help. For example, this fails:
1234        *   piglit/bin/tex-miplevel-selection 'texture()' 2DShadow -auto
1235        */
1236       sscreen->info.family != CHIP_TONGA && sscreen->info.family != CHIP_ICELAND &&
1237       (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
1238       !(sscreen->debug_flags & DBG(NO_HYPERZ)) && !is_flushed_depth &&
1239       templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
1240       is_zs;
1241    enum radeon_surf_mode tile_mode = si_choose_tiling(sscreen, templ, tc_compatible_htile);
1242 
1243    /* This allocates textures with multiple planes like NV12 in 1 buffer. */
1244    enum
1245    {
1246       SI_TEXTURE_MAX_PLANES = 3
1247    };
1248    struct radeon_surf surface[SI_TEXTURE_MAX_PLANES] = {};
1249    struct pipe_resource plane_templ[SI_TEXTURE_MAX_PLANES];
1250    uint64_t plane_offset[SI_TEXTURE_MAX_PLANES] = {};
1251    uint64_t total_size = 0;
1252    unsigned max_alignment = 0;
1253    unsigned num_planes = util_format_get_num_planes(templ->format);
1254    assert(num_planes <= SI_TEXTURE_MAX_PLANES);
1255 
1256    /* Compute texture or plane layouts and offsets. */
1257    for (unsigned i = 0; i < num_planes; i++) {
1258       plane_templ[i] = *templ;
1259       plane_templ[i].format = util_format_get_plane_format(templ->format, i);
1260       plane_templ[i].width0 = util_format_get_plane_width(templ->format, i, templ->width0);
1261       plane_templ[i].height0 = util_format_get_plane_height(templ->format, i, templ->height0);
1262 
1263       /* Multi-plane allocations need PIPE_BIND_SHARED, because we can't
1264        * reallocate the storage to add it later: the storage is shared
1265        * by up to 3 pipe_resources.
1266        */
1267       if (num_planes > 1)
1268          plane_templ[i].bind |= PIPE_BIND_SHARED;
1269 
1270       if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, modifier,
1271                           false, plane_templ[i].bind & PIPE_BIND_SCANOUT,
1272                           is_flushed_depth, tc_compatible_htile))
1273          return NULL;
1274 
1275       plane_offset[i] = align64(total_size, 1 << surface[i].surf_alignment_log2);
1276       total_size = plane_offset[i] + surface[i].total_size;
1277       max_alignment = MAX2(max_alignment, 1 << surface[i].surf_alignment_log2);
1278    }
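   /* Worked example (illustrative numbers only; real sizes depend on the
    * chip and tiling mode): a 256x256 NV12 texture has plane 0 as 256x256
    * R8 and plane 1 as 128x128 R8G8. If plane 0 needs 0x11000 bytes with
    * 64 KiB alignment, plane_offset[1] = align64(0x11000, 0x10000) =
    * 0x20000, and total_size covers both planes in the single buffer. */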
1279 
1280    struct si_texture *plane0 = NULL, *last_plane = NULL;
1281 
1282    for (unsigned i = 0; i < num_planes; i++) {
1283       struct si_texture *tex =
1284          si_texture_create_object(screen, &plane_templ[i], &surface[i], plane0, NULL,
1285                                   plane_offset[i], 0, total_size, max_alignment);
1286       if (!tex) {
1287          si_texture_reference(&plane0, NULL);
1288          return NULL;
1289       }
1290 
1291       tex->plane_index = i;
1292       tex->num_planes = num_planes;
1293 
1294       if (!plane0) {
1295          plane0 = last_plane = tex;
1296       } else {
1297          last_plane->buffer.b.b.next = &tex->buffer.b.b;
1298          last_plane = tex;
1299       }
1300    }
1301 
1302    return (struct pipe_resource *)plane0;
1303 }
1304 
1305 struct pipe_resource *si_texture_create(struct pipe_screen *screen,
1306                                         const struct pipe_resource *templ)
1307 {
1308    return si_texture_create_with_modifier(screen, templ, DRM_FORMAT_MOD_INVALID);
1309 }
1310 
1311 static void si_query_dmabuf_modifiers(struct pipe_screen *screen,
1312                                       enum pipe_format format,
1313                                       int max,
1314                                       uint64_t *modifiers,
1315                                       unsigned int *external_only,
1316                                       int *count)
1317 {
1318    struct si_screen *sscreen = (struct si_screen *)screen;
1319 
1320    unsigned ac_mod_count = max;
1321    ac_get_supported_modifiers(&sscreen->info, &(struct ac_modifier_options) {
1322          .dcc = !(sscreen->debug_flags & DBG(NO_DCC)),
1323          /* DCC with retiling needs explicit resource flushes (see the
1324           * displayable-DCC handling in si_texture_from_winsys_buffer);
1325           * it is gated on the same debug flag as plain DCC. */
1326          .dcc_retile = !(sscreen->debug_flags & DBG(NO_DCC)),
1327       }, format, &ac_mod_count, max ? modifiers : NULL);
1328    if (max && external_only) {
1329       for (unsigned i = 0; i < ac_mod_count; ++i)
1330          external_only[i] = util_format_is_yuv(format);
1331    }
1332    *count = ac_mod_count;
1333 }
1334 
1335 static bool
1336 si_is_dmabuf_modifier_supported(struct pipe_screen *screen,
1337                                uint64_t modifier,
1338                                enum pipe_format format,
1339                                bool *external_only)
1340 {
1341    int allowed_mod_count;
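   /* Passing max=0 makes si_query_dmabuf_modifiers only report the number
    * of supported modifiers, which is used to size the allocations below. */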
1342    si_query_dmabuf_modifiers(screen, format, 0, NULL, NULL, &allowed_mod_count);
1343 
1344    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1345    if (!allowed_modifiers)
1346       return false;
1347 
1348    unsigned *external_array = NULL;
1349    if (external_only) {
1350       external_array = (unsigned *)calloc(allowed_mod_count, sizeof(unsigned));
1351       if (!external_array) {
1352          free(allowed_modifiers);
1353          return false;
1354       }
1355    }
1356 
1357    si_query_dmabuf_modifiers(screen, format, allowed_mod_count, allowed_modifiers,
1358                             external_array, &allowed_mod_count);
1359 
1360    bool supported = false;
1361    for (int i = 0; i < allowed_mod_count && !supported; ++i) {
1362       if (allowed_modifiers[i] != modifier)
1363          continue;
1364 
1365       supported = true;
1366       if (external_only)
1367          *external_only = external_array[i];
1368    }
1369 
1370    free(allowed_modifiers);
1371    free(external_array);
1372    return supported;
1373 }
1374 
1375 static unsigned
1376 si_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier,
1377                              enum pipe_format format)
1378 {
1379    unsigned planes = util_format_get_num_planes(format);
1380 
1381    if (IS_AMD_FMT_MOD(modifier) && planes == 1) {
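      /* AMD modifiers can encode DCC metadata as extra memory planes:
       * DCC_RETILE implies the main surface plus displayable and
       * pipe-aligned DCC planes, while plain DCC adds one DCC plane. */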
1382       if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
1383          return 3;
1384       else if (AMD_FMT_MOD_GET(DCC, modifier))
1385          return 2;
1386       else
1387          return 1;
1388    }
1389 
1390    return planes;
1391 }
1392 
1393 static bool
1394 si_modifier_supports_resource(struct pipe_screen *screen,
1395                               uint64_t modifier,
1396                               const struct pipe_resource *templ)
1397 {
1398    struct si_screen *sscreen = (struct si_screen *)screen;
1399    uint32_t max_width, max_height;
1400 
1401    ac_modifier_max_extent(&sscreen->info, modifier, &max_width, &max_height);
1402    return templ->width0 <= max_width && templ->height0 <= max_height;
1403 }
1404 
1405 static struct pipe_resource *
1406 si_texture_create_with_modifiers(struct pipe_screen *screen,
1407                                  const struct pipe_resource *templ,
1408                                  const uint64_t *modifiers,
1409                                  int modifier_count)
1410 {
1411    /* Buffers with modifiers make zero sense. */
1412    assert(templ->target != PIPE_BUFFER);
1413 
1414    /* Select modifier. */
1415    int allowed_mod_count;
1416    si_query_dmabuf_modifiers(screen, templ->format, 0, NULL, NULL, &allowed_mod_count);
1417 
1418    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1419    if (!allowed_modifiers) {
1420       return NULL;
1421    }
1422 
1423    /* This does not take external_only into account. We assume it is the same for all modifiers. */
1424    si_query_dmabuf_modifiers(screen, templ->format, allowed_mod_count, allowed_modifiers, NULL, &allowed_mod_count);
1425 
1426    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1427 
1428    /* Find the first allowed modifier that is also in the application-provided
1429     * list. We assume that si_query_dmabuf_modifiers returns the allowed
1430     * modifiers ordered by descending preference. */
1431    for (int i = 0; i < allowed_mod_count; ++i) {
1432       bool found = false;
1433       for (int j = 0; j < modifier_count && !found; ++j)
1434          if (modifiers[j] == allowed_modifiers[i] && si_modifier_supports_resource(screen, modifiers[j], templ))
1435             found = true;
1436 
1437       if (found) {
1438          modifier = allowed_modifiers[i];
1439          break;
1440       }
1441    }
1442 
1443    free(allowed_modifiers);
1444 
1445    if (modifier == DRM_FORMAT_MOD_INVALID) {
1446       return NULL;
1447    }
1448    return si_texture_create_with_modifier(screen, templ, modifier);
1449 }
1450 
1451 static bool si_texture_is_aux_plane(const struct pipe_resource *resource)
1452 {
1453    return resource->flags & SI_RESOURCE_AUX_PLANE;
1454 }
1455 
1456 static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen,
1457                                                            const struct pipe_resource *templ,
1458                                                            struct pb_buffer *buf, unsigned stride,
1459                                                            uint64_t offset, uint64_t modifier,
1460                                                            unsigned usage, bool dedicated)
1461 {
1462    struct radeon_surf surface = {};
1463    struct radeon_bo_metadata metadata = {};
1464    struct si_texture *tex;
1465    int r;
1466 
1467    /* Ignore metadata for non-zero planes. */
1468    if (offset != 0)
1469       dedicated = false;
1470 
1471    if (dedicated) {
1472       sscreen->ws->buffer_get_metadata(sscreen->ws, buf, &metadata, &surface);
1473    } else {
1474       /**
1475        * The bo metadata is unset for un-dedicated images. So we fall
1476        * back to linear. See answer to question 5 of the
1477        * VK_KHX_external_memory spec for some details.
1478        *
1479        * It is possible that this case isn't going to work if the
1480        * surface pitch isn't correctly aligned by default.
1481        *
1482        * In order to support it correctly we require multi-image
1483        * metadata to be synchronized between radv and radeonsi. The
1484        * semantics of associating multiple image metadata to a memory
1485        * object on the vulkan export side are not concretely defined
1486        * either.
1487        *
1488        * All the use cases we are aware of at the moment for memory
1489        * objects use dedicated allocations. So let's keep the initial
1490        * implementation simple.
1491        *
1492        * A possible alternative is to attempt to reconstruct the
1493        * tiling information when the TexParameter TEXTURE_TILING_EXT
1494        * is set.
1495        */
1496       metadata.mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
1497    }
1498 
1499    r = si_init_surface(sscreen, &surface, templ, metadata.mode, modifier, true,
1500                        surface.flags & RADEON_SURF_SCANOUT, false, false);
1501    if (r)
1502       return NULL;
1503 
1504    tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf,
1505                                   offset, stride, 0, 0);
1506    if (!tex)
1507       return NULL;
1508 
1509    tex->buffer.b.is_shared = true;
1510    tex->buffer.external_usage = usage;
1511    tex->num_planes = 1;
1512    if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
1513       tex->buffer.b.b.bind |= PIPE_BIND_PROTECTED;
1514 
1515    /* Account for multiple planes with lowered yuv import. */
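   /* (For example, NV12 may be imported as an R8 plane chained to an
    * R8G8 plane through pipe_resource::next.) */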
1516    struct pipe_resource *next_plane = tex->buffer.b.b.next;
1517    while (next_plane && !si_texture_is_aux_plane(next_plane)) {
1518       struct si_texture *next_tex = (struct si_texture *)next_plane;
1519       ++next_tex->num_planes;
1520       ++tex->num_planes;
1521       next_plane = next_plane->next;
1522    }
1523 
1524    unsigned nplanes = ac_surface_get_nplanes(&tex->surface);
1525    unsigned plane = 1;
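   /* Check that any imported aux planes (e.g. DCC metadata planes) match
    * the offsets and strides computed for this surface. */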
1526    while (next_plane) {
1527       struct si_auxiliary_texture *ptex = (struct si_auxiliary_texture *)next_plane;
1528       if (plane >= nplanes || ptex->buffer != tex->buffer.buf ||
1529           ptex->offset != ac_surface_get_plane_offset(sscreen->info.chip_class,
1530                                                       &tex->surface, plane, 0) ||
1531           ptex->stride != ac_surface_get_plane_stride(sscreen->info.chip_class,
1532                                                       &tex->surface, plane)) {
1533          si_texture_reference(&tex, NULL);
1534          return NULL;
1535       }
1536       ++plane;
1537       next_plane = next_plane->next;
1538    }
1539 
1540    if (plane != nplanes && tex->num_planes == 1) {
1541       si_texture_reference(&tex, NULL);
1542       return NULL;
1543    }
1544 
1545    if (!ac_surface_set_umd_metadata(&sscreen->info, &tex->surface,
1546                                     tex->buffer.b.b.nr_storage_samples,
1547                                     tex->buffer.b.b.last_level + 1,
1548                                     metadata.size_metadata,
1549                                     metadata.metadata)) {
1550       si_texture_reference(&tex, NULL);
1551       return NULL;
1552    }
1553 
1554    if (ac_surface_get_plane_offset(sscreen->info.chip_class, &tex->surface, 0, 0) +
1555         tex->surface.total_size > buf->size ||
1556        buf->alignment_log2 < tex->surface.alignment_log2) {
1557       si_texture_reference(&tex, NULL);
1558       return NULL;
1559    }
1560 
1561    /* Displayable DCC requires an explicit flush. */
1562    if (dedicated && offset == 0 && !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
1563        si_displayable_dcc_needs_explicit_flush(tex)) {
1564       /* TODO: do we need to decompress DCC? */
1565       if (si_texture_discard_dcc(sscreen, tex)) {
1566          /* Update BO metadata after disabling DCC. */
1567          si_set_tex_bo_metadata(sscreen, tex);
1568       }
1569    }
1570 
1571    assert(tex->surface.tile_swizzle == 0);
1572    return &tex->buffer.b.b;
1573 }
1574 
1575 static struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen,
1576                                                     const struct pipe_resource *templ,
1577                                                     struct winsys_handle *whandle, unsigned usage)
1578 {
1579    struct si_screen *sscreen = (struct si_screen *)screen;
1580    struct pb_buffer *buf = NULL;
1581 
1582    /* Support only 2D textures without mipmaps */
1583    if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT &&
1584         templ->target != PIPE_TEXTURE_2D_ARRAY) ||
1585        templ->last_level != 0)
1586       return NULL;
1587 
1588    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, sscreen->info.max_alignment);
1589    if (!buf)
1590       return NULL;
1591 
1592    if (whandle->plane >= util_format_get_num_planes(whandle->format)) {
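      /* This plane is beyond the format's own planes (e.g. a DCC metadata
       * plane), so wrap it in a stub texture; it is validated later by
       * si_texture_from_winsys_buffer. */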
1593       struct si_auxiliary_texture *tex = CALLOC_STRUCT_CL(si_auxiliary_texture);
1594       if (!tex)
1595          return NULL;
1596       tex->b.b = *templ;
1597       tex->b.b.flags |= SI_RESOURCE_AUX_PLANE;
1598       tex->stride = whandle->stride;
1599       tex->offset = whandle->offset;
1600       tex->buffer = buf;
1601       pipe_reference_init(&tex->b.b.reference, 1);
1602       tex->b.b.screen = screen;
1603 
1604       return &tex->b.b;
1605    }
1606 
1607    return si_texture_from_winsys_buffer(sscreen, templ, buf, whandle->stride, whandle->offset,
1608                                         whandle->modifier, usage, true);
1609 }
1610 
1611 bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture)
1612 {
1613    struct si_texture *tex = (struct si_texture *)texture;
1614    struct pipe_resource resource;
1615    enum pipe_format pipe_format = texture->format;
1616 
1617    assert(!tex->flushed_depth_texture);
1618 
1619    if (!tex->can_sample_z && tex->can_sample_s) {
1620       switch (pipe_format) {
1621       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1622          /* Save memory by not allocating the S plane. */
1623          pipe_format = PIPE_FORMAT_Z32_FLOAT;
1624          break;
1625       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1626       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1627          /* Save memory bandwidth by not copying the
1628           * stencil part during flush.
1629           *
1630           * This potentially increases memory bandwidth
1631           * if an application uses both Z and S texturing
1632           * simultaneously (a flushed Z24S8 texture
1633           * would be stored compactly), but how often
1634           * does that really happen?
1635           */
1636          pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1637          break;
1638       default:;
1639       }
1640    } else if (!tex->can_sample_s && tex->can_sample_z) {
1641       assert(util_format_has_stencil(util_format_description(pipe_format)));
1642 
1643       /* DB->CB copies to an 8bpp surface don't work. */
1644       pipe_format = PIPE_FORMAT_X24S8_UINT;
1645    }
1646 
1647    memset(&resource, 0, sizeof(resource));
1648    resource.target = texture->target;
1649    resource.format = pipe_format;
1650    resource.width0 = texture->width0;
1651    resource.height0 = texture->height0;
1652    resource.depth0 = texture->depth0;
1653    resource.array_size = texture->array_size;
1654    resource.last_level = texture->last_level;
1655    resource.nr_samples = texture->nr_samples;
1656    resource.usage = PIPE_USAGE_DEFAULT;
1657    resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1658    resource.flags = texture->flags | SI_RESOURCE_FLAG_FLUSHED_DEPTH;
1659 
1660    tex->flushed_depth_texture =
1661       (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1662    if (!tex->flushed_depth_texture) {
1663       PRINT_ERR("failed to create temporary texture to hold flushed depth\n");
1664       return false;
1665    }
1666    return true;
1667 }
1668 
1669 /**
1670  * Initialize the pipe_resource descriptor to be of the same size as the box,
1671  * which is supposed to hold a subregion of the texture "orig" at the given
1672  * mipmap level.
1673  */
1674 static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pipe_resource *orig,
1675                                            const struct pipe_box *box, unsigned level,
1676                                            unsigned usage, unsigned flags)
1677 {
1678    memset(res, 0, sizeof(*res));
1679    res->format = orig->format;
1680    res->width0 = box->width;
1681    res->height0 = box->height;
1682    res->depth0 = 1;
1683    res->array_size = 1;
1684    res->usage = usage;
1685    res->flags = flags;
1686 
1687    if (flags & SI_RESOURCE_FLAG_FORCE_LINEAR && util_format_is_compressed(orig->format)) {
1688       /* Transfer resources are allocated with linear tiling, which is
1689        * not supported for compressed formats.
1690        */
1691       unsigned blocksize = util_format_get_blocksize(orig->format);
1692 
1693       if (blocksize == 8) {
1694          res->format = PIPE_FORMAT_R16G16B16A16_UINT;
1695       } else {
1696          assert(blocksize == 16);
1697          res->format = PIPE_FORMAT_R32G32B32A32_UINT;
1698       }
1699 
1700       res->width0 = util_format_get_nblocksx(orig->format, box->width);
1701       res->height0 = util_format_get_nblocksy(orig->format, box->height);
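      /* For example, a 64x64 DXT1 box (4x4 blocks, 8 bytes each) becomes
       * a 16x16 R16G16B16A16_UINT staging texture. */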
1702    }
1703 
1704    /* We must set the correct texture target and dimensions for a 3D box. */
1705    if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1706       res->target = PIPE_TEXTURE_2D_ARRAY;
1707       res->array_size = box->depth;
1708    } else {
1709       res->target = PIPE_TEXTURE_2D;
1710    }
1711 }
1712 
1713 static bool si_can_invalidate_texture(struct si_screen *sscreen, struct si_texture *tex,
1714                                       unsigned transfer_usage, const struct pipe_box *box)
1715 {
1716    return !tex->buffer.b.is_shared && !(tex->surface.flags & RADEON_SURF_IMPORTED) &&
1717           !(transfer_usage & PIPE_MAP_READ) && tex->buffer.b.b.last_level == 0 &&
1718           util_texrange_covers_whole_level(&tex->buffer.b.b, 0, box->x, box->y, box->z, box->width,
1719                                            box->height, box->depth);
1720 }
1721 
1722 static void si_texture_invalidate_storage(struct si_context *sctx, struct si_texture *tex)
1723 {
1724    struct si_screen *sscreen = sctx->screen;
1725 
1726    /* There is no point in discarding depth and tiled buffers. */
1727    assert(!tex->is_depth);
1728    assert(tex->surface.is_linear);
1729 
1730    /* Reallocate the buffer in the same pipe_resource. */
1731    si_alloc_resource(sscreen, &tex->buffer);
1732 
1733    /* Initialize the CMASK base address (needed even without CMASK). */
1734    tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
1735 
1736    p_atomic_inc(&sscreen->dirty_tex_counter);
1737 
1738    sctx->num_alloc_tex_transfer_bytes += tex->surface.total_size;
1739 }
1740 
1741 static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resource *texture,
1742                                      unsigned level, unsigned usage, const struct pipe_box *box,
1743                                      struct pipe_transfer **ptransfer)
1744 {
1745    struct si_context *sctx = (struct si_context *)ctx;
1746    struct si_texture *tex = (struct si_texture *)texture;
1747    struct si_transfer *trans;
1748    struct si_resource *buf;
1749    unsigned offset = 0;
1750    char *map;
1751    bool use_staging_texture = tex->buffer.flags & RADEON_FLAG_ENCRYPTED;
1752 
1753    assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR));
1754    assert(box->width && box->height && box->depth);
1755 
1756    if (tex->buffer.b.b.flags & SI_RESOURCE_AUX_PLANE)
1757       return NULL;
1758 
1759    if ((tex->buffer.flags & RADEON_FLAG_ENCRYPTED) && usage & PIPE_MAP_READ)
1760       return NULL;
1761 
1762    if (tex->is_depth) {
1763       /* Depth textures use staging unconditionally. */
1764       use_staging_texture = true;
1765    } else {
1766       /* Degrade the tile mode if we get too many transfers on APUs.
1767        * On dGPUs, the staging texture is always faster.
1768        * Only count uploads that are at least 4x4 pixels large.
1769        */
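      /* (The threshold below reallocates the texture as linear after the
       * 10th such level-0 upload.) */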
1770       if (!sctx->screen->info.has_dedicated_vram && level == 0 && box->width >= 4 &&
1771           box->height >= 4 && p_atomic_inc_return(&tex->num_level0_transfers) == 10) {
1772          bool can_invalidate = si_can_invalidate_texture(sctx->screen, tex, usage, box);
1773 
1774          si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_LINEAR, can_invalidate);
1775       }
1776 
1777       /* Tiled textures need to be converted into a linear texture for CPU
1778        * access. The staging texture is always linear and is placed in GART.
1779        *
1780        * dGPUs use a staging texture for VRAM, so that we don't map it and
1781        * don't relocate it to GTT.
1782        *
1783        * Reading from VRAM or GTT WC is slow, always use the staging
1784        * texture in this case.
1785        *
1786        * Use the staging texture for uploads if the underlying BO
1787        * is busy.
1788        */
1789       if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
1790           (tex->buffer.domains & RADEON_DOMAIN_VRAM && sctx->screen->info.has_dedicated_vram &&
1791            !sctx->screen->info.smart_access_memory))
1792          use_staging_texture = true;
1793       else if (usage & PIPE_MAP_READ)
1794          use_staging_texture =
1795             tex->buffer.domains & RADEON_DOMAIN_VRAM || tex->buffer.flags & RADEON_FLAG_GTT_WC;
1796       /* Write & linear only: */
1797       else if (si_cs_is_buffer_referenced(sctx, tex->buffer.buf, RADEON_USAGE_READWRITE) ||
1798                !sctx->ws->buffer_wait(sctx->ws, tex->buffer.buf, 0, RADEON_USAGE_READWRITE)) {
1799          /* It's busy. */
1800          if (si_can_invalidate_texture(sctx->screen, tex, usage, box))
1801             si_texture_invalidate_storage(sctx, tex);
1802          else
1803             use_staging_texture = true;
1804       }
1805    }
1806 
1807    trans = CALLOC_STRUCT(si_transfer);
1808    if (!trans)
1809       return NULL;
1810    pipe_resource_reference(&trans->b.b.resource, texture);
1811    trans->b.b.level = level;
1812    trans->b.b.usage = usage;
1813    trans->b.b.box = *box;
1814 
1815    if (use_staging_texture) {
1816       struct pipe_resource resource;
1817       struct si_texture *staging;
1818       unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
1819       unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL;
1820 
1821       si_init_temp_resource_from_box(&resource, texture, box, level, bo_usage,
1822                                      bo_flags);
1823 
1824       /* Since depth-stencil textures don't support linear tiling,
1825        * blit from ZS to color and vice versa. u_blitter will do
1826        * the packing for these formats.
1827        */
1828       if (tex->is_depth)
1829          resource.format = util_blitter_get_color_format_for_zs(resource.format);
1830 
1831       /* Create the temporary texture. */
1832       staging = (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1833       if (!staging) {
1834          PRINT_ERR("failed to create temporary texture to hold untiled copy\n");
1835          goto fail_trans;
1836       }
1837       trans->staging = &staging->buffer;
1838 
1839       /* Just get the strides. */
1840       si_texture_get_offset(sctx->screen, staging, 0, NULL, &trans->b.b.stride,
1841                             &trans->b.b.layer_stride);
1842 
1843       if (usage & PIPE_MAP_READ)
1844          si_copy_to_staging_texture(ctx, trans);
1845       else
1846          usage |= PIPE_MAP_UNSYNCHRONIZED;
1847 
1848       buf = trans->staging;
1849    } else {
1850       /* the resource is mapped directly */
1851       offset = si_texture_get_offset(sctx->screen, tex, level, box, &trans->b.b.stride,
1852                                      &trans->b.b.layer_stride);
1853       buf = &tex->buffer;
1854    }
1855 
1856    /* Always unmap texture CPU mappings on 32-bit architectures, so that
1857     * we don't run out of the CPU address space.
1858     */
1859    if (sizeof(void *) == 4)
1860       usage |= RADEON_MAP_TEMPORARY;
1861 
1862    if (!(map = si_buffer_map(sctx, buf, usage)))
1863       goto fail_trans;
1864 
1865    *ptransfer = &trans->b.b;
1866    return map + offset;
1867 
1868 fail_trans:
1869    si_resource_reference(&trans->staging, NULL);
1870    pipe_resource_reference(&trans->b.b.resource, NULL);
1871    FREE(trans);
1872    return NULL;
1873 }
1874 
1875 static void si_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer)
1876 {
1877    struct si_context *sctx = (struct si_context *)ctx;
1878    struct si_transfer *stransfer = (struct si_transfer *)transfer;
1879    struct pipe_resource *texture = transfer->resource;
1880    struct si_texture *tex = (struct si_texture *)texture;
1881 
1882    /* Always unmap texture CPU mappings on 32-bit architectures, so that
1883     * we don't run out of the CPU address space.
1884     */
1885    if (sizeof(void *) == 4) {
1886       struct si_resource *buf = stransfer->staging ? stransfer->staging : &tex->buffer;
1887 
1888       sctx->ws->buffer_unmap(sctx->ws, buf->buf);
1889    }
1890 
1891    if ((transfer->usage & PIPE_MAP_WRITE) && stransfer->staging)
1892       si_copy_from_staging_texture(ctx, stransfer);
1893 
1894    if (stransfer->staging) {
1895       sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size;
1896       si_resource_reference(&stransfer->staging, NULL);
1897    }
1898 
1899    /* Heuristic for {upload, draw, upload, draw, ..}:
1900     *
1901     * Flush the gfx IB if we've allocated too much texture storage.
1902     *
1903     * The idea is that we don't want to build IBs that use too much
1904     * memory and put pressure on the kernel memory manager, and we also
1905     * want temporary and invalidated buffers to go idle ASAP to
1906     * decrease the total memory usage or make them reusable. The memory
1907     * usage will be slightly higher than given here because of the buffer
1908     * cache in the winsys.
1909     *
1910     * The result is that the kernel memory manager is never a bottleneck.
1911     */
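   /* (For example, with 4 GiB of GART, this flushes once roughly 1 GiB of
    * transfer allocations have accumulated.) */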
1912    if (sctx->num_alloc_tex_transfer_bytes > sctx->screen->info.gart_size / 4) {
1913       si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
1914       sctx->num_alloc_tex_transfer_bytes = 0;
1915    }
1916 
1917    pipe_resource_reference(&transfer->resource, NULL);
1918    FREE(transfer);
1919 }
1920 
1921 /* Return whether it's allowed to reinterpret one format as another with DCC enabled.
1922  */
1923 bool vi_dcc_formats_compatible(struct si_screen *sscreen, enum pipe_format format1,
1924                                enum pipe_format format2)
1925 {
1926    const struct util_format_description *desc1, *desc2;
1927 
1928    /* No format change - exit early. */
1929    if (format1 == format2)
1930       return true;
1931 
1932    format1 = si_simplify_cb_format(format1);
1933    format2 = si_simplify_cb_format(format2);
1934 
1935    /* Check again after format adjustments. */
1936    if (format1 == format2)
1937       return true;
1938 
1939    desc1 = util_format_description(format1);
1940    desc2 = util_format_description(format2);
1941 
1942    if (desc1->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc2->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1943       return false;
1944 
1945    /* Float and non-float are totally incompatible. */
1946    if ((desc1->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) !=
1947        (desc2->channel[0].type == UTIL_FORMAT_TYPE_FLOAT))
1948       return false;
1949 
1950    /* Channel sizes must match across DCC formats.
1951     * Comparing just the first 2 channels should be enough.
1952     */
1953    if (desc1->channel[0].size != desc2->channel[0].size ||
1954        (desc1->nr_channels >= 2 && desc1->channel[1].size != desc2->channel[1].size))
1955       return false;
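   /* (For instance, R8G8B8A8_UNORM vs. R16G16_UNORM is rejected here
    * because the channel sizes differ.) */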
1956 
1957    /* None of the checks below are needed if the driver never uses the DCC
1958     * clear code with the value of 1.
1959     */
1960 
1961    /* If the clear values are all 1 or all 0, this constraint can be
1962     * ignored. */
1963    if (vi_alpha_is_on_msb(sscreen, format1) != vi_alpha_is_on_msb(sscreen, format2))
1964       return false;
1965 
1966    /* Channel types must match if the clear value of 1 is used.
1967     * The type categories are only float, signed, unsigned.
1968     * NORM and INT are always compatible.
1969     */
1970    if (desc1->channel[0].type != desc2->channel[0].type ||
1971        (desc1->nr_channels >= 2 && desc1->channel[1].type != desc2->channel[1].type))
1972       return false;
1973 
1974    return true;
1975 }
1976 
1977 bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, unsigned level,
1978                                      enum pipe_format view_format)
1979 {
1980    struct si_texture *stex = (struct si_texture *)tex;
1981 
1982    return vi_dcc_enabled(stex, level) &&
1983           !vi_dcc_formats_compatible((struct si_screen *)tex->screen, tex->format, view_format);
1984 }
1985 
1986 /* This can't be merged with the above function, because
1987  * vi_dcc_formats_compatible should be called only when DCC is enabled. */
1988 void vi_disable_dcc_if_incompatible_format(struct si_context *sctx, struct pipe_resource *tex,
1989                                            unsigned level, enum pipe_format view_format)
1990 {
1991    struct si_texture *stex = (struct si_texture *)tex;
1992 
1993    if (vi_dcc_formats_are_incompatible(tex, level, view_format))
1994       if (!si_texture_disable_dcc(sctx, stex))
1995          si_decompress_dcc(sctx, stex);
1996 }
1997 
1998 struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
1999                                               struct pipe_resource *texture,
2000                                               const struct pipe_surface *templ, unsigned width0,
2001                                               unsigned height0, unsigned width, unsigned height)
2002 {
2003    struct si_surface *surface = CALLOC_STRUCT(si_surface);
2004 
2005    if (!surface)
2006       return NULL;
2007 
2008    assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
2009    assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
2010 
2011    pipe_reference_init(&surface->base.reference, 1);
2012    pipe_resource_reference(&surface->base.texture, texture);
2013    surface->base.context = pipe;
2014    surface->base.format = templ->format;
2015    surface->base.width = width;
2016    surface->base.height = height;
2017    surface->base.u = templ->u;
2018 
2019    surface->width0 = width0;
2020    surface->height0 = height0;
2021 
2022    surface->dcc_incompatible =
2023       texture->target != PIPE_BUFFER &&
2024       vi_dcc_formats_are_incompatible(texture, templ->u.tex.level, templ->format);
2025    return &surface->base;
2026 }
2027 
2028 static struct pipe_surface *si_create_surface(struct pipe_context *pipe, struct pipe_resource *tex,
2029                                               const struct pipe_surface *templ)
2030 {
2031    unsigned level = templ->u.tex.level;
2032    unsigned width = u_minify(tex->width0, level);
2033    unsigned height = u_minify(tex->height0, level);
2034    unsigned width0 = tex->width0;
2035    unsigned height0 = tex->height0;
2036 
2037    if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
2038       const struct util_format_description *tex_desc = util_format_description(tex->format);
2039       const struct util_format_description *templ_desc = util_format_description(templ->format);
2040 
2041       assert(tex_desc->block.bits == templ_desc->block.bits);
2042 
2043       /* Adjust size of surface if and only if the block width or
2044        * height is changed. */
2045       if (tex_desc->block.width != templ_desc->block.width ||
2046           tex_desc->block.height != templ_desc->block.height) {
2047          unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
2048          unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
2049 
2050          width = nblks_x * templ_desc->block.width;
2051          height = nblks_y * templ_desc->block.height;
2052 
2053          width0 = util_format_get_nblocksx(tex->format, width0);
2054          height0 = util_format_get_nblocksy(tex->format, height0);
2055       }
2056    }
2057 
2058    return si_create_surface_custom(pipe, tex, templ, width0, height0, width, height);
2059 }
2060 
2061 static void si_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surface)
2062 {
2063    pipe_resource_reference(&surface->texture, NULL);
2064    FREE(surface);
2065 }
2066 
2067 unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
2068 {
2069    const struct util_format_description *desc = util_format_description(format);
2070 
2071 #define HAS_SWIZZLE(chan, swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
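   /* (For example, PIPE_FORMAT_B8G8R8A8_UNORM has swizzle ZYXW and maps
    * to V_028C70_SWAP_ALT in the 4-channel case below.) */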
2072 
2073    if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
2074       return V_028C70_SWAP_STD;
2075 
2076    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
2077       return ~0U;
2078 
2079    switch (desc->nr_channels) {
2080    case 1:
2081       if (HAS_SWIZZLE(0, X))
2082          return V_028C70_SWAP_STD; /* X___ */
2083       else if (HAS_SWIZZLE(3, X))
2084          return V_028C70_SWAP_ALT_REV; /* ___X */
2085       break;
2086    case 2:
2087       if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) ||
2088           (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y)))
2089          return V_028C70_SWAP_STD; /* XY__ */
2090       else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) ||
2091                (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
2092                (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X)))
2093          /* YX__ */
2094          return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
2095       else if (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(3, Y))
2096          return V_028C70_SWAP_ALT; /* X__Y */
2097       else if (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(3, X))
2098          return V_028C70_SWAP_ALT_REV; /* Y__X */
2099       break;
2100    case 3:
2101       if (HAS_SWIZZLE(0, X))
2102          return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
2103       else if (HAS_SWIZZLE(0, Z))
2104          return V_028C70_SWAP_STD_REV; /* ZYX */
2105       break;
2106    case 4:
2107       /* check the middle channels, the 1st and 4th channel can be NONE */
2108       if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, Z)) {
2109          return V_028C70_SWAP_STD; /* XYZW */
2110       } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, Y)) {
2111          return V_028C70_SWAP_STD_REV; /* WZYX */
2112       } else if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, X)) {
2113          return V_028C70_SWAP_ALT; /* ZYXW */
2114       } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, W)) {
2115          /* YZWX */
2116          if (desc->is_array)
2117             return V_028C70_SWAP_ALT_REV;
2118          else
2119             return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
2120       }
2121       break;
2122    }
2123    return ~0U;
2124 }
2125 
2126 static struct pipe_memory_object *
2127 si_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated)
2128 {
2129    struct si_screen *sscreen = (struct si_screen *)screen;
2130    struct si_memory_object *memobj = CALLOC_STRUCT(si_memory_object);
2131    struct pb_buffer *buf = NULL;
2132 
2133    if (!memobj)
2134       return NULL;
2135 
2136    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, sscreen->info.max_alignment);
2137    if (!buf) {
2138       free(memobj);
2139       return NULL;
2140    }
2141 
2142    memobj->b.dedicated = dedicated;
2143    memobj->buf = buf;
2144    memobj->stride = whandle->stride;
2145 
2146    return (struct pipe_memory_object *)memobj;
2147 }
2148 
2149 static void si_memobj_destroy(struct pipe_screen *screen, struct pipe_memory_object *_memobj)
2150 {
2151    struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
2152 
2153    radeon_bo_reference(((struct si_screen*)screen)->ws, &memobj->buf, NULL);
2154    free(memobj);
2155 }
2156 
2157 static struct pipe_resource *si_resource_from_memobj(struct pipe_screen *screen,
2158                                                     const struct pipe_resource *templ,
2159                                                     struct pipe_memory_object *_memobj,
2160                                                     uint64_t offset)
2161 {
2162    struct si_screen *sscreen = (struct si_screen *)screen;
2163    struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
2164    struct pipe_resource *res;
2165 
2166    if (templ->target == PIPE_BUFFER)
2167       res = si_buffer_from_winsys_buffer(screen, templ, memobj->buf,
2168                                          memobj->b.dedicated);
2169    else
2170       res = si_texture_from_winsys_buffer(sscreen, templ, memobj->buf,
2171                                           memobj->stride,
2172                                           offset, DRM_FORMAT_MOD_INVALID,
2173                                           PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE | PIPE_HANDLE_USAGE_SHADER_WRITE,
2174                                           memobj->b.dedicated);
2175 
2176    if (!res)
2177       return NULL;
2178 
2179    /* si_texture_from_winsys_buffer doesn't increment refcount of
2180     * memobj->buf, so increment it here.
2181     */
2182    struct pb_buffer *buf = NULL;
2183    radeon_bo_reference(sscreen->ws, &buf, memobj->buf);
2184    return res;
2185 }
2186 
2187 static bool si_check_resource_capability(struct pipe_screen *screen, struct pipe_resource *resource,
2188                                          unsigned bind)
2189 {
2190    struct si_texture *tex = (struct si_texture *)resource;
2191 
2192    /* Buffers only support the linear flag. */
2193    if (resource->target == PIPE_BUFFER)
2194       return (bind & ~PIPE_BIND_LINEAR) == 0;
2195 
2196    if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
2197       return false;
2198 
2199    if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
2200       return false;
2201 
2202    /* TODO: PIPE_BIND_CURSOR - do we care? */
2203    return true;
2204 }
2205 
2206 void si_init_screen_texture_functions(struct si_screen *sscreen)
2207 {
2208    sscreen->b.resource_from_handle = si_texture_from_handle;
2209    sscreen->b.resource_get_handle = si_texture_get_handle;
2210    sscreen->b.resource_get_param = si_resource_get_param;
2211    sscreen->b.resource_get_info = si_texture_get_info;
2212    sscreen->b.resource_from_memobj = si_resource_from_memobj;
2213    sscreen->b.memobj_create_from_handle = si_memobj_from_handle;
2214    sscreen->b.memobj_destroy = si_memobj_destroy;
2215    sscreen->b.check_resource_capability = si_check_resource_capability;
2216 
2217    /* If these hooks are not set, the frontend falls back to non-modifier
2218     * creation. This works around applications that use modifiers that are
2219     * not allowed, combined with the lack of error reporting in
2220     * gbm_dri_surface_create. */
2221    if (sscreen->info.chip_class >= GFX9 && sscreen->info.kernel_has_modifiers) {
2222       sscreen->b.resource_create_with_modifiers = si_texture_create_with_modifiers;
2223       sscreen->b.query_dmabuf_modifiers = si_query_dmabuf_modifiers;
2224       sscreen->b.is_dmabuf_modifier_supported = si_is_dmabuf_modifier_supported;
2225       sscreen->b.get_dmabuf_modifier_planes = si_get_dmabuf_modifier_planes;
2226    }
2227 }
2228 
2229 void si_init_context_texture_functions(struct si_context *sctx)
2230 {
2231    sctx->b.texture_map = si_texture_transfer_map;
2232    sctx->b.texture_unmap = si_texture_transfer_unmap;
2233    sctx->b.create_surface = si_create_surface;
2234    sctx->b.surface_destroy = si_surface_destroy;
2235 }
2236