/*
 * Copyright © 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
25 
26 #ifndef AC_SURFACE_H
27 #define AC_SURFACE_H
28 
29 #include "amd_family.h"
30 #include "util/format/u_format.h"
31 
32 /* NIR is optional. Some components don't want to include NIR with ac_surface.h. */
33 #ifdef AC_SURFACE_INCLUDE_NIR
34 #include "compiler/nir/nir_builder.h"
35 #endif
36 
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
44 
/* Forward declarations. These types are only used through pointers in this
 * header, so their definitions are not needed here.
 */
struct ac_addrlib;

struct amdgpu_gpu_info;
struct radeon_info;
50 
/* Size of the per-mipmap-level arrays in the surface layout structs. */
#define RADEON_SURF_MAX_LEVELS 15
52 
/* Surface tiling mode. The enumerators start at 1; no enumerator is defined
 * for the value 0. The largest value (3) fits the 2-bit
 * legacy_surf_level::mode bitfield.
 */
enum radeon_surf_mode
{
   RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
   RADEON_SURF_MODE_1D = 2,
   RADEON_SURF_MODE_2D = 3,
};
59 
/* This describes D/S/Z/R swizzle modes.
 * Defined in the GB_TILE_MODEn.MICRO_TILE_MODE_NEW order.
 *
 * All four values fit the 3-bit radeon_surf::micro_tile_mode bitfield.
 */
enum radeon_micro_mode
{
   RADEON_MICRO_MODE_DISPLAY = 0,
   RADEON_MICRO_MODE_STANDARD = 1,
   RADEON_MICRO_MODE_DEPTH = 2,
   RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
};
70 
/* Surface flag bits.
 *
 * Bits 16-21 are plain int constants; bits 22 and up are written with 1ull
 * and are therefore unsigned long long. RADEON_SURF_PRT (bit 32) does not fit
 * in 32 bits, so a variable holding these flags must be 64 bits wide.
 * NOTE(review): presumably these are stored in radeon_surf::flags (uint64_t);
 * confirm against the callers.
 */
/* the first 16 bits are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_SCANOUT      (1 << 16)
#define RADEON_SURF_ZBUFFER      (1 << 17)
#define RADEON_SURF_SBUFFER      (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_FMASK                 (1 << 21)
#define RADEON_SURF_DISABLE_DCC           (1ull << 22)
#define RADEON_SURF_TC_COMPATIBLE_HTILE   (1ull << 23)
#define RADEON_SURF_IMPORTED              (1ull << 24)
#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1ull << 25)
#define RADEON_SURF_SHAREABLE             (1ull << 26)
#define RADEON_SURF_NO_RENDER_TARGET      (1ull << 27)
/* Force a swizzle mode (gfx9+) or tile mode (gfx6-8).
 * If this is not set, optimize for space. */
#define RADEON_SURF_FORCE_SWIZZLE_MODE    (1ull << 28)
#define RADEON_SURF_NO_FMASK              (1ull << 29)
#define RADEON_SURF_NO_HTILE              (1ull << 30)
#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1ull << 31)
#define RADEON_SURF_PRT                   (1ull << 32)
91 
92 struct legacy_surf_level {
93    uint32_t offset_256B;   /* divided by 256, the hw can only do 40-bit addresses */
94    uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. */
95    unsigned nblk_x : 15;
96    unsigned nblk_y : 15;
97    enum radeon_surf_mode mode : 2;
98 };
99 
/* Per-mipmap-level DCC information of a legacy color surface (element type of
 * legacy_surf_layout::color.dcc_level).
 */
struct legacy_surf_dcc_level {
   uint32_t dcc_offset;    /* relative offset within DCC mip tree */
   uint32_t dcc_fast_clear_size;
   uint32_t dcc_slice_fast_clear_size;
};
105 
/* FMASK parameters of a legacy color surface (type of
 * legacy_surf_layout::color.fmask). The "max" notes give the largest value
 * each field is expected to hold.
 */
struct legacy_surf_fmask {
   unsigned slice_tile_max; /* max 4M */
   uint8_t tiling_index;    /* max 31 */
   uint8_t bankh;           /* max 8 */
   uint16_t pitch_in_pixels;
};
112 
113 struct legacy_surf_layout {
114    unsigned bankw : 4;               /* max 8 */
115    unsigned bankh : 4;               /* max 8 */
116    unsigned mtilea : 4;              /* max 8 */
117    unsigned tile_split : 13;         /* max 4K */
118    unsigned stencil_tile_split : 13; /* max 4K */
119    unsigned pipe_config : 5;         /* max 17 */
120    unsigned num_banks : 5;           /* max 16 */
121    unsigned macro_tile_index : 4;    /* max 15 */
122 
123    /* Whether the depth miptree or stencil miptree as used by the DB are
124     * adjusted from their TC compatible form to ensure depth/stencil
125     * compatibility. If either is true, the corresponding plane cannot be
126     * sampled from.
127     */
128    unsigned depth_adjusted : 1;
129    unsigned stencil_adjusted : 1;
130 
131    struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
132    uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
133 
134    union {
135       /* Color layout */
136       struct {
137          struct legacy_surf_dcc_level dcc_level[RADEON_SURF_MAX_LEVELS];
138          struct legacy_surf_fmask fmask;
139          unsigned cmask_slice_tile_max;
140       } color;
141 
142       /* Z/S layout */
143       struct {
144          struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
145          uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
146       } zs;
147    };
148 };
149 
/* Same as addrlib - AddrResourceType.
 * The values are implicit and consecutive starting at 0, so the order of the
 * enumerators must not change.
 */
enum gfx9_resource_type
{
   RADEON_RESOURCE_1D = 0,
   RADEON_RESOURCE_2D,
   RADEON_RESOURCE_3D,
};
157 
/* Metadata flags packed into bitfields of a uint8_t (7 bits used in total).
 * Used as gfx9_surf_layout::color.dcc.
 */
struct gfx9_surf_meta_flags {
   uint8_t rb_aligned : 1;   /* optimal for RBs */
   uint8_t pipe_aligned : 1; /* optimal for TC */
   uint8_t independent_64B_blocks : 1;
   uint8_t independent_128B_blocks : 1;
   uint8_t max_compressed_block_size : 2;
   uint8_t display_equation_valid : 1;
};
166 
/* Offset and size of one metadata level (element type of
 * gfx9_surf_layout::meta_levels and type of ::color.cmask_level0).
 */
struct gfx9_surf_level {
   unsigned offset;
   unsigned size; /* the size of one level in one layer (the image is an array of layers
                   * where each layer has an array of levels) */
};
172 
/**
 * Meta address equation.
 *
 * DCC/HTILE address equation for doing DCC/HTILE address computations in shaders.
 *
 * ac_surface_meta_address_test.c contains the reference implementation.
 * ac_nir_{dcc,htile}_addr_from_coord is the NIR implementation.
 *
 * For DCC:
 * The gfx9 equation doesn't support mipmapping.
 * The gfx10 equation doesn't support mipmapping and MSAA.
 * (those are also limitations of Addr2ComputeDccAddrFromCoord)
 *
 * For HTILE:
 * The gfx9 equation isn't implemented.
 * The gfx10 equation doesn't support mipmapping.
 *
 * Only one member of the union "u" is meaningful at a time; the struct itself
 * does not record which one.
 */
struct gfx9_meta_equation {
   uint16_t meta_block_width;
   uint16_t meta_block_height;
   uint16_t meta_block_depth;

   union {
      /* The gfx9 DCC equation is chip-specific, and it varies with:
       * - resource type
       * - swizzle_mode
       * - bpp
       * - number of samples
       * - number of fragments
       * - pipe_aligned
       * - rb_aligned
       */
      struct {
         uint8_t num_bits;
         uint8_t num_pipe_bits;

         struct {
            struct {
               uint8_t dim:3; /* 0..4 */
               uint8_t ord:5; /* 0..31 */
            } coord[5]; /* 0..num_coords-1 */
         } bit[20]; /* 0..num_bits-1 */
      } gfx9;

      /* The gfx10 DCC equation is chip-specific, it requires 64KB_R_X, and it varies with:
       * - bpp
       * - number of samples
       * - number of fragments
       * - pipe_aligned
       *
       * The gfx10 HTILE equation is chip-specific, it requires 64KB_Z_X, and it varies with:
       * - number of samples
       */
      uint16_t gfx10_bits[60];
   } u;
};
229 
230 struct gfx9_surf_layout {
231    uint16_t epitch;           /* gfx9 only, not on gfx10 */
232    uint8_t swizzle_mode;      /* color or depth */
233 
234    enum gfx9_resource_type resource_type:8; /* 1D, 2D or 3D */
235    uint16_t surf_pitch;                   /* in blocks */
236    uint16_t surf_height;
237 
238    uint64_t surf_offset; /* 0 unless imported with an offset */
239    /* The size of the 2D plane containing all mipmap levels. */
240    uint64_t surf_slice_size;
241    /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
242    uint32_t offset[RADEON_SURF_MAX_LEVELS];
243    /* Mipmap level pitch in elements. Only valid for LINEAR. */
244    uint16_t pitch[RADEON_SURF_MAX_LEVELS];
245 
246    uint16_t base_mip_width;
247    uint16_t base_mip_height;
248 
249    /* Pitch of level in blocks, only valid for prt images. */
250    uint16_t prt_level_pitch[RADEON_SURF_MAX_LEVELS];
251    /* Offset within slice in bytes, only valid for prt images. */
252    uint32_t prt_level_offset[RADEON_SURF_MAX_LEVELS];
253 
254    /* DCC or HTILE level info */
255    struct gfx9_surf_level meta_levels[RADEON_SURF_MAX_LEVELS];
256 
257    union {
258       /* Color */
259       struct {
260          struct gfx9_surf_meta_flags dcc; /* metadata of color */
261          uint8_t fmask_swizzle_mode;
262          uint16_t fmask_epitch;     /* gfx9 only, not on gfx10 */
263 
264          uint16_t dcc_pitch_max;
265          uint16_t dcc_height;
266 
267          uint8_t dcc_block_width;
268          uint8_t dcc_block_height;
269          uint8_t dcc_block_depth;
270 
271          /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
272           * The 3D engine doesn't support that layout except for chips with 1 RB.
273           * All other chips must set rb_aligned=1.
274           * A compute shader needs to convert from aligned DCC to unaligned.
275           */
276          uint8_t display_dcc_alignment_log2;
277          uint32_t display_dcc_size;
278          uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
279          uint16_t display_dcc_height;
280          bool dcc_retile_use_uint16;     /* if all values fit into uint16_t */
281          uint32_t dcc_retile_num_elements;
282          void *dcc_retile_map;
283 
284          /* CMASK level info (only level 0) */
285          struct gfx9_surf_level cmask_level0;
286 
287          /* For DCC retiling. */
288          struct gfx9_meta_equation dcc_equation; /* 2D only */
289          struct gfx9_meta_equation display_dcc_equation;
290 
291          /* For FCE compute. */
292          struct gfx9_meta_equation cmask_equation; /* 2D only */
293       } color;
294 
295       /* Z/S */
296       struct {
297          uint64_t stencil_offset; /* separate stencil */
298          uint16_t stencil_epitch;   /* gfx9 only, not on gfx10 */
299          uint8_t stencil_swizzle_mode;
300 
301          /* For HTILE VRS. */
302          struct gfx9_meta_equation htile_equation;
303       } zs;
304    };
305 };
306 
307 struct radeon_surf {
308    /* Format properties. */
309    uint8_t blk_w : 4;
310    uint8_t blk_h : 4;
311    uint8_t bpe : 5;
312    /* Display, standard(thin), depth, render(rotated). AKA D,S,Z,R swizzle modes. */
313    uint8_t micro_tile_mode : 3;
314    /* Number of mipmap levels where DCC or HTILE is enabled starting from level 0.
315     * Non-zero levels may be disabled due to alignment constraints, but not
316     * the first level.
317     */
318    uint8_t num_meta_levels : 4;
319    uint8_t is_linear : 1;
320    uint8_t has_stencil : 1;
321    /* This might be true even if micro_tile_mode isn't displayable or rotated. */
322    uint8_t is_displayable : 1;
323    uint8_t first_mip_tail_level : 4;
324 
325    /* These are return values. Some of them can be set by the caller, but
326     * they will be treated as hints (e.g. bankw, bankh) and might be
327     * changed by the calculator.
328     */
329 
330    /* Not supported yet for depth + stencil. */
331    uint16_t prt_tile_width;
332    uint16_t prt_tile_height;
333 
334    /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
335     * The value is the same for all mipmap levels. Supported tile modes:
336     * - GFX6: Only macro tiling.
337     * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
338     *   tail.
339     *
340     * Only these surfaces are allowed to set it:
341     * - color (if it doesn't have to be displayable)
342     * - DCC (same tile swizzle as color)
343     * - FMASK
344     * - CMASK if it's TC-compatible or if the gen is GFX9
345     * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
346     */
347    uint8_t tile_swizzle;
348    uint8_t fmask_tile_swizzle;
349 
350    /* Use (1 << log2) to compute the alignment. */
351    uint8_t surf_alignment_log2;
352    uint8_t fmask_alignment_log2;
353    uint8_t meta_alignment_log2; /* DCC or HTILE */
354    uint8_t cmask_alignment_log2;
355    uint8_t alignment_log2;
356 
357    /* DRM format modifier. Set to DRM_FORMAT_MOD_INVALID to have addrlib
358     * select tiling parameters instead.
359     */
360    uint64_t modifier;
361    uint64_t flags;
362 
363    uint64_t surf_size;
364    uint64_t fmask_size;
365    uint32_t fmask_slice_size; /* max 2^31 (16K * 16K * 8) */
366 
367    /* DCC and HTILE (they are very small) */
368    uint32_t meta_size;
369    uint32_t meta_slice_size;
370    uint32_t meta_pitch;
371 
372    uint32_t cmask_size;
373    uint32_t cmask_slice_size;
374    uint16_t cmask_pitch; /* GFX9+ */
375    uint16_t cmask_height; /* GFX9+ */
376 
377    /* All buffers combined. */
378    uint64_t meta_offset; /* DCC or HTILE */
379    uint64_t fmask_offset;
380    uint64_t cmask_offset;
381    uint64_t display_dcc_offset;
382    uint64_t total_size;
383 
384    union {
385       /* Return values for GFX8 and older.
386        *
387        * Some of them can be set by the caller if certain parameters are
388        * desirable. The allocator will try to obey them.
389        */
390       struct legacy_surf_layout legacy;
391 
392       /* GFX9+ return values. */
393       struct gfx9_surf_layout gfx9;
394    } u;
395 };
396 
/* Dimensions, sample counts and mip/array counts of a surface. Embedded in
 * struct ac_surf_config.
 */
struct ac_surf_info {
   uint32_t width;
   uint32_t height;
   uint32_t depth;
   uint8_t samples;         /* For Z/S: samples; For color: FMASK coverage samples */
   uint8_t storage_samples; /* For color: allocated samples */
   uint8_t levels;
   uint8_t num_channels; /* heuristic for displayability */
   uint16_t array_size;
   uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
   /* NOTE(review): presumably the FMASK counterpart of surf_index; confirm. */
   uint32_t *fmask_surf_index;
};
409 
410 struct ac_surf_config {
411    struct ac_surf_info info;
412    unsigned is_1d : 1;
413    unsigned is_3d : 1;
414    unsigned is_cube : 1;
415 };
416 
/* Creation, destruction and raw-handle access for the ac_addrlib object,
 * which is opaque in this header.
 * NOTE(review): max_alignment is a non-const pointer, presumably an output;
 * confirm in the implementation, together with the failure return value of
 * ac_addrlib_create().
 */
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info, uint64_t *max_alignment);
void ac_addrlib_destroy(struct ac_addrlib *addrlib);
void *ac_addrlib_get_handle(struct ac_addrlib *addrlib);

/* Fills in *surf from the given config and mode; config is read-only.
 * NOTE(review): the meaning of the int return value (0 on success?) is not
 * visible in this header; confirm in the implementation.
 */
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
                       struct radeon_surf *surf);
void ac_surface_zero_dcc_fields(struct radeon_surf *surf);
425 
/* Buffer-object metadata ("tiling flags") accessors.
 * The setter takes tiling_flags by value and has non-const surf and mode
 * parameters; the getter writes through tiling_flags.
 * NOTE(review): presumably set_* imports tiling_flags into surf/mode and get_*
 * exports them from surf; confirm in the implementation.
 */
void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                uint64_t tiling_flags, enum radeon_surf_mode *mode);
void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                uint64_t *tiling_flags);

/* UMD metadata accessors. The metadata array is declared with 64 dwords and
 * desc with 8 dwords.
 * NOTE(review): the units of size_metadata and the meaning of the bool return
 * of the setter are not visible in this header; confirm.
 */
bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
                                 unsigned size_metadata, const uint32_t metadata[64]);
void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_mipmap_levels, uint32_t desc[8],
                                 unsigned *size_metadata, uint32_t metadata[64]);

/* NOTE(review): presumably returns false when the requested offset/pitch
 * cannot be applied to surf; confirm, including the unit of pitch.
 */
bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
                                       unsigned num_mipmap_levels, uint64_t offset, unsigned pitch);
440 
/* Options for the modifier queries ac_is_modifier_supported() and
 * ac_get_supported_modifiers().
 */
struct ac_modifier_options {
   bool dcc;        /* Whether to allow DCC. */
   bool dcc_retile; /* Whether to allow use of a DCC retile map. */
};
445 
/* DRM format modifier queries.
 * NOTE(review): for ac_get_supported_modifiers(), mod_count and mods are
 * non-const pointers, presumably an in/out count and an output array; confirm
 * the exact protocol (e.g. whether mods may be NULL) in the implementation.
 */
bool ac_is_modifier_supported(const struct radeon_info *info,
                              const struct ac_modifier_options *options,
                              enum pipe_format format,
                              uint64_t modifier);
bool ac_get_supported_modifiers(const struct radeon_info *info,
                                const struct ac_modifier_options *options,
                                enum pipe_format format,
                                unsigned *mod_count,
                                uint64_t *mods);
/* Predicates that depend only on the modifier value. */
bool ac_modifier_has_dcc(uint64_t modifier);
bool ac_modifier_has_dcc_retile(uint64_t modifier);
bool ac_modifier_supports_dcc_image_stores(uint64_t modifier);
/* Writes through the width and height pointers. */
void ac_modifier_max_extent(const struct radeon_info *info,
                            uint64_t modifier, uint32_t *width, uint32_t *height);
460 
/* Per-plane queries; none of them modifies surf (const pointer).
 * NOTE(review): the mapping from plane index to main surface / metadata is
 * not visible in this header; confirm in the implementation.
 */
unsigned ac_surface_get_nplanes(const struct radeon_surf *surf);
uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
                                     const struct radeon_surf *surf,
                                     unsigned plane, unsigned layer);
uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
                                     const struct radeon_surf *surf,
                                     unsigned plane);
/* Of the whole miplevel, not an individual layer */
uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
                                   unsigned plane);

/* Takes the output stream to print to; surf and info are read-only. */
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
                           const struct radeon_surf *surf);

bool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
                                          const struct radeon_surf *surf);
477 
#ifdef AC_SURFACE_INCLUDE_NIR
/* NIR address helpers, declared only when AC_SURFACE_INCLUDE_NIR is defined
 * (which also pulls in nir_builder.h at the top of this header). Each takes a
 * nir_builder, a gfx9_meta_equation and coordinates as nir_ssa_def values and
 * returns a nir_ssa_def.
 * NOTE(review): ac_nir_cmask_addr_from_coord() takes an extra
 * nir_ssa_def **bit_position, presumably an output; confirm.
 */
nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                        unsigned bpe, struct gfx9_meta_equation *equation,
                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
                                        nir_ssa_def *dcc_slice_size,
                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor);

nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                          struct gfx9_meta_equation *equation,
                                          nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
                                          nir_ssa_def *cmask_slice_size,
                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                          nir_ssa_def *pipe_xor,
                                          nir_ssa_def **bit_position);

nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                          struct gfx9_meta_equation *equation,
                                          nir_ssa_def *htile_pitch,
                                          nir_ssa_def *htile_slice_size,
                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                          nir_ssa_def *pipe_xor);
#endif
501 
502 #ifdef __cplusplus
503 }
504 #endif
505 
506 #endif /* AC_SURFACE_H */
507