1 /*
2  * Copyright © 2021 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /* Make the test not meaningless when asserts are disabled. */
28 #undef NDEBUG
29 
30 #include <assert.h>
31 #include <inttypes.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 
35 #include <amdgpu.h>
36 #include "drm-uapi/amdgpu_drm.h"
37 #include "drm-uapi/drm_fourcc.h"
38 
39 #include "ac_surface.h"
40 #include "util/macros.h"
41 #include "util/u_atomic.h"
42 #include "util/u_math.h"
43 #include "util/u_vector.h"
44 #include "util/mesa-sha1.h"
45 #include "addrlib/inc/addrinterface.h"
46 
47 #include "ac_surface_test_common.h"
48 
49 /*
50  * The main goal of this test is to validate that our dcc/htile addressing
51  * functions match addrlib behavior.
52  */
53 
54 /* DCC address computation without mipmapping.
55  * CMASK address computation without mipmapping and without multisampling.
56  */
gfx9_meta_addr_from_coord(const struct radeon_info * info,const struct gfx9_addr_meta_equation * eq,unsigned meta_block_width,unsigned meta_block_height,unsigned meta_block_depth,unsigned meta_pitch,unsigned meta_height,unsigned x,unsigned y,unsigned z,unsigned sample,unsigned pipe_xor,unsigned * bit_position)57 static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info,
58                                           /* Shader key inputs: */
59                                           /* equation varies with resource_type, swizzle_mode,
60                                            * bpp, number of fragments, pipe_aligned, rb_aligned */
61                                           const struct gfx9_addr_meta_equation *eq,
62                                           unsigned meta_block_width, unsigned meta_block_height,
63                                           unsigned meta_block_depth,
64                                           /* Shader inputs: */
65                                           unsigned meta_pitch, unsigned meta_height,
66                                           unsigned x, unsigned y, unsigned z,
67                                           unsigned sample, unsigned pipe_xor,
68                                           /* Shader outputs (CMASK only): */
69                                           unsigned *bit_position)
70 {
71    /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
72    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
73    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
74    unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);
75 
76    unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
77    unsigned numPipeBits = eq->numPipeBits;
78    unsigned pitchInBlock = meta_pitch >> meta_block_width_log2;
79    unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock;
80 
81    unsigned xb = x >> meta_block_width_log2;
82    unsigned yb = y >> meta_block_height_log2;
83    unsigned zb = z >> meta_block_depth_log2;
84 
85    unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
86    unsigned coords[] = {x, y, z, sample, blockIndex};
87 
88    unsigned address = 0;
89    unsigned num_bits = eq->num_bits;
90    assert(num_bits <= 32);
91 
92    /* Compute the address up until the last bit that doesn't use the block index. */
93    for (unsigned b = 0; b < num_bits - 1; b++) {
94       unsigned xor = 0;
95       for (unsigned c = 0; c < 5; c++) {
96          if (eq->bit[b].coord[c].dim >= 5)
97             continue;
98 
99          assert(eq->bit[b].coord[c].ord < 32);
100          unsigned ison = (coords[eq->bit[b].coord[c].dim] >>
101                                  eq->bit[b].coord[c].ord) & 0x1;
102 
103          xor ^= ison;
104       }
105       address |= xor << b;
106    }
107 
108    /* Fill the remaining bits with the block index. */
109    unsigned last = num_bits - 1;
110    address |= (blockIndex >> eq->bit[last].coord[0].ord) << last;
111 
112    if (bit_position)
113       *bit_position = (address & 1) << 2;
114 
115    unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);
116    return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
117 }
118 
119 /* DCC/CMASK/HTILE address computation for GFX10. */
gfx10_meta_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned blkSizeLog2,unsigned meta_pitch,unsigned meta_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)120 static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,
121                                            /* Shader key inputs: */
122                                            const uint16_t *equation,
123                                            unsigned meta_block_width, unsigned meta_block_height,
124                                            unsigned blkSizeLog2,
125                                            /* Shader inputs: */
126                                            unsigned meta_pitch, unsigned meta_slice_size,
127                                            unsigned x, unsigned y, unsigned z,
128                                            unsigned pipe_xor,
129                                            /* Shader outputs: (CMASK only) */
130                                            unsigned *bit_position)
131 {
132    /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
133    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
134    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
135 
136    unsigned coord[] = {x, y, z, 0};
137    unsigned address = 0;
138 
139    for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {
140       unsigned v = 0;
141 
142       for (unsigned c = 0; c < 4; c++) {
143          if (equation[i*4+c] != 0) {
144             unsigned mask = equation[i*4+c];
145             unsigned bits = coord[c];
146 
147             while (mask)
148                v ^= (bits >> u_bit_scan(&mask)) & 0x1;
149          }
150       }
151 
152       address |= v << i;
153    }
154 
155    unsigned blkMask = (1 << blkSizeLog2) - 1;
156    unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
157    unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
158    unsigned xb = x >> meta_block_width_log2;
159    unsigned yb = y >> meta_block_height_log2;
160    unsigned pb = meta_pitch >> meta_block_width_log2;
161    unsigned blkIndex = (yb * pb) + xb;
162    unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
163 
164    if (bit_position)
165       *bit_position = (address & 1) << 2;
166 
167    return (meta_slice_size * z) +
168           (blkIndex * (1 << blkSizeLog2)) +
169           ((address >> 1) ^ pipeXor);
170 }
171 
172 /* DCC address computation without mipmapping and MSAA. */
gfx10_dcc_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned dcc_pitch,unsigned dcc_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)173 static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,
174                                           /* Shader key inputs: */
175                                           /* equation varies with bpp and pipe_aligned */
176                                           const uint16_t *equation, unsigned bpp,
177                                           unsigned meta_block_width, unsigned meta_block_height,
178                                           /* Shader inputs: */
179                                           unsigned dcc_pitch, unsigned dcc_slice_size,
180                                           unsigned x, unsigned y, unsigned z,
181                                           unsigned pipe_xor)
182 {
183    unsigned bpp_log2 = util_logbase2(bpp >> 3);
184    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
185    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
186    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8;
187 
188    return gfx10_meta_addr_from_coord(info, equation,
189                                      meta_block_width, meta_block_height,
190                                      blkSizeLog2,
191                                      dcc_pitch, dcc_slice_size,
192                                      x, y, z, pipe_xor, NULL);
193 }
194 
one_dcc_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned samples,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z,unsigned start_sample)195 static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
196                                  const struct radeon_info *info, unsigned width, unsigned height,
197                                  unsigned depth, unsigned samples, unsigned bpp,
198                                  unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,
199                                  unsigned mrt_index,
200                                  unsigned start_x, unsigned start_y, unsigned start_z,
201                                  unsigned start_sample)
202 {
203    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};
204    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};
205    ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};
206    ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};
207    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};
208    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
209    ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
210 
211    dout.pMipInfo = meta_mip_info;
212 
213    /* Compute DCC info. */
214    in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;
215    in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;
216    xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;
217    xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;
218    in.bpp = din.bpp = bpp;
219    xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;
220    in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */
221    din.unalignedWidth = width;
222    din.unalignedHeight = height;
223    din.numSlices = depth;
224    din.firstMipIdInTail = 1;
225 
226    int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
227    assert(ret == ADDR_OK);
228 
229    /* Compute xor. */
230    static AddrFormat format[] = {
231       ADDR_FMT_8,
232       ADDR_FMT_16,
233       ADDR_FMT_32,
234       ADDR_FMT_32_32,
235       ADDR_FMT_32_32_32_32,
236    };
237    xin.flags.color = 1;
238    xin.flags.texture = 1;
239    xin.flags.opt4space = 1;
240    xin.flags.metaRbUnaligned = !rb_aligned;
241    xin.flags.metaPipeUnaligned = !pipe_aligned;
242    xin.format = format[util_logbase2(bpp / 8)];
243    xin.surfIndex = mrt_index;
244 
245    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
246    assert(ret == ADDR_OK);
247 
248    /* Compute addresses */
249    in.compressBlkWidth = dout.compressBlkWidth;
250    in.compressBlkHeight = dout.compressBlkHeight;
251    in.compressBlkDepth = dout.compressBlkDepth;
252    in.metaBlkWidth = dout.metaBlkWidth;
253    in.metaBlkHeight = dout.metaBlkHeight;
254    in.metaBlkDepth = dout.metaBlkDepth;
255    in.dccRamSliceSize = dout.dccRamSliceSize;
256 
257    in.mipId = 0;
258    in.pitch = dout.pitch;
259    in.height = dout.height;
260    in.pipeXor = xout.pipeBankXor;
261 
262    /* Validate that the packed gfx9_meta_equation structure can fit all fields. */
263    const struct gfx9_meta_equation eq;
264    if (info->chip_class == GFX9) {
265       /* The bit array is smaller in gfx9_meta_equation than in addrlib. */
266       assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
267    } else {
268       /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
269       for (unsigned i = 0; i < 4; i++)
270          assert(dout.equation.gfx10_bits[i] == 0);
271 
272       for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)
273          assert(dout.equation.gfx10_bits[i] == 0);
274    }
275 
276    for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {
277       for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {
278          for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {
279             for (in.sample = start_sample; in.sample < samples; in.sample++) {
280                int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);
281                if (r != ADDR_OK) {
282                   printf("%s addrlib error: %s\n", name, test);
283                   abort();
284                }
285 
286                unsigned addr;
287                if (info->chip_class == GFX9) {
288                   addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
289                                                    dout.metaBlkDepth, dout.pitch, dout.height,
290                                                    in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
291                   if (in.sample == 1) {
292                      /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */
293                      assert(addr - 1 ==
294                             gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
295                                                       dout.metaBlkDepth, dout.pitch, dout.height,
296                                                       in.x, in.y, in.slice, 0, in.pipeXor, NULL));
297                   }
298                } else {
299                   addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,
300                                                    in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,
301                                                    dout.pitch, dout.dccRamSliceSize,
302                                                    in.x, in.y, in.slice, in.pipeXor);
303                }
304 
305                if (out.addr != addr) {
306                   printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",
307                          name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);
308                   return false;
309                }
310             }
311          }
312       }
313    }
314    return true;
315 }
316 
run_dcc_address_test(const char * name,const struct radeon_info * info,bool full)317 static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)
318 {
319    unsigned total = 0;
320    unsigned fails = 0;
321    unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_R_X;
322    unsigned last_size, max_samples, min_bpp, max_bpp;
323 
324    if (full) {
325       last_size = 6*6 - 1;
326       max_samples = 8;
327       min_bpp = 8;
328       max_bpp = 128;
329    } else {
330       /* The test coverage is reduced for Gitlab CI because it timeouts. */
331       last_size = 0;
332       max_samples = 2;
333       min_bpp = 32;
334       max_bpp = 64;
335    }
336 
337 #ifdef HAVE_OPENMP
338 #pragma omp parallel for
339 #endif
340    for (unsigned size = 0; size <= last_size; size++) {
341       unsigned width = 8 + 379 * (size % 6);
342       unsigned height = 8 + 379 * ((size / 6) % 6);
343 
344       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
345       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
346 
347       unsigned local_fails = 0;
348       unsigned local_total = 0;
349 
350       for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
351          /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
352          for (unsigned samples = 1; samples <= (info->chip_class == GFX9 ? max_samples : 1); samples *= 2) {
353             for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
354                for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
355                   for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
356                      unsigned depth = 2;
357                      char test[256];
358 
359                      snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",
360                               width, height, depth, bpp, samples, rb_aligned, pipe_aligned);
361 
362                      if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,
363                                               bpp, swizzle_mode, pipe_aligned, rb_aligned, mrt_index,
364                                               0, 0, 0, 0)) {
365                      } else {
366                         local_fails++;
367                      }
368                      local_total++;
369                   }
370                }
371             }
372          }
373       }
374 
375       ac_addrlib_destroy(ac_addrlib);
376       p_atomic_add(&fails, local_fails);
377       p_atomic_add(&total, local_total);
378    }
379    printf("%16s total: %u, fail: %u\n", name, total, fails);
380 }
381 
382 /* HTILE address computation without mipmapping. */
gfx10_htile_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned htile_pitch,unsigned htile_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)383 static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,
384                                             const uint16_t *equation,
385                                             unsigned meta_block_width,
386                                             unsigned meta_block_height,
387                                             unsigned htile_pitch, unsigned htile_slice_size,
388                                             unsigned x, unsigned y, unsigned z,
389                                             unsigned pipe_xor)
390 {
391    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
392    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
393    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4;
394 
395    return gfx10_meta_addr_from_coord(info, equation,
396                                      meta_block_width, meta_block_height,
397                                      blkSizeLog2,
398                                      htile_pitch, htile_slice_size,
399                                      x, y, z, pipe_xor, NULL);
400 }
401 
one_htile_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,unsigned start_x,unsigned start_y,unsigned start_z)402 static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
403                                    const struct radeon_info *info,
404                                    unsigned width, unsigned height, unsigned depth,
405                                    unsigned bpp, unsigned swizzle_mode,
406                                    unsigned start_x, unsigned start_y, unsigned start_z)
407 {
408    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
409    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
410    ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
411    ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
412    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};
413    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};
414    ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
415 
416    hout.pMipInfo = meta_mip_info;
417 
418    /* Compute HTILE info. */
419    hin.hTileFlags.pipeAligned = 1;
420    hin.hTileFlags.rbAligned = 1;
421    hin.depthFlags.depth = 1;
422    hin.depthFlags.texture = 1;
423    hin.depthFlags.opt4space = 1;
424    hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;
425    hin.unalignedWidth = in.unalignedWidth = width;
426    hin.unalignedHeight = in.unalignedHeight = height;
427    hin.numSlices = in.numSlices = depth;
428    hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */
429    hin.firstMipIdInTail = 1;
430 
431    int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
432    assert(ret == ADDR_OK);
433 
434    /* Compute xor. */
435    static AddrFormat format[] = {
436       ADDR_FMT_8, /* unused */
437       ADDR_FMT_16,
438       ADDR_FMT_32,
439    };
440    xin.flags = hin.depthFlags;
441    xin.resourceType = ADDR_RSRC_TEX_2D;
442    xin.format = format[util_logbase2(bpp / 8)];
443    xin.numFrags = xin.numSamples = in.numSamples = 1;
444 
445    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
446    assert(ret == ADDR_OK);
447 
448    in.hTileFlags = hin.hTileFlags;
449    in.depthflags = xin.flags;
450    in.bpp = bpp;
451    in.pipeXor = xout.pipeBankXor;
452 
453    for (in.x = start_x; in.x < width; in.x++) {
454       for (in.y = start_y; in.y < height; in.y++) {
455          for (in.slice = start_z; in.slice < depth; in.slice++) {
456             int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);
457             if (r != ADDR_OK) {
458                printf("%s addrlib error: %s\n", name, test);
459                abort();
460             }
461 
462             unsigned addr =
463                gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,
464                                            hout.metaBlkWidth, hout.metaBlkHeight,
465                                            hout.pitch, hout.sliceSize,
466                                            in.x, in.y, in.slice, in.pipeXor);
467             if (out.addr != addr) {
468                printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",
469                       name, test, in.x, in.y, in.slice, out.addr, addr);
470                return false;
471             }
472          }
473       }
474    }
475 
476    return true;
477 }
478 
run_htile_address_test(const char * name,const struct radeon_info * info,bool full)479 static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)
480 {
481    unsigned total = 0;
482    unsigned fails = 0;
483    unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
484 
485    /* The test coverage is reduced for Gitlab CI because it timeouts. */
486    if (!full) {
487       first_size = last_size = 0;
488    }
489 
490 #ifdef HAVE_OPENMP
491 #pragma omp parallel for
492 #endif
493    for (unsigned size = first_size; size <= last_size; size++) {
494       unsigned width = 8 + 379 * (size % 6);
495       unsigned height = 8 + 379 * (size / 6);
496 
497       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
498       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
499 
500       for (unsigned depth = 1; depth <= 2; depth *= 2) {
501          for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
502             if (one_htile_address_test(name, name, addrlib, info, width, height, depth,
503                                        bpp, ADDR_SW_64KB_Z_X, 0, 0, 0)) {
504             } else {
505                p_atomic_inc(&fails);
506             }
507             p_atomic_inc(&total);
508          }
509       }
510 
511       ac_addrlib_destroy(ac_addrlib);
512    }
513    printf("%16s total: %u, fail: %u\n", name, total, fails);
514 }
515 
516 /* CMASK address computation without mipmapping and MSAA. */
gfx10_cmask_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned cmask_pitch,unsigned cmask_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)517 static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info,
518                                             /* Shader key inputs: */
519                                             /* equation varies with bpp and pipe_aligned */
520                                             const uint16_t *equation, unsigned bpp,
521                                             unsigned meta_block_width, unsigned meta_block_height,
522                                             /* Shader inputs: */
523                                             unsigned cmask_pitch, unsigned cmask_slice_size,
524                                             unsigned x, unsigned y, unsigned z,
525                                             unsigned pipe_xor,
526                                             /* Shader outputs: */
527                                             unsigned *bit_position)
528 
529 {
530    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
531    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
532    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 7;
533 
534    return gfx10_meta_addr_from_coord(info, equation,
535                                      meta_block_width, meta_block_height,
536                                      blkSizeLog2,
537                                      cmask_pitch, cmask_slice_size,
538                                      x, y, z, pipe_xor, bit_position);
539 }
540 
one_cmask_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z)541 static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
542                                    const struct radeon_info *info,
543                                    unsigned width, unsigned height, unsigned depth,
544                                    unsigned bpp, unsigned swizzle_mode,
545                                    bool pipe_aligned, bool rb_aligned, unsigned mrt_index,
546                                    unsigned start_x, unsigned start_y, unsigned start_z)
547 {
548    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)};
549    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)};
550    ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)};
551    ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)};
552    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)};
553    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
554 
555    /* Compute CMASK info. */
556    cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D;
557    cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode;
558    cin.unalignedWidth = in.unalignedWidth = width;
559    cin.unalignedHeight = in.unalignedHeight = height;
560    cin.numSlices = in.numSlices = depth;
561    cin.numMipLevels = 1;
562    cin.firstMipIdInTail = 1;
563    cin.cMaskFlags.pipeAligned = pipe_aligned;
564    cin.cMaskFlags.rbAligned = rb_aligned;
565    cin.cMaskFlags.linear = false;
566    cin.colorFlags.color = 1;
567    cin.colorFlags.texture = 1;
568    cin.colorFlags.opt4space = 1;
569    cin.colorFlags.metaRbUnaligned = !rb_aligned;
570    cin.colorFlags.metaPipeUnaligned = !pipe_aligned;
571 
572    int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout);
573    assert(ret == ADDR_OK);
574 
575    /* Compute xor. */
576    static AddrFormat format[] = {
577       ADDR_FMT_8,
578       ADDR_FMT_16,
579       ADDR_FMT_32,
580       ADDR_FMT_32_32,
581       ADDR_FMT_32_32_32_32,
582    };
583    xin.flags = cin.colorFlags;
584    xin.format = format[util_logbase2(bpp / 8)];
585    xin.surfIndex = mrt_index;
586    xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1;
587 
588    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
589    assert(ret == ADDR_OK);
590 
591    in.cMaskFlags = cin.cMaskFlags;
592    in.colorFlags = cin.colorFlags;
593    in.pipeXor = xout.pipeBankXor;
594 
595    for (in.x = start_x; in.x < width; in.x++) {
596       for (in.y = start_y; in.y < height; in.y++) {
597          for (in.slice = start_z; in.slice < depth; in.slice++) {
598             int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out);
599             if (r != ADDR_OK) {
600                printf("%s addrlib error: %s\n", name, test);
601                abort();
602             }
603 
604             unsigned addr, bit_position;
605 
606             if (info->chip_class == GFX9) {
607                addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
608                                                 cout.metaBlkWidth, cout.metaBlkHeight, 1,
609                                                 cout.pitch, cout.height,
610                                                 in.x, in.y, in.slice, 0, in.pipeXor,
611                                                 &bit_position);
612             } else {
613                addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits,
614                                                   bpp, cout.metaBlkWidth,
615                                                   cout.metaBlkHeight,
616                                                   cout.pitch, cout.sliceSize,
617                                                   in.x, in.y, in.slice,
618                                                   in.pipeXor,
619                                                   &bit_position);
620             }
621 
622             if (out.addr != addr || out.bitPosition != bit_position) {
623                printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, "
624                       "expected (bit_position) = %u, got = %u\n",
625                       name, test, in.x, in.y, in.slice, out.addr, addr,
626                       out.bitPosition, bit_position);
627                return false;
628             }
629          }
630       }
631    }
632 
633    return true;
634 }
635 
run_cmask_address_test(const char * name,const struct radeon_info * info,bool full)636 static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full)
637 {
638    unsigned total = 0;
639    unsigned fails = 0;
640    unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
641    unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
642 
643    /* The test coverage is reduced for Gitlab CI because it timeouts. */
644    if (!full) {
645       first_size = last_size = 0;
646    }
647 
648 #ifdef HAVE_OPENMP
649 #pragma omp parallel for
650 #endif
651    for (unsigned size = first_size; size <= last_size; size++) {
652       unsigned width = 8 + 379 * (size % 6);
653       unsigned height = 8 + 379 * (size / 6);
654 
655       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
656       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
657 
658       for (unsigned depth = 1; depth <= 2; depth *= 2) {
659          for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
660             for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) {
661                for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) {
662                   if (one_cmask_address_test(name, name, addrlib, info,
663                                              width, height, depth, bpp,
664                                              swizzle_mode,
665                                              pipe_aligned, rb_aligned,
666                                              0, 0, 0, 0)) {
667                   } else {
668                      p_atomic_inc(&fails);
669                   }
670                   p_atomic_inc(&total);
671                }
672             }
673          }
674       }
675 
676       ac_addrlib_destroy(ac_addrlib);
677    }
678    printf("%16s total: %u, fail: %u\n", name, total, fails);
679 }
680 
main(int argc,char ** argv)681 int main(int argc, char **argv)
682 {
683    bool full = false;
684 
685    if (argc == 2 && !strcmp(argv[1], "--full"))
686       full = true;
687    else
688       puts("Specify --full to run the full test.");
689 
690    puts("DCC:");
691    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
692       struct radeon_info info = get_radeon_info(&testcases[i]);
693 
694       run_dcc_address_test(testcases[i].name, &info, full);
695    }
696 
697    puts("HTILE:");
698    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
699       struct radeon_info info = get_radeon_info(&testcases[i]);
700 
701       /* Only GFX10+ is currently supported. */
702       if (info.chip_class < GFX10)
703          continue;
704 
705       run_htile_address_test(testcases[i].name, &info, full);
706    }
707 
708    puts("CMASK:");
709    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
710       struct radeon_info info = get_radeon_info(&testcases[i]);
711 
712       run_cmask_address_test(testcases[i].name, &info, full);
713    }
714 
715    return 0;
716 }
717