/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "common/intel_aux_map.h"
#include "common/intel_sample_positions.h"
#include "common/intel_pixel_hash.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "vk_util.h"

static void
genX(emit_slice_hashing_state)(struct anv_device *device,
                               struct anv_batch *batch)
{
#if GFX_VER == 11
   /* Gfx11 hardware has two pixel pipes at most. */
   for (unsigned i = 2; i < ARRAY_SIZE(device->info.ppipe_subslices); i++)
      assert(device->info.ppipe_subslices[i] == 0);

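   /* If both pixel pipes have the same number of active dual subslices, the
    * default hashing already distributes work evenly between them, so no
    * custom table is needed.
    */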
   if (device->info.ppipe_subslices[0] == device->info.ppipe_subslices[1])
     return;

   if (!device->slice_hash.alloc_size) {
      unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
      device->slice_hash =
         anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

      const bool flip = device->info.ppipe_subslices[0] <
                     device->info.ppipe_subslices[1];
      struct GENX(SLICE_HASH_TABLE) table;
      intel_compute_pixel_hash_table_3way(16, 16, 3, 3, flip, table.Entry[0]);

      GENX(SLICE_HASH_TABLE_pack)(NULL, device->slice_hash.map, &table);
   }

   anv_batch_emit(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
      ptr.SliceHashStatePointerValid = true;
      ptr.SliceHashTableStatePointer = device->slice_hash.offset;
   }

   anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
      mode.SliceHashingTableEnable = true;
   }
#elif GFX_VERx10 == 120
   /* For each n calculate ppipes_of[n], equal to the number of pixel pipes
    * present with n active dual subslices.
    */
   unsigned ppipes_of[3] = {};

   for (unsigned n = 0; n < ARRAY_SIZE(ppipes_of); n++) {
      for (unsigned p = 0; p < 3; p++)
         ppipes_of[n] += (device->info.ppipe_subslices[p] == n);
   }
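   /* For example, ppipe_subslices = {2, 2, 1} would give ppipes_of =
    * {0, 1, 2}: no pipe with zero active dual subslices, one pipe with one,
    * and two pipes with two.
    */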

   /* Gfx12 has three pixel pipes. */
   for (unsigned p = 3; p < ARRAY_SIZE(device->info.ppipe_subslices); p++)
      assert(device->info.ppipe_subslices[p] == 0);

   if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
      /* All three pixel pipes have the maximum number of active dual
       * subslices, or there is only one active pixel pipe: Nothing to do.
       */
      return;
   }

   anv_batch_emit(batch, GENX(3DSTATE_SUBSLICE_HASH_TABLE), p) {
      p.SliceHashControl[0] = TABLE_0;

      if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
         intel_compute_pixel_hash_table_3way(8, 16, 2, 2, 0, p.TwoWayTableEntry[0]);
      else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
         intel_compute_pixel_hash_table_3way(8, 16, 3, 3, 0, p.TwoWayTableEntry[0]);

      if (ppipes_of[2] == 2 && ppipes_of[1] == 1)
         intel_compute_pixel_hash_table_3way(8, 16, 5, 4, 0, p.ThreeWayTableEntry[0]);
      else if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
         intel_compute_pixel_hash_table_3way(8, 16, 2, 2, 0, p.ThreeWayTableEntry[0]);
      else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
         intel_compute_pixel_hash_table_3way(8, 16, 3, 3, 0, p.ThreeWayTableEntry[0]);
      else
         unreachable("Illegal fusing.");
   }

   anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), p) {
      p.SubsliceHashingTableEnable = true;
      p.SubsliceHashingTableEnableMask = true;
   }
#elif GFX_VERx10 == 125
   uint32_t ppipe_mask = 0;
   for (unsigned p = 0; p < ARRAY_SIZE(device->info.ppipe_subslices); p++) {
      if (device->info.ppipe_subslices[p])
         ppipe_mask |= (1u << p);
   }
   assert(ppipe_mask);

   if (!device->slice_hash.alloc_size) {
      unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
      device->slice_hash =
         anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

      struct GENX(SLICE_HASH_TABLE) table;

      /* Note that the hardware expects an array of 7 tables, each table
       * intended to specify the pixel pipe hashing behavior for one of the
       * possible slice counts between 2 and 8.  However, that doesn't
       * actually work, among other reasons because of hardware bugs that in
       * some cases cause the GPU to access the table at the wrong index, so
       * in practice all 7 tables need to be initialized to the same value.
       */
      for (unsigned i = 0; i < 7; i++)
         intel_compute_pixel_hash_table_nway(16, 16, ppipe_mask, table.Entry[i][0]);

      GENX(SLICE_HASH_TABLE_pack)(NULL, device->slice_hash.map, &table);
   }

   anv_batch_emit(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
      ptr.SliceHashStatePointerValid = true;
      ptr.SliceHashTableStatePointer = device->slice_hash.offset;
   }

   anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
      mode.SliceHashingTableEnable = true;
      mode.SliceHashingTableEnableMask = true;
      mode.CrossSliceHashingMode = (util_bitcount(ppipe_mask) > 1 ?
                                    hashing32x32 : NormalMode);
      mode.CrossSliceHashingModeMask = -1;
   }
#endif
}

static VkResult
init_render_queue_state(struct anv_queue *queue)
{
   struct anv_device *device = queue->device;
   uint32_t cmds[128];
   struct anv_batch batch = {
      .start = cmds,
      .next = cmds,
      .end = (void *) cmds + sizeof(cmds),
   };
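   /* This is a small one-shot batch built on the stack: anv_batch_emit()
    * packs each command at batch.next and advances it, and the batch is
    * submitted once at the end of this function.
    */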

   anv_batch_emit(&batch, GENX(PIPELINE_SELECT), ps) {
#if GFX_VER >= 9
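      /* MaskBits enables writes to the corresponding fields in the lower
       * dword of the command: the PipelineSelection bits, plus the
       * MediaSamplerDOPClockGateEnable bit on Gfx12+ (hence 0x13 instead
       * of 3).
       */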
      ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
      ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
#endif
      ps.PipelineSelection = _3D;
   }

#if GFX_VER == 9
   anv_batch_write_reg(&batch, GENX(CACHE_MODE_1), cm1) {
      cm1.FloatBlendOptimizationEnable = true;
      cm1.FloatBlendOptimizationEnableMask = true;
      cm1.MSCRAWHazardAvoidanceBit = true;
      cm1.MSCRAWHazardAvoidanceBitMask = true;
      cm1.PartialResolveDisableInVC = true;
      cm1.PartialResolveDisableInVCMask = true;
   }
#endif

#if GFX_VERx10 >= 125
   /* GEN:BUG:1607854226:
    *
    *  Non-pipelined state has issues with not applying in MEDIA/GPGPU mode.
    *  Fortunately, we always start the context off in 3D mode.
    */
   uint32_t mocs = device->isl_dev.mocs.internal;
   anv_batch_emit(&batch, GENX(STATE_BASE_ADDRESS), sba) {
      sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
      sba.GeneralStateBufferSize  = 0xfffff;
      sba.GeneralStateMOCS = mocs;
      sba.GeneralStateBaseAddressModifyEnable = true;
      sba.GeneralStateBufferSizeModifyEnable = true;

      sba.StatelessDataPortAccessMOCS = mocs;

      sba.SurfaceStateBaseAddress =
         (struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS };
      sba.SurfaceStateMOCS = mocs;
      sba.SurfaceStateBaseAddressModifyEnable = true;

      sba.DynamicStateBaseAddress =
         (struct anv_address) { .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS };
      sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
      sba.DynamicStateMOCS = mocs;
      sba.DynamicStateBaseAddressModifyEnable = true;
      sba.DynamicStateBufferSizeModifyEnable = true;

      sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 };
      sba.IndirectObjectBufferSize = 0xfffff;
      sba.IndirectObjectMOCS = mocs;
      sba.IndirectObjectBaseAddressModifyEnable = true;
      sba.IndirectObjectBufferSizeModifyEnable = true;

      sba.InstructionBaseAddress =
         (struct anv_address) { .offset = INSTRUCTION_STATE_POOL_MIN_ADDRESS };
      sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
      sba.InstructionMOCS = mocs;
      sba.InstructionBaseAddressModifyEnable = true;
      sba.InstructionBuffersizeModifyEnable = true;

      sba.BindlessSurfaceStateBaseAddress =
         (struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS };
      sba.BindlessSurfaceStateSize = (1 << 20) - 1;
      sba.BindlessSurfaceStateMOCS = mocs;
      sba.BindlessSurfaceStateBaseAddressModifyEnable = true;

      sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
      sba.BindlessSamplerStateMOCS = mocs;
      sba.BindlessSamplerStateBaseAddressModifyEnable = true;
      sba.BindlessSamplerStateBufferSize = 0;
   }
#endif

   anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa);

   anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
      rect.ClippedDrawingRectangleYMin = 0;
      rect.ClippedDrawingRectangleXMin = 0;
      rect.ClippedDrawingRectangleYMax = UINT16_MAX;
      rect.ClippedDrawingRectangleXMax = UINT16_MAX;
      rect.DrawingRectangleOriginY = 0;
      rect.DrawingRectangleOriginX = 0;
   }

#if GFX_VER >= 8
   anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck);

   genX(emit_sample_pattern)(&batch, 0, NULL);

   /* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the
    * section titled, "Optimized Depth Buffer Clear and/or Stencil Buffer
    * Clear." It mentions that the packet overrides GPU state for the clear
    * operation and needs to be reset to 0s to clear the overrides. Depending
    * on the kernel, we may not get a context with the state for this packet
    * zeroed. Do it ourselves just in case. We've observed this to prevent a
    * number of GPU hangs on ICL.
    */
   anv_batch_emit(&batch, GENX(3DSTATE_WM_HZ_OP), hzp);
#endif

#if GFX_VER == 11
   /* The default behavior of bit 5 "Headerless Message for Pre-emptable
    * Contexts" in the SAMPLER MODE register is 0, which means headerless
    * sampler messages are not allowed for pre-emptable contexts. Set bit 5
    * to 1 to allow them.
    */
   anv_batch_write_reg(&batch, GENX(SAMPLER_MODE), sm) {
      sm.HeaderlessMessageforPreemptableContexts = true;
      sm.HeaderlessMessageforPreemptableContextsMask = true;
   }

   /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
    * HALF_SLICE_CHICKEN7 register.
    */
   anv_batch_write_reg(&batch, GENX(HALF_SLICE_CHICKEN7), hsc7) {
      hsc7.EnabledTexelOffsetPrecisionFix = true;
      hsc7.EnabledTexelOffsetPrecisionFixMask = true;
   }

   anv_batch_write_reg(&batch, GENX(TCCNTLREG), tcc) {
      tcc.L3DataPartialWriteMergingEnable = true;
      tcc.ColorZPartialWriteMergingEnable = true;
      tcc.URBPartialWriteMergingEnable = true;
      tcc.TCDisable = true;
   }
#endif
   genX(emit_slice_hashing_state)(device, &batch);

#if GFX_VER >= 11
   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (device->info.disable_ccs_repack) {
      anv_batch_write_reg(&batch, GENX(CACHE_MODE_0), cm0) {
         cm0.DisableRepackingforCompression = true;
         cm0.DisableRepackingforCompressionMask = true;
      }
   }

   /* An unknown issue causes VS push constants to become corrupted during
    * object-level preemption. For now, restrict to command buffer level
    * preemption to avoid rendering corruption.
    */
   anv_batch_write_reg(&batch, GENX(CS_CHICKEN1), cc1) {
      cc1.ReplayMode = MidcmdbufferPreemption;
      cc1.ReplayModeMask = true;
   }

#if GFX_VERx10 < 125
#define AA_LINE_QUALITY_REG GENX(3D_CHICKEN3)
#else
#define AA_LINE_QUALITY_REG GENX(CHICKEN_RASTER_1)
#endif

   /* Enable the new line drawing algorithm that produces higher quality
    * lines.
    */
   anv_batch_write_reg(&batch, AA_LINE_QUALITY_REG, c3) {
      c3.AALineQualityFix = true;
      c3.AALineQualityFixMask = true;
   }
#endif

#if GFX_VER == 12
   if (device->info.has_aux_map) {
      uint64_t aux_base_addr = intel_aux_map_get_base(device->aux_map_ctx);
      assert(aux_base_addr % (32 * 1024) == 0);
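      /* GFX_AUX_TABLE_BASE_ADDR is a 64-bit register, so it is programmed
       * with two MI_LOAD_REGISTER_IMM commands: the low dword first, then
       * the high dword at the register offset + 4.
       */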
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num);
         lri.DataDWord = aux_base_addr & 0xffffffff;
      }
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num) + 4;
         lri.DataDWord = aux_base_addr >> 32;
      }
   }
#endif

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (GFX_VER >= 8 && device->physical->has_context_isolation) {
#if GFX_VER >= 9
      anv_batch_write_reg(&batch, GENX(CS_DEBUG_MODE2), csdm2) {
         csdm2.CONSTANT_BUFFERAddressOffsetDisable = true;
         csdm2.CONSTANT_BUFFERAddressOffsetDisableMask = true;
      }
#elif GFX_VER == 8
      anv_batch_write_reg(&batch, GENX(INSTPM), instpm) {
         instpm.CONSTANT_BUFFERAddressOffsetDisable = true;
         instpm.CONSTANT_BUFFERAddressOffsetDisableMask = true;
      }
#endif
   }

#if GFX_VER >= 11
   /* Starting with GFX version 11, SLM is no longer part of the L3$ config
    * so it never changes throughout the lifetime of the VkDevice.
    */
   const struct intel_l3_config *cfg = intel_get_default_l3_config(&device->info);
   genX(emit_l3_config)(&batch, device, cfg);
   device->l3_config = cfg;
#endif

   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);

   assert(batch.next <= batch.end);

   return anv_queue_submit_simple_batch(queue, &batch);
}

void
genX(init_physical_device_state)(ASSERTED struct anv_physical_device *device)
{
   assert(device->info.verx10 == GFX_VERx10);
}

VkResult
genX(init_device_state)(struct anv_device *device)
{
   VkResult res;

   device->slice_hash = (struct anv_state) { 0 };
   for (uint32_t i = 0; i < device->queue_count; i++) {
      struct anv_queue *queue = &device->queues[i];
      switch (queue->family->engine_class) {
      case I915_ENGINE_CLASS_RENDER:
         res = init_render_queue_state(queue);
         break;
      default:
         res = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         break;
      }
      if (res != VK_SUCCESS)
         return res;
   }

   return res;
}

#if GFX_VERx10 >= 125
#define maybe_for_each_shading_rate_op(name) \
   for (VkFragmentShadingRateCombinerOpKHR name = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; \
        name <= VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR; \
        name++)
#elif GFX_VER >= 12
#define maybe_for_each_shading_rate_op(name)
#endif

/* Rather than reemitting the CPS_STATE structure every time the shading rate
 * parameters change, and for as many viewports as needed, we can just prepare
 * all possible cases and pick the right offset from the prepacked states when
 * needed.
 */
void
genX(init_cps_device_state)(struct anv_device *device)
{
#if GFX_VER >= 12
   void *cps_state_ptr = device->cps_states.map;

   /* Disabled CPS mode */
   for (uint32_t __v = 0; __v < MAX_VIEWPORTS; __v++) {
      struct GENX(CPS_STATE) cps_state = {
         .CoarsePixelShadingMode = CPS_MODE_CONSTANT,
         .MinCPSizeX = 1,
         .MinCPSizeY = 1,
#if GFX_VERx10 >= 125
         .Combiner0OpcodeforCPsize = PASSTHROUGH,
         .Combiner1OpcodeforCPsize = PASSTHROUGH,
#endif /* GFX_VERx10 >= 125 */

      };

      GENX(CPS_STATE_pack)(NULL, cps_state_ptr, &cps_state);
      cps_state_ptr += GENX(CPS_STATE_length) * 4;
   }

   maybe_for_each_shading_rate_op(op0) {
      maybe_for_each_shading_rate_op(op1) {
         for (uint32_t x = 1; x <= 4; x *= 2) {
            for (uint32_t y = 1; y <= 4; y *= 2) {
               struct GENX(CPS_STATE) cps_state = {
                  .CoarsePixelShadingMode = CPS_MODE_CONSTANT,
                  .MinCPSizeX = x,
                  .MinCPSizeY = y,
               };

#if GFX_VERx10 >= 125
               static const uint32_t combiner_ops[] = {
                  [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR]    = PASSTHROUGH,
                  [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = OVERRIDE,
                  [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR]     = HIGH_QUALITY,
                  [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR]     = LOW_QUALITY,
                  [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR]     = RELATIVE,
               };

               cps_state.Combiner0OpcodeforCPsize = combiner_ops[op0];
               cps_state.Combiner1OpcodeforCPsize = combiner_ops[op1];
#endif /* GFX_VERx10 >= 125 */

               for (uint32_t __v = 0; __v < MAX_VIEWPORTS; __v++) {
                  GENX(CPS_STATE_pack)(NULL, cps_state_ptr, &cps_state);
                  cps_state_ptr += GENX(CPS_STATE_length) * 4;
               }
            }
         }
      }
   }
#endif /* GFX_VER >= 12 */
}

#if GFX_VER >= 12
static uint32_t
get_cps_state_offset(struct anv_device *device, bool cps_enabled,
                     const struct anv_dynamic_state *d)
{
   if (!cps_enabled)
      return device->cps_states.offset;

   uint32_t offset;
   static const uint32_t size_index[] = {
      [1] = 0,
      [2] = 1,
      [4] = 2,
   };

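   /* The prepacked states from genX(init_cps_device_state) are laid out as
    * MAX_VIEWPORTS consecutive CPS_STATE structures per combination: the
    * disabled state first, then one entry per (op0, op1, width, height)
    * tuple in the same nesting order as the loops in that function (3 sizes
    * per axis, and 5 combiner ops per stage on Gfx12.5+). The index computed
    * below mirrors that layout.
    */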
#if GFX_VERx10 >= 125
   offset =
      1 + /* skip disabled */
      d->fragment_shading_rate.ops[0] * 5 * 3 * 3 +
      d->fragment_shading_rate.ops[1] * 3 * 3 +
      size_index[d->fragment_shading_rate.rate.width] * 3 +
      size_index[d->fragment_shading_rate.rate.height];
#else
   offset =
      1 + /* skip disabled */
      size_index[d->fragment_shading_rate.rate.width] * 3 +
      size_index[d->fragment_shading_rate.rate.height];
#endif

   offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;

   return device->cps_states.offset + offset;
}
#endif /* GFX_VER >= 12 */

void
genX(emit_l3_config)(struct anv_batch *batch,
                     const struct anv_device *device,
                     const struct intel_l3_config *cfg)
{
   UNUSED const struct intel_device_info *devinfo = &device->info;

#if GFX_VER >= 8

#if GFX_VER >= 12
#define L3_ALLOCATION_REG GENX(L3ALLOC)
#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num)
#else
#define L3_ALLOCATION_REG GENX(L3CNTLREG)
#define L3_ALLOCATION_REG_num GENX(L3CNTLREG_num)
#endif

   anv_batch_write_reg(batch, L3_ALLOCATION_REG, l3cr) {
      if (cfg == NULL) {
#if GFX_VER >= 12
         l3cr.L3FullWayAllocationEnable = true;
#else
         unreachable("Invalid L3$ config");
#endif
      } else {
#if GFX_VER < 11
         l3cr.SLMEnable = cfg->n[INTEL_L3P_SLM];
#endif
#if GFX_VER == 11
         /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be
          * set in the L3CNTLREG register. The default setting of the bit is
          * not the desired behavior.
          */
         l3cr.ErrorDetectionBehaviorControl = true;
         l3cr.UseFullWays = true;
#endif /* GFX_VER == 11 */
         assert(cfg->n[INTEL_L3P_IS] == 0);
         assert(cfg->n[INTEL_L3P_C] == 0);
         assert(cfg->n[INTEL_L3P_T] == 0);
         l3cr.URBAllocation = cfg->n[INTEL_L3P_URB];
         l3cr.ROAllocation = cfg->n[INTEL_L3P_RO];
         l3cr.DCAllocation = cfg->n[INTEL_L3P_DC];
         l3cr.AllAllocation = cfg->n[INTEL_L3P_ALL];
      }
   }

#else /* GFX_VER < 8 */

   const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];
   const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||
                       cfg->n[INTEL_L3P_ALL];
   const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||
                      cfg->n[INTEL_L3P_ALL];
   const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||
                      cfg->n[INTEL_L3P_ALL];

   assert(!cfg->n[INTEL_L3P_ALL]);

   /* When enabled, SLM only uses a portion of the L3 on half of the banks;
    * the matching space on the remaining banks has to be allocated to a
    * client (URB for all validated configurations) set to the
    * lower-bandwidth 2-bank address hashing mode.
    */
   const bool urb_low_bw = cfg->n[INTEL_L3P_SLM] && devinfo->platform != INTEL_PLATFORM_BYT;
   assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);

   /* Minimum number of ways that can be allocated to the URB. */
   const unsigned n0_urb = devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0;
   assert(cfg->n[INTEL_L3P_URB] >= n0_urb);

   anv_batch_write_reg(batch, GENX(L3SQCREG1), l3sqc) {
      l3sqc.ConvertDC_UC = !has_dc;
      l3sqc.ConvertIS_UC = !has_is;
      l3sqc.ConvertC_UC = !has_c;
      l3sqc.ConvertT_UC = !has_t;
#if GFX_VERx10 == 75
      l3sqc.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT;
#else
      l3sqc.L3SQGeneralPriorityCreditInitialization =
         devinfo->platform == INTEL_PLATFORM_BYT ? BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT;
#endif
      l3sqc.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT;
   }

   anv_batch_write_reg(batch, GENX(L3CNTLREG2), l3cr2) {
      l3cr2.SLMEnable = cfg->n[INTEL_L3P_SLM];
      l3cr2.URBLowBandwidth = urb_low_bw;
      l3cr2.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb;
#if GFX_VERx10 != 75
      l3cr2.ALLAllocation = cfg->n[INTEL_L3P_ALL];
#endif
      l3cr2.ROAllocation = cfg->n[INTEL_L3P_RO];
      l3cr2.DCAllocation = cfg->n[INTEL_L3P_DC];
   }

   anv_batch_write_reg(batch, GENX(L3CNTLREG3), l3cr3) {
      l3cr3.ISAllocation = cfg->n[INTEL_L3P_IS];
      l3cr3.ISLowBandwidth = 0;
      l3cr3.CAllocation = cfg->n[INTEL_L3P_C];
      l3cr3.CLowBandwidth = 0;
      l3cr3.TAllocation = cfg->n[INTEL_L3P_T];
      l3cr3.TLowBandwidth = 0;
   }

#if GFX_VERx10 == 75
   if (device->physical->cmd_parser_version >= 4) {
      /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
       * them disabled to avoid crashing the system hard.
       */
      anv_batch_write_reg(batch, GENX(SCRATCH1), s1) {
         s1.L3AtomicDisable = !has_dc;
      }
      anv_batch_write_reg(batch, GENX(CHICKEN3), c3) {
         c3.L3AtomicDisableMask = true;
         c3.L3AtomicDisable = !has_dc;
      }
   }
#endif /* GFX_VERx10 == 75 */

#endif /* GFX_VER < 8 */
}

void
genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
                       const VkSampleLocationEXT *locations)
{
   anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
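      /* NumberofMultisamples is the log2 of the sample count; since samples
       * is always a power of two, ffs(samples) - 1 computes exactly that.
       */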
      ms.NumberofMultisamples       = __builtin_ffs(samples) - 1;

      ms.PixelLocation              = CENTER;
#if GFX_VER >= 8
      /* The PRM says that this bit is valid only for DX9:
       *
       *    SW can choose to set this bit only for DX9 API. DX10/OGL API's
       *    should not have any effect by setting or not setting this bit.
       */
      ms.PixelPositionOffsetEnable  = false;
#else

      if (locations) {
         switch (samples) {
         case 1:
            INTEL_SAMPLE_POS_1X_ARRAY(ms.Sample, locations);
            break;
         case 2:
            INTEL_SAMPLE_POS_2X_ARRAY(ms.Sample, locations);
            break;
         case 4:
            INTEL_SAMPLE_POS_4X_ARRAY(ms.Sample, locations);
            break;
         case 8:
            INTEL_SAMPLE_POS_8X_ARRAY(ms.Sample, locations);
            break;
         default:
            break;
         }
      } else {
         switch (samples) {
         case 1:
            INTEL_SAMPLE_POS_1X(ms.Sample);
            break;
         case 2:
            INTEL_SAMPLE_POS_2X(ms.Sample);
            break;
         case 4:
            INTEL_SAMPLE_POS_4X(ms.Sample);
            break;
         case 8:
            INTEL_SAMPLE_POS_8X(ms.Sample);
            break;
         default:
            break;
         }
      }
#endif
   }
}

#if GFX_VER >= 8
void
genX(emit_sample_pattern)(struct anv_batch *batch, uint32_t samples,
                          const VkSampleLocationEXT *locations)
{
   /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
    * VkPhysicalDeviceFeatures::standardSampleLocations.
    */
   anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
      if (locations) {
         /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:
          *
          *    "When programming the sample offsets (for NUMSAMPLES_4 or _8
          *    and MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3
          *    (or 7 for 8X, or 15 for 16X) must have monotonically increasing
          *    distance from the pixel center. This is required to get the
          *    correct centroid computation in the device."
          *
          * However, the Vulkan spec seems to require that the samples
          * occur in the order provided through the API. The standard sample
          * patterns have the above property that they have monotonically
          * increasing distances from the center but client-provided ones do
          * not. As long as this only affects centroid calculations as the
          * docs say, we should be ok because OpenGL and Vulkan only require
          * that the centroid be some lit sample and that it's the same for
          * all samples in a pixel; they have no requirement that it be the
          * one closest to center.
          */
         switch (samples) {
         case 1:
            INTEL_SAMPLE_POS_1X_ARRAY(sp._1xSample, locations);
            break;
         case 2:
            INTEL_SAMPLE_POS_2X_ARRAY(sp._2xSample, locations);
            break;
         case 4:
            INTEL_SAMPLE_POS_4X_ARRAY(sp._4xSample, locations);
            break;
         case 8:
            INTEL_SAMPLE_POS_8X_ARRAY(sp._8xSample, locations);
            break;
#if GFX_VER >= 9
         case 16:
            INTEL_SAMPLE_POS_16X_ARRAY(sp._16xSample, locations);
            break;
#endif
         default:
            break;
         }
      } else {
         INTEL_SAMPLE_POS_1X(sp._1xSample);
         INTEL_SAMPLE_POS_2X(sp._2xSample);
         INTEL_SAMPLE_POS_4X(sp._4xSample);
         INTEL_SAMPLE_POS_8X(sp._8xSample);
#if GFX_VER >= 9
         INTEL_SAMPLE_POS_16X(sp._16xSample);
#endif
      }
   }
}
#endif

#if GFX_VER >= 11
void
genX(emit_shading_rate)(struct anv_batch *batch,
                        const struct anv_graphics_pipeline *pipeline,
                        struct anv_dynamic_state *dynamic_state)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;

#if GFX_VER == 11
   anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
      cps.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE;
      if (cps_enable) {
         cps.MinCPSizeX = dynamic_state->fragment_shading_rate.rate.width;
         cps.MinCPSizeY = dynamic_state->fragment_shading_rate.rate.height;
      }
   }
#elif GFX_VER >= 12
   /* TODO: we can optimize this flush in the following cases:
    *
    *    In the case where the last geometry shader emits a value that is not
    *    constant, we can avoid this stall because we can synchronize the
    *    pixel shader internally with
    *    3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
    *
    *    If we know that the previous pipeline and the current one are using
    *    the same fragment shading rate.
    */
   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
      pc.PSSStallSyncEnable = true;
#else
      pc.PSDSyncEnable = true;
#endif
   }

   anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) {
      struct anv_device *device = pipeline->base.device;

      cps.CoarsePixelShadingStateArrayPointer =
         get_cps_state_offset(device, cps_enable, dynamic_state);
   }
#endif
}
#endif /* GFX_VER >= 11 */

static uint32_t
vk_to_intel_tex_filter(VkFilter filter, bool anisotropyEnable)
{
   switch (filter) {
   default:
      assert(!"Invalid filter");
   case VK_FILTER_NEAREST:
      return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_NEAREST;
   case VK_FILTER_LINEAR:
      return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR;
   }
}

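/* The hardware field encodes the maximum anisotropy ratio in steps of two:
 * a ratio of 2 maps to 0, 4 to 1, and so on up to 16 mapping to 7, which is
 * what (clamp(ratio, 2, 16) - 2) / 2 below computes.
 */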
static uint32_t
vk_to_intel_max_anisotropy(float ratio)
{
   return (anv_clamp_f(ratio, 2, 16) - 2) / 2;
}

static const uint32_t vk_to_intel_mipmap_mode[] = {
   [VK_SAMPLER_MIPMAP_MODE_NEAREST]          = MIPFILTER_NEAREST,
   [VK_SAMPLER_MIPMAP_MODE_LINEAR]           = MIPFILTER_LINEAR
};

static const uint32_t vk_to_intel_tex_address[] = {
   [VK_SAMPLER_ADDRESS_MODE_REPEAT]          = TCM_WRAP,
   [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR,
   [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE]   = TCM_CLAMP,
   [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
   [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
};

/* Vulkan specifies the result of shadow comparisons as:
 *     1     if   ref <op> texel,
 *     0     otherwise.
 *
 * The hardware does:
 *     0     if texel <op> ref,
 *     1     otherwise.
 *
 * So, these look a bit strange because there's both a negation
 * and swapping of the arguments involved.
 */
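/* For example, VK_COMPARE_OP_LESS wants 1 when ref < texel; PREFILTEROP_LEQUAL
 * returns 0 when texel <= ref and 1 otherwise, i.e. 1 exactly when ref < texel.
 */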
static const uint32_t vk_to_intel_shadow_compare_op[] = {
   [VK_COMPARE_OP_NEVER]                        = PREFILTEROP_ALWAYS,
   [VK_COMPARE_OP_LESS]                         = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_EQUAL]                        = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]                = PREFILTEROP_LESS,
   [VK_COMPARE_OP_GREATER]                      = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_NOT_EQUAL]                    = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]             = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROP_NEVER,
};

#if GFX_VER >= 9
static const uint32_t vk_to_intel_sampler_reduction_mode[] = {
   [VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT] = STD_FILTER,
   [VK_SAMPLER_REDUCTION_MODE_MIN_EXT]              = MINIMUM,
   [VK_SAMPLER_REDUCTION_MODE_MAX_EXT]              = MAXIMUM,
};
#endif

VkResult genX(CreateSampler)(
    VkDevice                                    _device,
    const VkSamplerCreateInfo*                  pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSampler*                                  pSampler)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler),
                              VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   sampler->n_planes = 1;

   uint32_t border_color_stride = GFX_VERx10 == 75 ? 512 : 64;
   uint32_t border_color_offset;
   ASSERTED bool has_custom_color = false;
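   /* The standard Vulkan border colors are pre-packed at device creation in
    * device->border_colors, one entry per VkBorderColor enum value at a
    * fixed stride; custom border colors get a slot from the reserved pool
    * instead.
    */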
   if (pCreateInfo->borderColor <= VK_BORDER_COLOR_INT_OPAQUE_WHITE) {
      border_color_offset = device->border_colors.offset +
                            pCreateInfo->borderColor *
                            border_color_stride;
   } else {
      assert(GFX_VER >= 8);
      sampler->custom_border_color =
         anv_state_reserved_pool_alloc(&device->custom_border_colors);
      border_color_offset = sampler->custom_border_color.offset;
   }

#if GFX_VER >= 9
   unsigned sampler_reduction_mode = STD_FILTER;
   bool enable_sampler_reduction = false;
#endif

   vk_foreach_struct(ext, pCreateInfo->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: {
         VkSamplerYcbcrConversionInfo *pSamplerConversion =
            (VkSamplerYcbcrConversionInfo *) ext;
         ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion,
                         pSamplerConversion->conversion);

         /* Ignore conversion for non-YUV formats. This fulfills a
          * requirement for clients that want to use the same code path for
          * images with external formats (VK_FORMAT_UNDEFINED) and "regular"
          * RGBA images where the format is known.
          */
         if (conversion == NULL || !conversion->format->can_ycbcr)
            break;

         sampler->n_planes = conversion->format->n_planes;
         sampler->conversion = conversion;
         break;
      }
#if GFX_VER >= 9
      case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO: {
         VkSamplerReductionModeCreateInfo *sampler_reduction =
            (VkSamplerReductionModeCreateInfo *) ext;
         sampler_reduction_mode =
            vk_to_intel_sampler_reduction_mode[sampler_reduction->reductionMode];
         enable_sampler_reduction = true;
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT: {
         VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
            (VkSamplerCustomBorderColorCreateInfoEXT *) ext;
         if (sampler->custom_border_color.map == NULL)
            break;
         struct gfx8_border_color *cbc = sampler->custom_border_color.map;
         if (custom_border_color->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
            /* B4G4R4A4_UNORM_PACK16 is treated as R4G4B4A4_UNORM_PACK16 with
             * a swizzle, but this does not carry over to the sampler for
             * border colors, so we need to do the swizzle ourselves here.
             */
            cbc->uint32[0] = custom_border_color->customBorderColor.uint32[2];
            cbc->uint32[1] = custom_border_color->customBorderColor.uint32[1];
            cbc->uint32[2] = custom_border_color->customBorderColor.uint32[0];
            cbc->uint32[3] = custom_border_color->customBorderColor.uint32[3];
         } else {
            /* Both structs share the same layout, so just copy them over. */
            memcpy(cbc, &custom_border_color->customBorderColor,
                   sizeof(VkClearColorValue));
         }
         has_custom_color = true;
         break;
      }
      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }

   assert((sampler->custom_border_color.map == NULL) || has_custom_color);

   if (device->physical->has_bindless_samplers) {
      /* If we have bindless, allocate enough samplers.  We allocate 32 bytes
       * for each sampler instead of 16 bytes because we want all bindless
       * samplers to be 32-byte aligned so we don't have to use indirect
       * sampler messages on them.
       */
      sampler->bindless_state =
         anv_state_pool_alloc(&device->dynamic_state_pool,
                              sampler->n_planes * 32, 32);
   }

   for (unsigned p = 0; p < sampler->n_planes; p++) {
      const bool plane_has_chroma =
         sampler->conversion && sampler->conversion->format->planes[p].has_chroma;
      const VkFilter min_filter =
         plane_has_chroma ? sampler->conversion->chroma_filter : pCreateInfo->minFilter;
      const VkFilter mag_filter =
         plane_has_chroma ? sampler->conversion->chroma_filter : pCreateInfo->magFilter;
      const bool enable_min_filter_addr_rounding = min_filter != VK_FILTER_NEAREST;
      const bool enable_mag_filter_addr_rounding = mag_filter != VK_FILTER_NEAREST;
      /* From Broadwell PRM, SAMPLER_STATE:
       *   "Mip Mode Filter must be set to MIPFILTER_NONE for Planar YUV surfaces."
       */
      const bool isl_format_is_planar_yuv = sampler->conversion &&
         isl_format_is_yuv(sampler->conversion->format->planes[0].isl_format) &&
         isl_format_is_planar(sampler->conversion->format->planes[0].isl_format);

      const uint32_t mip_filter_mode =
         isl_format_is_planar_yuv ?
         MIPFILTER_NONE : vk_to_intel_mipmap_mode[pCreateInfo->mipmapMode];

      struct GENX(SAMPLER_STATE) sampler_state = {
         .SamplerDisable = false,
         .TextureBorderColorMode = DX10OGL,

#if GFX_VER >= 11
         .CPSLODCompensationEnable = true,
#endif

#if GFX_VER >= 8
         .LODPreClampMode = CLAMP_MODE_OGL,
#else
         .LODPreClampEnable = CLAMP_ENABLE_OGL,
#endif

#if GFX_VER == 8
         .BaseMipLevel = 0.0,
#endif
         .MipModeFilter = mip_filter_mode,
         .MagModeFilter = vk_to_intel_tex_filter(mag_filter, pCreateInfo->anisotropyEnable),
         .MinModeFilter = vk_to_intel_tex_filter(min_filter, pCreateInfo->anisotropyEnable),
         .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996),
         .AnisotropicAlgorithm =
            pCreateInfo->anisotropyEnable ? EWAApproximation : LEGACY,
         .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14),
         .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14),
         .ChromaKeyEnable = 0,
         .ChromaKeyIndex = 0,
         .ChromaKeyMode = 0,
         .ShadowFunction =
            vk_to_intel_shadow_compare_op[pCreateInfo->compareEnable ?
                                        pCreateInfo->compareOp : VK_COMPARE_OP_NEVER],
         .CubeSurfaceControlMode = OVERRIDE,

         .BorderColorPointer = border_color_offset,

#if GFX_VER >= 8
         .LODClampMagnificationMode = MIPNONE,
#endif

         .MaximumAnisotropy = vk_to_intel_max_anisotropy(pCreateInfo->maxAnisotropy),
         .RAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
         .RAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding,
         .VAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
         .VAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding,
         .UAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
         .UAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding,
         .TrilinearFilterQuality = 0,
         .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates,
         .TCXAddressControlMode = vk_to_intel_tex_address[pCreateInfo->addressModeU],
         .TCYAddressControlMode = vk_to_intel_tex_address[pCreateInfo->addressModeV],
         .TCZAddressControlMode = vk_to_intel_tex_address[pCreateInfo->addressModeW],

#if GFX_VER >= 9
         .ReductionType = sampler_reduction_mode,
         .ReductionTypeEnable = enable_sampler_reduction,
#endif
      };

      GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state);

      if (sampler->bindless_state.map) {
         memcpy(sampler->bindless_state.map + p * 32,
                sampler->state[p], GENX(SAMPLER_STATE_length) * 4);
      }
   }

   *pSampler = anv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}