/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "common/gen_aux_map.h"
#include "common/gen_sample_positions.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "vk_util.h"

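/* Emit an optimized slice hashing table for the device.  On Gen11, when the
 * two pixel pipes have an unequal number of enabled subslices, a hashing
 * table is programmed that steers more work toward the pipe with more
 * subslices; on symmetric configurations nothing is emitted.
 */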
static void
genX(emit_slice_hashing_state)(struct anv_device *device,
                               struct anv_batch *batch)
{
   device->slice_hash = (struct anv_state) { 0 };

#if GEN_GEN == 11
   const unsigned *ppipe_subslices = device->info.ppipe_subslices;
   int subslices_delta = ppipe_subslices[0] - ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
   device->slice_hash =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   struct GENX(SLICE_HASH_TABLE) table0 = {
      .Entry = {
         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 }
      }
   };

   struct GENX(SLICE_HASH_TABLE) table1 = {
      .Entry = {
         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 }
      }
   };

   const struct GENX(SLICE_HASH_TABLE) *table =
      subslices_delta < 0 ? &table0 : &table1;
   GENX(SLICE_HASH_TABLE_pack)(NULL, device->slice_hash.map, table);

   anv_batch_emit(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
      ptr.SliceHashStatePointerValid = true;
      ptr.SliceHashTableStatePointer = device->slice_hash.offset;
   }

   anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
      mode.SliceHashingTableEnable = true;
   }
#endif
}

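/* Build and submit a small one-shot batch that brings a freshly created
 * device into a known state: select the 3D pipeline, program per-gen
 * workaround registers, default sample positions, slice hashing, etc.
 */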
VkResult
genX(init_device_state)(struct anv_device *device)
{
   struct anv_batch batch;

   uint32_t cmds[64];
   batch.start = batch.next = cmds;
   batch.end = (void *) cmds + sizeof(cmds);

   anv_batch_emit(&batch, GENX(PIPELINE_SELECT), ps) {
#if GEN_GEN >= 9
      ps.MaskBits = GEN_GEN >= 12 ? 0x13 : 3;
      ps.MediaSamplerDOPClockGateEnable = GEN_GEN >= 12;
#endif
      ps.PipelineSelection = _3D;
   }

#if GEN_GEN == 9
   uint32_t cache_mode_1;
   anv_pack_struct(&cache_mode_1, GENX(CACHE_MODE_1),
                   .FloatBlendOptimizationEnable = true,
                   .FloatBlendOptimizationEnableMask = true,
                   .MSCRAWHazardAvoidanceBit = true,
                   .MSCRAWHazardAvoidanceBitMask = true,
                   .PartialResolveDisableInVC = true,
                   .PartialResolveDisableInVCMask = true);

   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_1_num);
      lri.DataDWord      = cache_mode_1;
   }
#endif

   anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa);

   anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
      rect.ClippedDrawingRectangleYMin = 0;
      rect.ClippedDrawingRectangleXMin = 0;
      rect.ClippedDrawingRectangleYMax = UINT16_MAX;
      rect.ClippedDrawingRectangleXMax = UINT16_MAX;
      rect.DrawingRectangleOriginY = 0;
      rect.DrawingRectangleOriginX = 0;
   }

#if GEN_GEN >= 8
   anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck);

   /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
    * VkPhysicalDeviceFeatures::standardSampleLocations.
    */
   anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) {
      GEN_SAMPLE_POS_1X(sp._1xSample);
      GEN_SAMPLE_POS_2X(sp._2xSample);
      GEN_SAMPLE_POS_4X(sp._4xSample);
      GEN_SAMPLE_POS_8X(sp._8xSample);
#if GEN_GEN >= 9
      GEN_SAMPLE_POS_16X(sp._16xSample);
#endif
   }

   /* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the
    * section titled "Optimized Depth Buffer Clear and/or Stencil Buffer
    * Clear." It mentions that the packet overrides GPU state for the clear
    * operation and needs to be reset to 0s to clear the overrides. Depending
    * on the kernel, we may not get a context with the state for this packet
    * zeroed. Do it ourselves just in case. We've observed this to prevent a
    * number of GPU hangs on ICL.
    */
   anv_batch_emit(&batch, GENX(3DSTATE_WM_HZ_OP), hzp);
#endif

#if GEN_GEN == 11
   /* The default value of bit 5 "Headerless Message for Pre-emptable
    * Contexts" in the SAMPLER_MODE register is 0, which means headerless
    * sampler messages are not allowed for pre-emptable contexts.  Set bit 5
    * to 1 to allow them.
    */
   uint32_t sampler_mode;
   anv_pack_struct(&sampler_mode, GENX(SAMPLER_MODE),
                   .HeaderlessMessageforPreemptableContexts = true,
                   .HeaderlessMessageforPreemptableContextsMask = true);

   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(SAMPLER_MODE_num);
      lri.DataDWord      = sampler_mode;
   }

   /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
    * HALF_SLICE_CHICKEN7 register.
    */
   uint32_t half_slice_chicken7;
   anv_pack_struct(&half_slice_chicken7, GENX(HALF_SLICE_CHICKEN7),
                   .EnabledTexelOffsetPrecisionFix = true,
                   .EnabledTexelOffsetPrecisionFixMask = true);

   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(HALF_SLICE_CHICKEN7_num);
      lri.DataDWord      = half_slice_chicken7;
   }

   uint32_t tccntlreg;
   anv_pack_struct(&tccntlreg, GENX(TCCNTLREG),
                   .L3DataPartialWriteMergingEnable = true,
                   .ColorZPartialWriteMergingEnable = true,
                   .URBPartialWriteMergingEnable = true,
                   .TCDisable = true);

   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(TCCNTLREG_num);
      lri.DataDWord      = tccntlreg;
   }

#endif
   genX(emit_slice_hashing_state)(device, &batch);

#if GEN_GEN >= 11
   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (device->info.disable_ccs_repack) {
      uint32_t cache_mode_0;
      anv_pack_struct(&cache_mode_0,
                      GENX(CACHE_MODE_0),
                      .DisableRepackingforCompression = true,
                      .DisableRepackingforCompressionMask = true);

      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(CACHE_MODE_0_num);
         lri.DataDWord      = cache_mode_0;
      }
   }

   /* An unknown issue is causing VS push constants to become corrupted
    * during object-level preemption.  For now, restrict preemption to the
    * command-buffer level to avoid rendering corruption.
    */
   uint32_t cs_chicken1;
   anv_pack_struct(&cs_chicken1,
                   GENX(CS_CHICKEN1),
                   .ReplayMode = MidcmdbufferPreemption,
                   .ReplayModeMask = true);

   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CS_CHICKEN1_num);
      lri.DataDWord      = cs_chicken1;
   }
#endif

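   /* On Gen12, point the hardware at the auxiliary surface translation table
    * managed by gen_aux_map.  GFX_AUX_TABLE_BASE_ADDR is a 64-bit register,
    * so it is written as two 32-bit MI_LOAD_REGISTER_IMM commands.
    */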
#if GEN_GEN == 12
   if (device->info.has_aux_map) {
      uint64_t aux_base_addr = gen_aux_map_get_base(device->aux_map_ctx);
      assert(aux_base_addr % (32 * 1024) == 0);
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num);
         lri.DataDWord = aux_base_addr & 0xffffffff;
      }
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num) + 4;
         lri.DataDWord = aux_base_addr >> 32;
      }
   }
#endif

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (GEN_GEN >= 8 && device->physical->has_context_isolation) {
      UNUSED uint32_t tmp_reg;
#if GEN_GEN >= 9
      anv_pack_struct(&tmp_reg, GENX(CS_DEBUG_MODE2),
                      .CONSTANT_BUFFERAddressOffsetDisable = true,
                      .CONSTANT_BUFFERAddressOffsetDisableMask = true);
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(CS_DEBUG_MODE2_num);
         lri.DataDWord      = tmp_reg;
      }
#elif GEN_GEN == 8
      anv_pack_struct(&tmp_reg, GENX(INSTPM),
                      .CONSTANT_BUFFERAddressOffsetDisable = true,
                      .CONSTANT_BUFFERAddressOffsetDisableMask = true);
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(INSTPM_num);
         lri.DataDWord      = tmp_reg;
      }
#endif
   }

#if GEN_GEN >= 12
   const struct gen_l3_config *cfg = gen_get_default_l3_config(&device->info);
   if (!cfg) {
      /* Platforms with no configs just set up full-way allocation. */
      uint32_t l3cr;
      anv_pack_struct(&l3cr, GENX(L3ALLOC),
                      .L3FullWayAllocationEnable = true);
      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
         lri.RegisterOffset = GENX(L3ALLOC_num);
         lri.DataDWord      = l3cr;
      }
   }
#endif

   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);

   assert(batch.next <= batch.end);

   return anv_queue_submit_simple_batch(&device->queue, &batch);
}

static uint32_t
vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable)
{
   switch (filter) {
   default:
      assert(!"Invalid filter");
   case VK_FILTER_NEAREST:
      return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_NEAREST;
   case VK_FILTER_LINEAR:
      return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR;
   }
}

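/* The hardware encodes the maximum anisotropy ratio in steps of two starting
 * at 2:1, so a field value of 0 means 2:1 and 7 means 16:1.
 */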
static uint32_t
vk_to_gen_max_anisotropy(float ratio)
{
   return (anv_clamp_f(ratio, 2, 16) - 2) / 2;
}

static const uint32_t vk_to_gen_mipmap_mode[] = {
   [VK_SAMPLER_MIPMAP_MODE_NEAREST]          = MIPFILTER_NEAREST,
   [VK_SAMPLER_MIPMAP_MODE_LINEAR]           = MIPFILTER_LINEAR
};

static const uint32_t vk_to_gen_tex_address[] = {
   [VK_SAMPLER_ADDRESS_MODE_REPEAT]          = TCM_WRAP,
   [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR,
   [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE]   = TCM_CLAMP,
   [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
   [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
};

/* Vulkan specifies the result of shadow comparisons as:
 *     1     if   ref <op> texel,
 *     0     otherwise.
 *
 * The hardware does:
 *     0     if texel <op> ref,
 *     1     otherwise.
 *
 * So, these look a bit strange because there's both a negation
 * and swapping of the arguments involved.
 */
static const uint32_t vk_to_gen_shadow_compare_op[] = {
   [VK_COMPARE_OP_NEVER]                        = PREFILTEROPALWAYS,
   [VK_COMPARE_OP_LESS]                         = PREFILTEROPLEQUAL,
   [VK_COMPARE_OP_EQUAL]                        = PREFILTEROPNOTEQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]                = PREFILTEROPLESS,
   [VK_COMPARE_OP_GREATER]                      = PREFILTEROPGEQUAL,
   [VK_COMPARE_OP_NOT_EQUAL]                    = PREFILTEROPEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]             = PREFILTEROPGREATER,
   [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROPNEVER,
};

#if GEN_GEN >= 9
static const uint32_t vk_to_gen_sampler_reduction_mode[] = {
   [VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT] = STD_FILTER,
   [VK_SAMPLER_REDUCTION_MODE_MIN_EXT]              = MINIMUM,
   [VK_SAMPLER_REDUCTION_MODE_MAX_EXT]              = MAXIMUM,
};
#endif

VkResult genX(CreateSampler)(
    VkDevice                                    _device,
    const VkSamplerCreateInfo*                  pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSampler*                                  pSampler)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
   sampler->n_planes = 1;

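   /* Pre-defined Vulkan border colors come from a table allocated at device
    * creation; custom border colors (VK_EXT_custom_border_color) get an
    * entry from a reserved state pool instead.
    */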
   uint32_t border_color_stride = GEN_IS_HASWELL ? 512 : 64;
   uint32_t border_color_offset;
   ASSERTED bool has_custom_color = false;
   if (pCreateInfo->borderColor <= VK_BORDER_COLOR_INT_OPAQUE_WHITE) {
      border_color_offset = device->border_colors.offset +
                            pCreateInfo->borderColor *
                            border_color_stride;
   } else {
      assert(GEN_GEN >= 8);
      sampler->custom_border_color =
         anv_state_reserved_pool_alloc(&device->custom_border_colors);
      border_color_offset = sampler->custom_border_color.offset;
   }

#if GEN_GEN >= 9
   unsigned sampler_reduction_mode = STD_FILTER;
   bool enable_sampler_reduction = false;
#endif

   vk_foreach_struct(ext, pCreateInfo->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: {
         VkSamplerYcbcrConversionInfo *pSamplerConversion =
            (VkSamplerYcbcrConversionInfo *) ext;
         ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion,
                         pSamplerConversion->conversion);

         /* Ignore the conversion for non-YUV formats.  This fulfills a
          * requirement for clients that want to use the same code path for
          * images with external formats (VK_FORMAT_UNDEFINED) and "regular"
          * RGBA images whose format is known.
          */
         if (conversion == NULL || !conversion->format->can_ycbcr)
            break;

         sampler->n_planes = conversion->format->n_planes;
         sampler->conversion = conversion;
         break;
      }
#if GEN_GEN >= 9
      case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO: {
         VkSamplerReductionModeCreateInfo *sampler_reduction =
            (VkSamplerReductionModeCreateInfo *) ext;
         sampler_reduction_mode =
            vk_to_gen_sampler_reduction_mode[sampler_reduction->reductionMode];
         enable_sampler_reduction = true;
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT: {
         VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
            (VkSamplerCustomBorderColorCreateInfoEXT *) ext;
         if (sampler->custom_border_color.map == NULL)
            break;
         struct gen8_border_color *cbc = sampler->custom_border_color.map;
         if (custom_border_color->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
            /* B4G4R4A4_UNORM_PACK16 is treated as R4G4B4A4_UNORM_PACK16 with
             * a swizzle, but this does not carry over to the sampler for
             * border colors, so we need to do the swizzle ourselves here.
             */
            cbc->uint32[0] = custom_border_color->customBorderColor.uint32[2];
            cbc->uint32[1] = custom_border_color->customBorderColor.uint32[1];
            cbc->uint32[2] = custom_border_color->customBorderColor.uint32[0];
            cbc->uint32[3] = custom_border_color->customBorderColor.uint32[3];
         } else {
            /* Both structs share the same layout, so just copy them over. */
            memcpy(cbc, &custom_border_color->customBorderColor,
                   sizeof(VkClearColorValue));
         }
         has_custom_color = true;
         break;
      }
      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }

   assert((sampler->custom_border_color.map == NULL) || has_custom_color);

   if (device->physical->has_bindless_samplers) {
      /* If we have bindless, allocate enough samplers.  We allocate 32 bytes
       * for each sampler instead of 16 bytes because we want all bindless
       * samplers to be 32-byte aligned so we don't have to use indirect
       * sampler messages on them.
       */
      sampler->bindless_state =
         anv_state_pool_alloc(&device->dynamic_state_pool,
                              sampler->n_planes * 32, 32);
   }

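   /* Pack one SAMPLER_STATE per plane.  Multi-planar YCbCr conversions need
    * a separate sampler per plane, with chroma planes using the conversion's
    * chroma filter.
    */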
   for (unsigned p = 0; p < sampler->n_planes; p++) {
      const bool plane_has_chroma =
         sampler->conversion && sampler->conversion->format->planes[p].has_chroma;
      const VkFilter min_filter =
         plane_has_chroma ? sampler->conversion->chroma_filter : pCreateInfo->minFilter;
      const VkFilter mag_filter =
         plane_has_chroma ? sampler->conversion->chroma_filter : pCreateInfo->magFilter;
      const bool enable_min_filter_addr_rounding = min_filter != VK_FILTER_NEAREST;
      const bool enable_mag_filter_addr_rounding = mag_filter != VK_FILTER_NEAREST;
      /* From Broadwell PRM, SAMPLER_STATE:
       *   "Mip Mode Filter must be set to MIPFILTER_NONE for Planar YUV surfaces."
       */
      const uint32_t mip_filter_mode =
         (sampler->conversion &&
          isl_format_is_yuv(sampler->conversion->format->planes[0].isl_format)) ?
         MIPFILTER_NONE : vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode];

      struct GENX(SAMPLER_STATE) sampler_state = {
         .SamplerDisable = false,
         .TextureBorderColorMode = DX10OGL,

#if GEN_GEN >= 8
         .LODPreClampMode = CLAMP_MODE_OGL,
#else
         .LODPreClampEnable = CLAMP_ENABLE_OGL,
#endif

#if GEN_GEN == 8
         .BaseMipLevel = 0.0,
#endif
         .MipModeFilter = mip_filter_mode,
         .MagModeFilter = vk_to_gen_tex_filter(mag_filter, pCreateInfo->anisotropyEnable),
         .MinModeFilter = vk_to_gen_tex_filter(min_filter, pCreateInfo->anisotropyEnable),
         .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996),
         .AnisotropicAlgorithm =
            pCreateInfo->anisotropyEnable ? EWAApproximation : LEGACY,
         .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14),
         .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14),
         .ChromaKeyEnable = 0,
         .ChromaKeyIndex = 0,
         .ChromaKeyMode = 0,
         .ShadowFunction =
            vk_to_gen_shadow_compare_op[pCreateInfo->compareEnable ?
                                        pCreateInfo->compareOp : VK_COMPARE_OP_NEVER],
         .CubeSurfaceControlMode = OVERRIDE,

         .BorderColorPointer = border_color_offset,

#if GEN_GEN >= 8
         .LODClampMagnificationMode = MIPNONE,
#endif

         .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy),
         .RAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
         .RAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding,
         .VAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
         .VAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding,
         .UAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
         .UAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding,
         .TrilinearFilterQuality = 0,
         .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates,
         .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU],
         .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV],
         .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW],

#if GEN_GEN >= 9
         .ReductionType = sampler_reduction_mode,
         .ReductionTypeEnable = enable_sampler_reduction,
#endif
      };

      GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state);

      if (sampler->bindless_state.map) {
         memcpy(sampler->bindless_state.map + p * 32,
                sampler->state[p], GENX(SAMPLER_STATE_length) * 4);
      }
   }

   *pSampler = anv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}