/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_NIR_RT_BUILDER_H
#define BRW_NIR_RT_BUILDER_H

#include "brw_rt.h"
#include "nir_builder.h"

/* We have our own load/store scratch helpers because they emit a global
 * memory read or write based on the scratch_base_ptr system value rather
 * than a load/store_scratch intrinsic.
 */
static inline nir_ssa_def *
brw_nir_rt_load_scratch(nir_builder *b, uint32_t offset, unsigned align,
                        unsigned num_components, unsigned bit_size)
{
   nir_ssa_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   return nir_load_global(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                          num_components, bit_size);
}

static inline void
brw_nir_rt_store_scratch(nir_builder *b, uint32_t offset, unsigned align,
                         nir_ssa_def *value, nir_component_mask_t write_mask)
{
   nir_ssa_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   nir_store_global(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                    value, write_mask);
}

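/* Spawn a bindless thread at the given shader record address, passing the
 * current BTD global argument pointer through to the new thread.
 */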
static inline void
brw_nir_btd_spawn(nir_builder *b, nir_ssa_def *record_addr)
{
   nir_btd_spawn_intel(b, nir_load_btd_global_arg_addr_intel(b), record_addr);
}

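/* Retire the current bindless thread. */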
static inline void
brw_nir_btd_retire(nir_builder *b)
{
   nir_btd_retire_intel(b);
}

/** This is a pseudo-op which does a bindless return
 *
 * It loads the return address from the stack and calls btd_spawn to spawn the
 * resume shader.
 */
static inline void
brw_nir_btd_return(struct nir_builder *b)
{
   assert(b->shader->scratch_size == BRW_BTD_STACK_CALLEE_DATA_SIZE);
   nir_ssa_def *resume_addr =
      brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET,
                              8 /* align */, 1, 64);
   brw_nir_btd_spawn(b, resume_addr);
}

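/* Assert that a NIR SSA def has the expected number of components and bit
 * size.  Used below to sanity-check values before they are packed into
 * fixed hardware layouts.
 */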
static inline void
assert_def_size(nir_ssa_def *def, unsigned num_components, unsigned bit_size)
{
   assert(def->num_components == num_components);
   assert(def->bit_size == bit_size);
}

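/* Total number of hardware ray-tracing stacks on the device: the number of
 * stacks per dual-subslice (DSS) times the number of dual-subslices.
 */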
static inline nir_ssa_def *
brw_nir_num_rt_stacks(nir_builder *b,
                      const struct intel_device_info *devinfo)
{
   return nir_imul_imm(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                          intel_device_info_num_dual_subslices(devinfo));
}

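/* Globally unique ray-tracing stack ID for the current invocation:
 *
 *    DSSID * numDSSRTStacks + stackID
 */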
static inline nir_ssa_def *
brw_nir_rt_stack_id(nir_builder *b)
{
   return nir_iadd(b, nir_umul_32x16(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                                        nir_load_btd_dss_id_intel(b)),
                      nir_load_btd_stack_id_intel(b));
}

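/* Address of the software-managed "hotzone" for the current stack.  Hotzones
 * are allocated at negative offsets from the ray-tracing memory base address,
 * one BRW_RT_SIZEOF_HOTZONE slot per stack.
 */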
static inline nir_ssa_def *
brw_nir_rt_sw_hotzone_addr(nir_builder *b,
                           const struct intel_device_info *devinfo)
{
   nir_ssa_def *offset32 =
      nir_imul_imm(b, brw_nir_rt_stack_id(b), BRW_RT_SIZEOF_HOTZONE);

   offset32 = nir_iadd(b, offset32, nir_ineg(b,
      nir_imul_imm(b, brw_nir_num_rt_stacks(b, devinfo),
                      BRW_RT_SIZEOF_HOTZONE)));

   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_i2i64(b, offset32));
}

static inline nir_ssa_def *
brw_nir_rt_ray_addr(nir_builder *b)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    stackBase = RTDispatchGlobals.rtMemBasePtr
    *              + (DSSID * RTDispatchGlobals.numDSSRTStacks + stackID)
    *              * RTDispatchGlobals.stackSizePerRay // 64B aligned
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_ssa_def *offset32 =
      nir_imul(b, brw_nir_rt_stack_id(b),
                  nir_load_ray_hw_stack_size_intel(b));
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_u2u64(b, offset32));
}

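/* Address of the committed or potential HitInfo structure in the per-ray
 * hardware stack.  The potential hit immediately follows the committed one.
 */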
static inline nir_ssa_def *
brw_nir_rt_mem_hit_addr(nir_builder *b, bool committed)
{
   return nir_iadd_imm(b, brw_nir_rt_ray_addr(b),
                          committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
}

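/* Address of the hit attribute data area within the per-ray stack. */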
static inline nir_ssa_def *
brw_nir_rt_hit_attrib_data_addr(nir_builder *b)
{
   return nir_iadd_imm(b, brw_nir_rt_ray_addr(b),
                          BRW_RT_OFFSETOF_HIT_ATTRIB_DATA);
}

static inline nir_ssa_def *
brw_nir_rt_mem_ray_addr(nir_builder *b,
                        enum brw_rt_bvh_level bvh_level)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    rayBase = stackBase + sizeof(HitInfo) * 2 // 64B aligned
    *    rayPtr  = rayBase + bvhLevel * sizeof(Ray); // 64B aligned
    *
    * In Vulkan, we always have exactly two levels of BVH: World and Object.
    */
   uint32_t offset = BRW_RT_SIZEOF_HIT_INFO * 2 +
                     bvh_level * BRW_RT_SIZEOF_RAY;
   return nir_iadd_imm(b, brw_nir_rt_ray_addr(b), offset);
}

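/* Address of the software stack for the current invocation.  The software
 * stacks live immediately after all of the hardware ray-tracing stacks, each
 * one sw_stack_size bytes in size.
 */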
static inline nir_ssa_def *
brw_nir_rt_sw_stack_addr(nir_builder *b,
                         const struct intel_device_info *devinfo)
{
   nir_ssa_def *addr = nir_load_ray_base_mem_addr_intel(b);

   nir_ssa_def *offset32 = nir_imul(b, brw_nir_num_rt_stacks(b, devinfo),
                                       nir_load_ray_hw_stack_size_intel(b));
   addr = nir_iadd(b, addr, nir_u2u64(b, offset32));

   return nir_iadd(b, addr,
      nir_imul(b, nir_u2u64(b, brw_nir_rt_stack_id(b)),
                  nir_u2u64(b, nir_load_ray_sw_stack_size_intel(b))));
}

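/* Extract the third 16-bit word (bits 32..47) of a 64-bit value. */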
static inline nir_ssa_def *
nir_unpack_64_4x16_split_z(nir_builder *b, nir_ssa_def *val)
{
   return nir_unpack_32_2x16_split_x(b, nir_unpack_64_2x32_split_y(b, val));
}

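/* Decoded fields of the RTDispatchGlobals structure, filled out by
 * brw_nir_rt_load_globals().
 */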
struct brw_nir_rt_globals_defs {
   nir_ssa_def *base_mem_addr;
   nir_ssa_def *call_stack_handler_addr;
   nir_ssa_def *hw_stack_size;
   nir_ssa_def *num_dss_rt_stacks;
   nir_ssa_def *hit_sbt_addr;
   nir_ssa_def *hit_sbt_stride;
   nir_ssa_def *miss_sbt_addr;
   nir_ssa_def *miss_sbt_stride;
   nir_ssa_def *sw_stack_size;
   nir_ssa_def *launch_size;
   nir_ssa_def *call_sbt_addr;
   nir_ssa_def *call_sbt_stride;
   nir_ssa_def *resume_sbt_addr;
};

static inline void
brw_nir_rt_load_globals(nir_builder *b,
                        struct brw_nir_rt_globals_defs *defs)
{
   nir_ssa_def *addr = nir_load_btd_global_arg_addr_intel(b);

   nir_ssa_def *data;
   data = nir_load_global_const_block_intel(b, 16, addr, nir_imm_true(b));
   defs->base_mem_addr = nir_pack_64_2x32(b, nir_channels(b, data, 0x3));

   defs->call_stack_handler_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));

   defs->hw_stack_size = nir_channel(b, data, 4);
   defs->num_dss_rt_stacks = nir_iand_imm(b, nir_channel(b, data, 5), 0xffff);
   defs->hit_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 8),
                                nir_extract_i16(b, nir_channel(b, data, 9),
                                                   nir_imm_int(b, 0)));
   defs->hit_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 9));
   defs->miss_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 10),
                                nir_extract_i16(b, nir_channel(b, data, 11),
                                                   nir_imm_int(b, 0)));
   defs->miss_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 11));
   defs->sw_stack_size = nir_channel(b, data, 12);
   defs->launch_size = nir_channels(b, data, 0x7u << 13);

   data = nir_load_global_const_block_intel(b, 8, nir_iadd_imm(b, addr, 64),
                                                  nir_imm_true(b));
   defs->call_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
                                nir_extract_i16(b, nir_channel(b, data, 1),
                                                   nir_imm_int(b, 0)));
   defs->call_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));

   defs->resume_sbt_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
}

static inline nir_ssa_def *
brw_nir_rt_unpack_leaf_ptr(nir_builder *b, nir_ssa_def *vec2)
{
   /* Hit record leaf pointers are 42-bit and assumed to be in 64B chunks.
    * This leaves 22 bits at the top for other stuff.
    */
   nir_ssa_def *ptr64 = nir_imul_imm(b, nir_pack_64_2x32(b, vec2), 64);

   /* The top 16 bits (remember, we shifted by 6 already) contain garbage
    * that we need to get rid of.
    */
   nir_ssa_def *ptr_lo = nir_unpack_64_2x32_split_x(b, ptr64);
   nir_ssa_def *ptr_hi = nir_unpack_64_2x32_split_y(b, ptr64);
   ptr_hi = nir_extract_i16(b, ptr_hi, nir_imm_int(b, 0));
   return nir_pack_64_2x32_split(b, ptr_lo, ptr_hi);
}

struct brw_nir_rt_mem_hit_defs {
   nir_ssa_def *t;
   nir_ssa_def *tri_bary; /**< Only valid for triangle geometry */
   nir_ssa_def *aabb_hit_kind; /**< Only valid for AABB geometry */
   nir_ssa_def *leaf_type;
   nir_ssa_def *prim_leaf_index;
   nir_ssa_def *front_face;
   nir_ssa_def *prim_leaf_ptr;
   nir_ssa_def *inst_leaf_ptr;
};

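/* Load and decode the committed or potential HitInfo structure from the
 * per-ray hardware stack.
 */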
static inline void
brw_nir_rt_load_mem_hit(nir_builder *b,
                        struct brw_nir_rt_mem_hit_defs *defs,
                        bool committed)
{
   nir_ssa_def *hit_addr = brw_nir_rt_mem_hit_addr(b, committed);

   nir_ssa_def *data = nir_load_global(b, hit_addr, 16, 4, 32);
   defs->t = nir_channel(b, data, 0);
   defs->aabb_hit_kind = nir_channel(b, data, 1);
   defs->tri_bary = nir_channels(b, data, 0x6);
   nir_ssa_def *bitfield = nir_channel(b, data, 3);
   defs->leaf_type =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 17), nir_imm_int(b, 3));
   defs->prim_leaf_index =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 20), nir_imm_int(b, 4));
   defs->front_face = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 27));

   data = nir_load_global(b, nir_iadd_imm(b, hit_addr, 16), 16, 4, 32);
   defs->prim_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 0));
   defs->inst_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
}

static inline void
brw_nir_memcpy_global(nir_builder *b,
                      nir_ssa_def *dst_addr, uint32_t dst_align,
                      nir_ssa_def *src_addr, uint32_t src_align,
                      uint32_t size)
{
   /* We're going to copy in 16B chunks */
   assert(size % 16 == 0);
   dst_align = MIN2(dst_align, 16);
   src_align = MIN2(src_align, 16);

   for (unsigned offset = 0; offset < size; offset += 16) {
      nir_ssa_def *data =
         nir_load_global(b, nir_iadd_imm(b, src_addr, offset), src_align,
                         4, 32);
      nir_store_global(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
                       data, 0xf /* write_mask */);
   }
}

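/* Commit the current hit by copying the potential HitInfo over the committed
 * one.
 */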
static inline void
brw_nir_rt_commit_hit(nir_builder *b)
{
   brw_nir_memcpy_global(b, brw_nir_rt_mem_hit_addr(b, true), 16,
                            brw_nir_rt_mem_hit_addr(b, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
}

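/* Fields of the in-memory ray structure stored in the per-ray hardware
 * stack.
 */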
struct brw_nir_rt_mem_ray_defs {
   nir_ssa_def *orig;
   nir_ssa_def *dir;
   nir_ssa_def *t_near;
   nir_ssa_def *t_far;
   nir_ssa_def *root_node_ptr;
   nir_ssa_def *ray_flags;
   nir_ssa_def *hit_group_sr_base_ptr;
   nir_ssa_def *hit_group_sr_stride;
   nir_ssa_def *miss_sr_ptr;
   nir_ssa_def *shader_index_multiplier;
   nir_ssa_def *inst_leaf_ptr;
   nir_ssa_def *ray_mask;
};

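/* Pack and write the in-memory ray structure for the given BVH level as four
 * 16B stores.
 */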
static inline void
brw_nir_rt_store_mem_ray(nir_builder *b,
                         const struct brw_nir_rt_mem_ray_defs *defs,
                         enum brw_rt_bvh_level bvh_level)
{
   nir_ssa_def *ray_addr = brw_nir_rt_mem_ray_addr(b, bvh_level);

   assert_def_size(defs->orig, 3, 32);
   assert_def_size(defs->dir, 3, 32);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 0), 16,
      nir_vec4(b, nir_channel(b, defs->orig, 0),
                  nir_channel(b, defs->orig, 1),
                  nir_channel(b, defs->orig, 2),
                  nir_channel(b, defs->dir, 0)),
      ~0 /* write mask */);

   assert_def_size(defs->t_near, 1, 32);
   assert_def_size(defs->t_far, 1, 32);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 16), 16,
      nir_vec4(b, nir_channel(b, defs->dir, 1),
                  nir_channel(b, defs->dir, 2),
                  defs->t_near,
                  defs->t_far),
      ~0 /* write mask */);

   assert_def_size(defs->root_node_ptr, 1, 64);
   assert_def_size(defs->ray_flags, 1, 16);
   assert_def_size(defs->hit_group_sr_base_ptr, 1, 64);
   assert_def_size(defs->hit_group_sr_stride, 1, 16);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 32), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
                     defs->ray_flags),
                  nir_unpack_64_2x32_split_x(b, defs->hit_group_sr_base_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->hit_group_sr_base_ptr),
                     defs->hit_group_sr_stride)),
      ~0 /* write mask */);

   /* leaf_ptr is optional */
   nir_ssa_def *inst_leaf_ptr;
   if (defs->inst_leaf_ptr) {
      inst_leaf_ptr = defs->inst_leaf_ptr;
   } else {
      inst_leaf_ptr = nir_imm_int64(b, 0);
   }

   assert_def_size(defs->miss_sr_ptr, 1, 64);
   assert_def_size(defs->shader_index_multiplier, 1, 32);
   assert_def_size(inst_leaf_ptr, 1, 64);
   assert_def_size(defs->ray_mask, 1, 32);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 48), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->miss_sr_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->miss_sr_ptr),
                     nir_unpack_32_2x16_split_x(b,
                        nir_ishl(b, defs->shader_index_multiplier,
                                    nir_imm_int(b, 8)))),
                  nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
                     nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
      ~0 /* write mask */);
}

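/* Read back and unpack the in-memory ray structure for the given BVH level.
 * This is the inverse of brw_nir_rt_store_mem_ray().
 */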
static inline void
brw_nir_rt_load_mem_ray(nir_builder *b,
                        struct brw_nir_rt_mem_ray_defs *defs,
                        enum brw_rt_bvh_level bvh_level)
{
   nir_ssa_def *ray_addr = brw_nir_rt_mem_ray_addr(b, bvh_level);

   nir_ssa_def *data[4] = {
      nir_load_global(b, nir_iadd_imm(b, ray_addr,  0), 16, 4, 32),
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 16), 16, 4, 32),
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 32), 16, 4, 32),
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 48), 16, 4, 32),
   };

   defs->orig = nir_channels(b, data[0], 0x7);
   defs->dir = nir_vec3(b, nir_channel(b, data[0], 3),
                           nir_channel(b, data[1], 0),
                           nir_channel(b, data[1], 1));
   defs->t_near = nir_channel(b, data[1], 2);
   defs->t_far = nir_channel(b, data[1], 3);
   defs->root_node_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 0),
                                nir_extract_i16(b, nir_channel(b, data[2], 1),
                                                   nir_imm_int(b, 0)));
   defs->ray_flags =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 1));
   defs->hit_group_sr_base_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 2),
                                nir_extract_i16(b, nir_channel(b, data[2], 3),
                                                   nir_imm_int(b, 0)));
   defs->hit_group_sr_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 3));
   defs->miss_sr_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 0),
                                nir_extract_i16(b, nir_channel(b, data[3], 1),
                                                   nir_imm_int(b, 0)));
   defs->shader_index_multiplier =
      nir_ushr(b, nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 1)),
                  nir_imm_int(b, 8));
   defs->inst_leaf_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 2),
                                nir_extract_i16(b, nir_channel(b, data[3], 3),
                                                   nir_imm_int(b, 0)));
   defs->ray_mask =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 3));
}

struct brw_nir_rt_bvh_instance_leaf_defs {
   nir_ssa_def *world_to_object[4];
   nir_ssa_def *instance_id;
   nir_ssa_def *instance_index;
   nir_ssa_def *object_to_world[4];
};

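/* Load the world/object transformation matrices and instance IDs from a BVH
 * instance leaf.
 */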
static inline void
brw_nir_rt_load_bvh_instance_leaf(nir_builder *b,
                                  struct brw_nir_rt_bvh_instance_leaf_defs *defs,
                                  nir_ssa_def *leaf_addr)
{
   /* We don't care about the first 16B of the leaf for now.  One day, we may
    * add code to decode it but none of that data is directly required for
    * implementing any ray-tracing built-ins.
    */

   defs->world_to_object[0] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 16), 4, 3, 32);
   defs->world_to_object[1] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 28), 4, 3, 32);
   defs->world_to_object[2] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 40), 4, 3, 32);
   /* The last column of each matrix is stored with the other matrix (the
    * object_to_world translation sits with the world_to_object rows and
    * vice versa), presumably because that layout is easier or faster for
    * the hardware to consume.
    */
   defs->object_to_world[3] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 52), 4, 3, 32);

   nir_ssa_def *data =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 64), 4, 4, 32);
   defs->instance_id = nir_channel(b, data, 2);
   defs->instance_index = nir_channel(b, data, 3);

   defs->object_to_world[0] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 80), 4, 3, 32);
   defs->object_to_world[1] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 92), 4, 3, 32);
   defs->object_to_world[2] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 104), 4, 3, 32);
   defs->world_to_object[3] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 116), 4, 3, 32);
}

#endif /* BRW_NIR_RT_BUILDER_H */