/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_NIR_RT_BUILDER_H
#define BRW_NIR_RT_BUILDER_H

/* This file provides helpers to access the memory-based data structures
 * that the RT hardware reads/writes, and to compute their locations.
 *
 * See also "Memory Based Data Structures for Ray Tracing" (BSpec 47547) and
 * "Ray Tracing Address Computation for Memory Resident Structures" (BSpec
 * 47550).
 */

#include "brw_rt.h"
#include "nir_builder.h"

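/* Fragment shaders are the only stage with helper invocations; the helpers
 * below tag their memory accesses with ACCESS_INCLUDE_HELPERS there so that
 * helper lanes perform them as well (presumably to keep per-lane ray-query
 * state consistent). Other stages need no extra access flag.
 */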
#define is_access_for_builder(b) \
   ((b)->shader->info.stage == MESA_SHADER_FRAGMENT ? \
    ACCESS_INCLUDE_HELPERS : 0)

static inline nir_ssa_def *
brw_nir_rt_load(nir_builder *b, nir_ssa_def *addr, unsigned align,
                unsigned components, unsigned bit_size)
{
   return nir_build_load_global(b, components, bit_size, addr,
                                .align_mul = align,
                                .access = is_access_for_builder(b));
}

static inline void
brw_nir_rt_store(nir_builder *b, nir_ssa_def *addr, unsigned align,
                 nir_ssa_def *value, unsigned write_mask)
{
   nir_build_store_global(b, value, addr,
                          .align_mul = align,
                          .write_mask = (write_mask) &
                                        BITFIELD_MASK(value->num_components),
                          .access = is_access_for_builder(b));
}

static inline nir_ssa_def *
brw_nir_rt_load_const(nir_builder *b, unsigned components,
                      nir_ssa_def *addr, nir_ssa_def *pred)
{
   return nir_build_load_global_const_block_intel(b, components, addr, pred);
}

static inline nir_ssa_def *
brw_load_btd_dss_id(nir_builder *b)
{
   return nir_build_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_DSS);
}

static inline nir_ssa_def *
brw_nir_rt_load_num_simd_lanes_per_dss(nir_builder *b,
                                       const struct intel_device_info *devinfo)
{
   return nir_imm_int(b, devinfo->num_thread_per_eu *
                         devinfo->max_eus_per_subslice *
                         16 /* The RT computation is based off SIMD16 */);
}

static inline nir_ssa_def *
brw_load_eu_thread_simd(nir_builder *b)
{
   return nir_build_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_EU_THREAD_SIMD);
}

static inline nir_ssa_def *
brw_nir_rt_async_stack_id(nir_builder *b)
{
   assert(gl_shader_stage_is_callable(b->shader->info.stage) ||
          b->shader->info.stage == MESA_SHADER_RAYGEN);
   return nir_iadd(b, nir_umul_32x16(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                                        brw_load_btd_dss_id(b)),
                      nir_load_btd_stack_id_intel(b));
}

static inline nir_ssa_def *
brw_nir_rt_sync_stack_id(nir_builder *b)
{
   return brw_load_eu_thread_simd(b);
}

/* We have our own load/store scratch helpers because they emit a global
 * memory read or write based on the scratch_base_ptr system value rather
 * than a load/store_scratch intrinsic.
 */
static inline nir_ssa_def *
brw_nir_rt_load_scratch(nir_builder *b, uint32_t offset, unsigned align,
                        unsigned num_components, unsigned bit_size)
{
   nir_ssa_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   return brw_nir_rt_load(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                             num_components, bit_size);
}

static inline void
brw_nir_rt_store_scratch(nir_builder *b, uint32_t offset, unsigned align,
                         nir_ssa_def *value, nir_component_mask_t write_mask)
{
   nir_ssa_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 64, 1), offset);
   brw_nir_rt_store(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                    value, write_mask);
}

static inline void
brw_nir_btd_spawn(nir_builder *b, nir_ssa_def *record_addr)
{
   nir_btd_spawn_intel(b, nir_load_btd_global_arg_addr_intel(b), record_addr);
}

static inline void
brw_nir_btd_retire(nir_builder *b)
{
   nir_btd_retire_intel(b);
}

/** This is a pseudo-op which does a bindless return
 *
 * It loads the return address from the stack and calls btd_spawn to spawn the
 * resume shader.
 */
static inline void
brw_nir_btd_return(struct nir_builder *b)
{
   assert(b->shader->scratch_size == BRW_BTD_STACK_CALLEE_DATA_SIZE);
   nir_ssa_def *resume_addr =
      brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET,
                              8 /* align */, 1, 64);
   brw_nir_btd_spawn(b, resume_addr);
}

static inline void
assert_def_size(nir_ssa_def *def, unsigned num_components, unsigned bit_size)
{
   assert(def->num_components == num_components);
   assert(def->bit_size == bit_size);
}

static inline nir_ssa_def *
brw_nir_num_rt_stacks(nir_builder *b,
                      const struct intel_device_info *devinfo)
{
   return nir_imul_imm(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                          intel_device_info_num_dual_subslices(devinfo));
}

static inline nir_ssa_def *
brw_nir_rt_sw_hotzone_addr(nir_builder *b,
                           const struct intel_device_info *devinfo)
{
   nir_ssa_def *offset32 =
      nir_imul_imm(b, brw_nir_rt_async_stack_id(b),
                      BRW_RT_SIZEOF_HOTZONE);

   offset32 = nir_iadd(b, offset32, nir_ineg(b,
      nir_imul_imm(b, brw_nir_num_rt_stacks(b, devinfo),
                      BRW_RT_SIZEOF_HOTZONE)));

   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_i2i64(b, offset32));
}

static inline nir_ssa_def *
brw_nir_rt_sync_stack_addr(nir_builder *b,
                           nir_ssa_def *base_mem_addr,
                           const struct intel_device_info *devinfo)
{
   /* For Ray queries (Synchronous Ray Tracing), the formula is similar but
    * goes down from rtMemBasePtr:
    *
    *    syncBase  = RTDispatchGlobals.rtMemBasePtr
    *              - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1)
    *              * syncStackSize
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
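   /* For example (illustrative numbers only): with DSSID = 2,
    * NUM_SIMD_LANES_PER_DSS = 1024 and SyncStackID = 5, this lane's stack
    * starts (2 * 1024 + 5 + 1) * BRW_RT_SIZEOF_RAY_QUERY bytes below
    * rtMemBasePtr.
    */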
   nir_ssa_def *offset32 =
      nir_imul(b,
               nir_iadd(b,
                        nir_imul(b, brw_load_btd_dss_id(b),
                                    brw_nir_rt_load_num_simd_lanes_per_dss(b, devinfo)),
                        nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)),
               nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY));
   return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));
}

static inline nir_ssa_def *
brw_nir_rt_stack_addr(nir_builder *b)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    stackBase = RTDispatchGlobals.rtMemBasePtr
    *              + (DSSID * RTDispatchGlobals.numDSSRTStacks + stackID)
    *              * RTDispatchGlobals.stackSizePerRay // 64B aligned
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
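   /* brw_nir_rt_async_stack_id() already computes
    * DSSID * numDSSRTStacks + stackID, so only the multiply by
    * stackSizePerRay and the 64-bit add remain below.
    */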
   nir_ssa_def *offset32 =
      nir_imul(b, brw_nir_rt_async_stack_id(b),
                  nir_load_ray_hw_stack_size_intel(b));
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                      nir_u2u64(b, offset32));
}

static inline nir_ssa_def *
brw_nir_rt_mem_hit_addr_from_addr(nir_builder *b,
                        nir_ssa_def *stack_addr,
                        bool committed)
{
   return nir_iadd_imm(b, stack_addr, committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
}

static inline nir_ssa_def *
brw_nir_rt_mem_hit_addr(nir_builder *b, bool committed)
{
   return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
                          committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
}

static inline nir_ssa_def *
brw_nir_rt_hit_attrib_data_addr(nir_builder *b)
{
   return nir_iadd_imm(b, brw_nir_rt_stack_addr(b),
                          BRW_RT_OFFSETOF_HIT_ATTRIB_DATA);
}

static inline nir_ssa_def *
brw_nir_rt_mem_ray_addr(nir_builder *b,
                        nir_ssa_def *stack_addr,
                        enum brw_rt_bvh_level bvh_level)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    rayBase = stackBase + sizeof(HitInfo) * 2 // 64B aligned
    *    rayPtr  = rayBase + bvhLevel * sizeof(Ray); // 64B aligned
    *
    * In Vulkan, we always have exactly two levels of BVH: World and Object.
    */
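   /* In other words, the per-stack layout is: committed HitInfo, potential
    * HitInfo, then one Ray record per BVH level (world at level 0, object at
    * level 1).
    */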
   uint32_t offset = BRW_RT_SIZEOF_HIT_INFO * 2 +
                     bvh_level * BRW_RT_SIZEOF_RAY;
   return nir_iadd_imm(b, stack_addr, offset);
}

static inline nir_ssa_def *
brw_nir_rt_sw_stack_addr(nir_builder *b,
                         const struct intel_device_info *devinfo)
{
   nir_ssa_def *addr = nir_load_ray_base_mem_addr_intel(b);

   nir_ssa_def *offset32 = nir_imul(b, brw_nir_num_rt_stacks(b, devinfo),
                                       nir_load_ray_hw_stack_size_intel(b));
   addr = nir_iadd(b, addr, nir_u2u64(b, offset32));

   nir_ssa_def *offset_in_stack =
      nir_imul(b, nir_u2u64(b, brw_nir_rt_async_stack_id(b)),
                  nir_u2u64(b, nir_load_ray_sw_stack_size_intel(b)));

   return nir_iadd(b, addr, offset_in_stack);
}

static inline nir_ssa_def *
nir_unpack_64_4x16_split_z(nir_builder *b, nir_ssa_def *val)
{
   return nir_unpack_32_2x16_split_x(b, nir_unpack_64_2x32_split_y(b, val));
}

struct brw_nir_rt_globals_defs {
   nir_ssa_def *base_mem_addr;
   nir_ssa_def *call_stack_handler_addr;
   nir_ssa_def *hw_stack_size;
   nir_ssa_def *num_dss_rt_stacks;
   nir_ssa_def *hit_sbt_addr;
   nir_ssa_def *hit_sbt_stride;
   nir_ssa_def *miss_sbt_addr;
   nir_ssa_def *miss_sbt_stride;
   nir_ssa_def *sw_stack_size;
   nir_ssa_def *launch_size;
   nir_ssa_def *call_sbt_addr;
   nir_ssa_def *call_sbt_stride;
   nir_ssa_def *resume_sbt_addr;
};

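/* Loads the RT dispatch globals. The decode below assumes the following
 * dword layout (field names are informal, not BSpec names): dw0-1
 * rtMemBasePtr, dw2-3 call stack handler address, dw4 per-ray HW stack
 * size, dw5 numDSSRTStacks (low 16 bits), dw8-9 hit SBT base (48-bit) with
 * the stride in the top 16 bits of dw9, dw10-11 miss SBT base/stride in the
 * same form, dw12 per-ray SW stack size, dw13-15 launch size. At byte
 * offset 64: dw0-1 call SBT base/stride (same form) and dw2-3 resume SBT
 * base.
 */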
static inline void
brw_nir_rt_load_globals_addr(nir_builder *b,
                             struct brw_nir_rt_globals_defs *defs,
                             nir_ssa_def *addr)
{
   nir_ssa_def *data;
   data = brw_nir_rt_load_const(b, 16, addr, nir_imm_true(b));
   defs->base_mem_addr = nir_pack_64_2x32(b, nir_channels(b, data, 0x3));

   defs->call_stack_handler_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));

   defs->hw_stack_size = nir_channel(b, data, 4);
   defs->num_dss_rt_stacks = nir_iand_imm(b, nir_channel(b, data, 5), 0xffff);
   defs->hit_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 8),
                                nir_extract_i16(b, nir_channel(b, data, 9),
                                                   nir_imm_int(b, 0)));
   defs->hit_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 9));
   defs->miss_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 10),
                                nir_extract_i16(b, nir_channel(b, data, 11),
                                                   nir_imm_int(b, 0)));
   defs->miss_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 11));
   defs->sw_stack_size = nir_channel(b, data, 12);
   defs->launch_size = nir_channels(b, data, 0x7u << 13);

   data = brw_nir_rt_load_const(b, 8, nir_iadd_imm(b, addr, 64), nir_imm_true(b));
   defs->call_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
                                nir_extract_i16(b, nir_channel(b, data, 1),
                                                   nir_imm_int(b, 0)));
   defs->call_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));

   defs->resume_sbt_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
}

static inline void
brw_nir_rt_load_globals(nir_builder *b,
                        struct brw_nir_rt_globals_defs *defs)
{
   brw_nir_rt_load_globals_addr(b, defs, nir_load_btd_global_arg_addr_intel(b));
}

static inline nir_ssa_def *
brw_nir_rt_unpack_leaf_ptr(nir_builder *b, nir_ssa_def *vec2)
{
   /* Hit record leaf pointers are 42-bit and assumed to be in 64B chunks.
    * This leaves 22 bits at the top for other stuff.
    */
   nir_ssa_def *ptr64 = nir_imul_imm(b, nir_pack_64_2x32(b, vec2), 64);

   /* The top 16 bits (remember, we shifted by 6 already) contain garbage
    * that we need to get rid of.
    */
   nir_ssa_def *ptr_lo = nir_unpack_64_2x32_split_x(b, ptr64);
   nir_ssa_def *ptr_hi = nir_unpack_64_2x32_split_y(b, ptr64);
   ptr_hi = nir_extract_i16(b, ptr_hi, nir_imm_int(b, 0));
   return nir_pack_64_2x32_split(b, ptr_lo, ptr_hi);
}

struct brw_nir_rt_mem_hit_defs {
   nir_ssa_def *t;
   nir_ssa_def *tri_bary; /**< Only valid for triangle geometry */
   nir_ssa_def *aabb_hit_kind; /**< Only valid for AABB geometry */
   nir_ssa_def *valid;
   nir_ssa_def *leaf_type;
   nir_ssa_def *prim_leaf_index;
   nir_ssa_def *bvh_level;
   nir_ssa_def *front_face;
   nir_ssa_def *done; /**< Only for ray queries */
   nir_ssa_def *prim_leaf_ptr;
   nir_ssa_def *inst_leaf_ptr;
};

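/* Decodes a MemHit record. The loader below assumes the following layout:
 * dw0 t, dw1 barycentric u / AABB hit kind, dw2 barycentric v, dw3 a
 * bitfield with valid at bit 16, leafType in bits 17-19, primLeafIndex in
 * bits 20-23, bvhLevel in bits 24-26, frontFace at bit 27 and done at bit
 * 28; the second 16B holds the primitive and instance leaf pointers.
 */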
static inline void
brw_nir_rt_load_mem_hit_from_addr(nir_builder *b,
                                  struct brw_nir_rt_mem_hit_defs *defs,
                                  nir_ssa_def *stack_addr,
                                  bool committed)
{
   nir_ssa_def *hit_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, committed);

   nir_ssa_def *data = brw_nir_rt_load(b, hit_addr, 16, 4, 32);
   defs->t = nir_channel(b, data, 0);
   defs->aabb_hit_kind = nir_channel(b, data, 1);
   defs->tri_bary = nir_channels(b, data, 0x6);
   nir_ssa_def *bitfield = nir_channel(b, data, 3);
   defs->valid = nir_i2b(b, nir_iand_imm(b, bitfield, 1u << 16));
   defs->leaf_type =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 17), nir_imm_int(b, 3));
   defs->prim_leaf_index =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 20), nir_imm_int(b, 4));
   defs->bvh_level =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 24), nir_imm_int(b, 3));
   defs->front_face = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 27));
   defs->done = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 28));

   data = brw_nir_rt_load(b, nir_iadd_imm(b, hit_addr, 16), 16, 4, 32);
   defs->prim_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 0));
   defs->inst_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
}

static inline void
brw_nir_rt_init_mem_hit_at_addr(nir_builder *b,
                                nir_ssa_def *stack_addr,
                                bool committed,
                                nir_ssa_def *t_max)
{
   nir_ssa_def *mem_hit_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, committed);

   /* Set the t_max value from the ray initialization */
   nir_ssa_def *hit_t_addr = mem_hit_addr;
   brw_nir_rt_store(b, hit_t_addr, 4, t_max, 0x1);

   /* Clear all the flags packed behind primIndexDelta */
   nir_ssa_def *state_addr = nir_iadd_imm(b, mem_hit_addr, 12);
   brw_nir_rt_store(b, state_addr, 4, nir_imm_int(b, 0), 0x1);
}

static inline void
brw_nir_rt_load_mem_hit(nir_builder *b,
                        struct brw_nir_rt_mem_hit_defs *defs,
                        bool committed)
{
   brw_nir_rt_load_mem_hit_from_addr(b, defs, brw_nir_rt_stack_addr(b),
                                     committed);
}

static inline void
brw_nir_memcpy_global(nir_builder *b,
                      nir_ssa_def *dst_addr, uint32_t dst_align,
                      nir_ssa_def *src_addr, uint32_t src_align,
                      uint32_t size)
{
   /* We're going to copy in 16B chunks */
   assert(size % 16 == 0);
   dst_align = MIN2(dst_align, 16);
   src_align = MIN2(src_align, 16);

   for (unsigned offset = 0; offset < size; offset += 16) {
      nir_ssa_def *data =
         brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), src_align,
                         4, 32);
      brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
                       data, 0xf /* write_mask */);
   }
}

static inline void
brw_nir_memclear_global(nir_builder *b,
                        nir_ssa_def *dst_addr, uint32_t dst_align,
                        uint32_t size)
{
   /* We're going to clear in 16B chunks */
   assert(size % 16 == 0);
   dst_align = MIN2(dst_align, 16);

   nir_ssa_def *zero = nir_imm_ivec4(b, 0, 0, 0, 0);
   for (unsigned offset = 0; offset < size; offset += 16) {
      brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
                       zero, 0xf /* write_mask */);
   }
}

static inline nir_ssa_def *
brw_nir_rt_query_done(nir_builder *b, nir_ssa_def *stack_addr)
{
   struct brw_nir_rt_mem_hit_defs hit_in = {};
   brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr,
                                     false /* committed */);

   return hit_in.done;
}

static inline void
brw_nir_rt_set_dword_bit_at(nir_builder *b,
                            nir_ssa_def *addr,
                            uint32_t addr_offset,
                            uint32_t bit)
{
   nir_ssa_def *dword_addr = nir_iadd_imm(b, addr, addr_offset);
   nir_ssa_def *dword = brw_nir_rt_load(b, dword_addr, 4, 1, 32);
   brw_nir_rt_store(b, dword_addr, 4, nir_ior_imm(b, dword, 1u << bit), 0x1);
}

static inline void
brw_nir_rt_query_mark_done(nir_builder *b, nir_ssa_def *stack_addr)
{
   brw_nir_rt_set_dword_bit_at(b,
                               brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
                                                                 false /* committed */),
                               4 * 3 /* dword offset */, 28 /* bit */);
}

/* This helper clears dword 3 (the dword holding the valid bit) of both the
 * committed and the potential MemHit structures.
 */
static inline void
brw_nir_rt_query_mark_init(nir_builder *b, nir_ssa_def *stack_addr)
{
   nir_ssa_def *dword_addr;

   for (uint32_t i = 0; i < 2; i++) {
      dword_addr =
         nir_iadd_imm(b,
                      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr,
                                                        i == 0 /* committed */),
                      4 * 3 /* dword offset */);
      brw_nir_rt_store(b, dword_addr, 4, nir_imm_int(b, 0), 0x1);
   }
}

/* This helper is pretty much a memcpy of uncommitted into committed hit
 * structure, just adding the valid bit.
 */
static inline void
brw_nir_rt_commit_hit_addr(nir_builder *b, nir_ssa_def *stack_addr)
{
   nir_ssa_def *dst_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
   nir_ssa_def *src_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);

   for (unsigned offset = 0; offset < BRW_RT_SIZEOF_HIT_INFO; offset += 16) {
      nir_ssa_def *data =
         brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, offset), 16, 4, 32);

      if (offset == 0) {
         data = nir_vec4(b,
                         nir_channel(b, data, 0),
                         nir_channel(b, data, 1),
                         nir_channel(b, data, 2),
                         nir_ior_imm(b,
                                     nir_channel(b, data, 3),
                                     0x1 << 16 /* valid */));

         /* Also write the potential hit as we change it. */
         brw_nir_rt_store(b, nir_iadd_imm(b, src_addr, offset), 16,
                          data, 0xf /* write_mask */);
      }

      brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, offset), 16,
                       data, 0xf /* write_mask */);
   }
}

static inline void
brw_nir_rt_commit_hit(nir_builder *b)
{
   nir_ssa_def *stack_addr = brw_nir_rt_stack_addr(b);
   brw_nir_rt_commit_hit_addr(b, stack_addr);
}

static inline void
brw_nir_rt_generate_hit_addr(nir_builder *b, nir_ssa_def *stack_addr, nir_ssa_def *t_val)
{
   nir_ssa_def *dst_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true /* committed */);
   nir_ssa_def *src_addr =
      brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false /* committed */);

   /* Load 2 vec4 */
   nir_ssa_def *potential_data[2] = {
      brw_nir_rt_load(b, src_addr, 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, src_addr, 16), 16, 4, 32),
   };

   /* Update the potential hit distance */
   brw_nir_rt_store(b, src_addr, 4, t_val, 0x1);
   /* Also mark the potential hit as valid */
   brw_nir_rt_store(b, nir_iadd_imm(b, src_addr, 12), 4,
                    nir_ior_imm(b, nir_channel(b, potential_data[0], 3),
                                   (0x1 << 16) /* valid */), 0x1);

   /* Now write the committed hit. */
   nir_ssa_def *committed_data[2] = {
      nir_vec4(b,
               t_val,
               nir_imm_float(b, 0.0f), /* barycentric */
               nir_imm_float(b, 0.0f), /* barycentric */
               nir_ior_imm(b,
                           /* Just keep leaf_type */
                           nir_iand_imm(b, nir_channel(b, potential_data[0], 3), 0x0000e000),
                           (0x1 << 16) /* valid */ |
                           (BRW_RT_BVH_LEVEL_OBJECT << 5))),
      potential_data[1],
   };

   brw_nir_rt_store(b, dst_addr, 16, committed_data[0], 0xf /* write_mask */);
   brw_nir_rt_store(b, nir_iadd_imm(b, dst_addr, 16), 16,
                    committed_data[1], 0xf /* write_mask */);
}

struct brw_nir_rt_mem_ray_defs {
   nir_ssa_def *orig;
   nir_ssa_def *dir;
   nir_ssa_def *t_near;
   nir_ssa_def *t_far;
   nir_ssa_def *root_node_ptr;
   nir_ssa_def *ray_flags;
   nir_ssa_def *hit_group_sr_base_ptr;
   nir_ssa_def *hit_group_sr_stride;
   nir_ssa_def *miss_sr_ptr;
   nir_ssa_def *shader_index_multiplier;
   nir_ssa_def *inst_leaf_ptr;
   nir_ssa_def *ray_mask;
};

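/* The store/load helpers below assume the following MemRay layout (byte
 * offsets): 0 origin, 12 direction, 24 tNear, 28 tFar, 32 root node pointer
 * (48-bit) with the ray flags in the upper 16 bits, 40 hit-group shader
 * record base (48-bit) with its stride in the upper 16 bits, 48 miss shader
 * record pointer (48-bit) with the shader index multiplier (pre-shifted by
 * 8) above it, and 56 the instance leaf pointer (48-bit) with the ray mask
 * above it.
 */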
static inline void
brw_nir_rt_store_mem_ray_query_at_addr(nir_builder *b,
                                       nir_ssa_def *ray_addr,
                                       const struct brw_nir_rt_mem_ray_defs *defs)
{
   assert_def_size(defs->orig, 3, 32);
   assert_def_size(defs->dir, 3, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
      nir_vec4(b, nir_channel(b, defs->orig, 0),
                  nir_channel(b, defs->orig, 1),
                  nir_channel(b, defs->orig, 2),
                  nir_channel(b, defs->dir, 0)),
      ~0 /* write mask */);

   assert_def_size(defs->t_near, 1, 32);
   assert_def_size(defs->t_far, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
      nir_vec4(b, nir_channel(b, defs->dir, 1),
                  nir_channel(b, defs->dir, 2),
                  defs->t_near,
                  defs->t_far),
      ~0 /* write mask */);

   assert_def_size(defs->root_node_ptr, 1, 64);
   assert_def_size(defs->ray_flags, 1, 16);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
      nir_vec2(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
                     defs->ray_flags)),
      0x3 /* write mask */);

   /* leaf_ptr is optional */
   nir_ssa_def *inst_leaf_ptr;
   if (defs->inst_leaf_ptr) {
      inst_leaf_ptr = defs->inst_leaf_ptr;
   } else {
      inst_leaf_ptr = nir_imm_int64(b, 0);
   }

   assert_def_size(inst_leaf_ptr, 1, 64);
   assert_def_size(defs->ray_mask, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 56), 8,
      nir_vec2(b, nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
                     nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
      ~0 /* write mask */);
}

static inline void
brw_nir_rt_store_mem_ray(nir_builder *b,
                         const struct brw_nir_rt_mem_ray_defs *defs,
                         enum brw_rt_bvh_level bvh_level)
{
   nir_ssa_def *ray_addr =
      brw_nir_rt_mem_ray_addr(b, brw_nir_rt_stack_addr(b), bvh_level);

   assert_def_size(defs->orig, 3, 32);
   assert_def_size(defs->dir, 3, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 0), 16,
      nir_vec4(b, nir_channel(b, defs->orig, 0),
                  nir_channel(b, defs->orig, 1),
                  nir_channel(b, defs->orig, 2),
                  nir_channel(b, defs->dir, 0)),
      ~0 /* write mask */);

   assert_def_size(defs->t_near, 1, 32);
   assert_def_size(defs->t_far, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 16), 16,
      nir_vec4(b, nir_channel(b, defs->dir, 1),
                  nir_channel(b, defs->dir, 2),
                  defs->t_near,
                  defs->t_far),
      ~0 /* write mask */);

   assert_def_size(defs->root_node_ptr, 1, 64);
   assert_def_size(defs->ray_flags, 1, 16);
   assert_def_size(defs->hit_group_sr_base_ptr, 1, 64);
   assert_def_size(defs->hit_group_sr_stride, 1, 16);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 32), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
                     defs->ray_flags),
                  nir_unpack_64_2x32_split_x(b, defs->hit_group_sr_base_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->hit_group_sr_base_ptr),
                     defs->hit_group_sr_stride)),
      ~0 /* write mask */);

   /* leaf_ptr is optional */
   nir_ssa_def *inst_leaf_ptr;
   if (defs->inst_leaf_ptr) {
      inst_leaf_ptr = defs->inst_leaf_ptr;
   } else {
      inst_leaf_ptr = nir_imm_int64(b, 0);
   }

   assert_def_size(defs->miss_sr_ptr, 1, 64);
   assert_def_size(defs->shader_index_multiplier, 1, 32);
   assert_def_size(inst_leaf_ptr, 1, 64);
   assert_def_size(defs->ray_mask, 1, 32);
   brw_nir_rt_store(b, nir_iadd_imm(b, ray_addr, 48), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->miss_sr_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->miss_sr_ptr),
                     nir_unpack_32_2x16_split_x(b,
                        nir_ishl(b, defs->shader_index_multiplier,
                                    nir_imm_int(b, 8)))),
                  nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
                     nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
      ~0 /* write mask */);
}

static inline void
brw_nir_rt_load_mem_ray_from_addr(nir_builder *b,
                                  struct brw_nir_rt_mem_ray_defs *defs,
                                  nir_ssa_def *ray_base_addr,
                                  enum brw_rt_bvh_level bvh_level)
{
   nir_ssa_def *ray_addr = brw_nir_rt_mem_ray_addr(b,
                                                   ray_base_addr,
                                                   bvh_level);

   nir_ssa_def *data[4] = {
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr,  0), 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 16), 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 32), 16, 4, 32),
      brw_nir_rt_load(b, nir_iadd_imm(b, ray_addr, 48), 16, 4, 32),
   };

   defs->orig = nir_channels(b, data[0], 0x7);
   defs->dir = nir_vec3(b, nir_channel(b, data[0], 3),
                           nir_channel(b, data[1], 0),
                           nir_channel(b, data[1], 1));
   defs->t_near = nir_channel(b, data[1], 2);
   defs->t_far = nir_channel(b, data[1], 3);
   defs->root_node_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 0),
                                nir_extract_i16(b, nir_channel(b, data[2], 1),
                                                   nir_imm_int(b, 0)));
   defs->ray_flags =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 1));
   defs->hit_group_sr_base_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 2),
                                nir_extract_i16(b, nir_channel(b, data[2], 3),
                                                   nir_imm_int(b, 0)));
   defs->hit_group_sr_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 3));
   defs->miss_sr_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 0),
                                nir_extract_i16(b, nir_channel(b, data[3], 1),
                                                   nir_imm_int(b, 0)));
   defs->shader_index_multiplier =
      nir_ushr(b, nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 1)),
                  nir_imm_int(b, 8));
   defs->inst_leaf_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 2),
                                nir_extract_i16(b, nir_channel(b, data[3], 3),
                                                   nir_imm_int(b, 0)));
   defs->ray_mask =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 3));
}

static inline void
brw_nir_rt_load_mem_ray(nir_builder *b,
                        struct brw_nir_rt_mem_ray_defs *defs,
                        enum brw_rt_bvh_level bvh_level)
{
   brw_nir_rt_load_mem_ray_from_addr(b, defs, brw_nir_rt_stack_addr(b),
                                     bvh_level);
}

struct brw_nir_rt_bvh_instance_leaf_defs {
   nir_ssa_def *shader_index;
   nir_ssa_def *contribution_to_hit_group_index;
   nir_ssa_def *world_to_object[4];
   nir_ssa_def *instance_id;
   nir_ssa_def *instance_index;
   nir_ssa_def *object_to_world[4];
};

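/* Decodes a BVH instance leaf. The loader below assumes this layout (byte
 * offsets): 0 shader index (low 24 bits), 4 contribution-to-hit-group index
 * (low 24 bits), 16/28/40 the three vec3s of the world-to-object matrix,
 * 52 the object-to-world translation, 72 instance ID, 76 instance index,
 * 80/92/104 the three vec3s of the object-to-world matrix and 116 the
 * world-to-object translation (see the note about the swapped last columns
 * below).
 */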
static inline void
brw_nir_rt_load_bvh_instance_leaf(nir_builder *b,
                                  struct brw_nir_rt_bvh_instance_leaf_defs *defs,
                                  nir_ssa_def *leaf_addr)
{
   defs->shader_index =
      nir_iand_imm(b, brw_nir_rt_load(b, leaf_addr, 4, 1, 32), (1 << 24) - 1);
   defs->contribution_to_hit_group_index =
      nir_iand_imm(b,
                   brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 4), 4, 1, 32),
                   (1 << 24) - 1);

   defs->world_to_object[0] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 16), 4, 3, 32);
   defs->world_to_object[1] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 28), 4, 3, 32);
   defs->world_to_object[2] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 40), 4, 3, 32);
   /* The last column of each matrix is stored with the other matrix, i.e.
    * the two matrices swap their translation columns, presumably because
    * that layout is easier or faster for the hardware to consume.
    */
   defs->object_to_world[3] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 52), 4, 3, 32);

   nir_ssa_def *data =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 64), 4, 4, 32);
   defs->instance_id = nir_channel(b, data, 2);
   defs->instance_index = nir_channel(b, data, 3);

   defs->object_to_world[0] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 80), 4, 3, 32);
   defs->object_to_world[1] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 92), 4, 3, 32);
   defs->object_to_world[2] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 104), 4, 3, 32);
   defs->world_to_object[3] =
      brw_nir_rt_load(b, nir_iadd_imm(b, leaf_addr, 116), 4, 3, 32);
}

struct brw_nir_rt_bvh_primitive_leaf_defs {
   nir_ssa_def *shader_index;
   nir_ssa_def *geom_mask;
   nir_ssa_def *geom_index;
   nir_ssa_def *type;
   nir_ssa_def *geom_flags;
};

static inline void
brw_nir_rt_load_bvh_primitive_leaf(nir_builder *b,
                                   struct brw_nir_rt_bvh_primitive_leaf_defs *defs,
                                   nir_ssa_def *leaf_addr)
{
   nir_ssa_def *desc = brw_nir_rt_load(b, leaf_addr, 4, 2, 32);

   defs->shader_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 23), nir_imm_int(b, 0));
   defs->geom_mask =
      nir_ubitfield_extract(b, nir_channel(b, desc, 0),
                            nir_imm_int(b, 31), nir_imm_int(b, 24));

   defs->geom_index =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 28), nir_imm_int(b, 0));
   defs->type =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 29), nir_imm_int(b, 29));
   defs->geom_flags =
      nir_ubitfield_extract(b, nir_channel(b, desc, 1),
                            nir_imm_int(b, 31), nir_imm_int(b, 30));
}

static inline nir_ssa_def *
brw_nir_rt_load_primitive_id_from_hit(nir_builder *b,
                                      nir_ssa_def *is_procedural,
                                      const struct brw_nir_rt_mem_hit_defs *defs)
{
   if (!is_procedural) {
      is_procedural =
         nir_ieq(b, defs->leaf_type,
                    nir_imm_int(b, BRW_RT_BVH_NODE_TYPE_PROCEDURAL));
   }

   /* The IDs are located in the leaf. Take the index of the hit.
    *
    * The index is in dw[3] for procedural and dw[2] for quad.
    */
   nir_ssa_def *offset =
      nir_bcsel(b, is_procedural,
                   nir_iadd_imm(b, nir_ishl_imm(b, defs->prim_leaf_index, 2), 12),
                   nir_imm_int(b, 8));
   return nir_load_global(b, nir_iadd(b, defs->prim_leaf_ptr,
                                         nir_u2u64(b, offset)),
                             4, /* align */ 1, 32);
}

static inline nir_ssa_def *
brw_nir_rt_acceleration_structure_to_root_node(nir_builder *b,
                                               nir_ssa_def *as_addr)
{
   /* The HW memory structure in which we specify what acceleration structure
    * to traverse takes the address of the root node in the acceleration
    * structure, not the acceleration structure itself. To find that, we have
    * to read the root node offset from the acceleration structure, which is
    * the first QWord.
    *
    * But if the acceleration structure pointer is NULL, then we should return
    * NULL as the root node pointer.
    */
   nir_ssa_def *root_node_ptr, *null_node_ptr;
   nir_push_if(b, nir_ieq(b, as_addr, nir_imm_int64(b, 0)));
   {
      null_node_ptr = nir_imm_int64(b, 0);
   }
   nir_push_else(b, NULL);
   {
      root_node_ptr =
         nir_iadd(b, as_addr, brw_nir_rt_load(b, as_addr, 256, 1, 64));
   }
   nir_pop_if(b, NULL);

   return nir_if_phi(b, null_node_ptr, root_node_ptr);
}

#endif /* BRW_NIR_RT_BUILDER_H */