1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef MI_BUILDER_H
25 #define MI_BUILDER_H
26 
27 #include "dev/intel_device_info.h"
28 #include "genxml/genX_bits.h"
29 #include "util/bitscan.h"
30 #include "util/fast_idiv_by_const.h"
31 #include "util/u_math.h"
32 
33 #ifndef MI_BUILDER_NUM_ALLOC_GPRS
34 /** The number of GPRs the MI builder is allowed to allocate
35  *
36  * This may be set by a user of this API so that it can reserve some GPRs at
37  * the top end for its own use.
38  */
39 #define MI_BUILDER_NUM_ALLOC_GPRS 16
40 #endif
41 
42 /** These must be defined by the user of the builder
43  *
44  * void *__gen_get_batch_dwords(__gen_user_data *user_data,
45  *                              unsigned num_dwords);
46  *
47  * __gen_address_type
48  * __gen_address_offset(__gen_address_type addr, uint64_t offset);
49  *
50  *
51  * If self-modifying batches are supported, we must be able to pass batch
52  * addresses around as void*s so pinning as well as batch chaining or some
53  * other mechanism for ensuring batch pointers remain valid during building is
54  * required. The following function must also be defined, it returns an
55  * address in canonical form:
56  *
57  * __gen_address_type
58  * __gen_get_batch_address(__gen_user_data *user_data, void *location);
59  *
60  * Also, __gen_combine_address must accept a location value of NULL and return
61  * a fully valid 64-bit address.
62  */
63 
64 /*
65  * Start of the actual MI builder
66  */
67 
/* Token-paste helpers mapping a genxml struct name to its _length, _header,
 * and _pack companions.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

/* Pack command `cmd` into the dword buffer `dst`.  Expands to a one-trip
 * for-loop so the caller can set fields on `name` in a braced body before
 * the pack happens at loop exit.
 */
#define mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

/* Allocate batch space for `cmd` and pack it there (see mi_builder_pack). */
#define mi_builder_emit(b, cmd, name)                               \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
80 
81 
/** The kinds of operand an mi_value can represent. */
enum mi_value_type {
   MI_VALUE_TYPE_IMM,    /* immediate constant baked into the command */
   MI_VALUE_TYPE_MEM32,  /* 32-bit value at a GPU memory address */
   MI_VALUE_TYPE_MEM64,  /* 64-bit value at a GPU memory address */
   MI_VALUE_TYPE_REG32,  /* 32-bit MMIO register */
   MI_VALUE_TYPE_REG64,  /* 64-bit MMIO register (pair of dwords) */
};
89 
/** A single MI builder operand: immediate, memory location, or register. */
struct mi_value {
   enum mi_value_type type;

   union {
      uint64_t imm;             /* valid when type == MI_VALUE_TYPE_IMM */
      __gen_address_type addr;  /* valid for MEM32/MEM64 */
      uint32_t reg;             /* MMIO offset, valid for REG32/REG64 */
   };

#if GFX_VERx10 >= 75
   /* Deferred bitwise-NOT flag; resolved lazily by the MI_MATH paths
    * (see mi_inot / mi_resolve_invert).
    */
   bool invert;
#endif
};
103 
/** A register number after adjustment by mi_adjust_reg_num(). */
struct mi_reg_num {
   uint32_t num;
#if GFX_VER >= 11
   /* True if `num` is relative to the command streamer's MMIO start and the
    * command must set its AddCSMMIOStartOffset bit.
    */
   bool cs;
#endif
};
110 
111 static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)112 mi_adjust_reg_num(uint32_t reg)
113 {
114 #if GFX_VER >= 11
115    bool cs = reg >= 0x2000 && reg < 0x4000;
116    return (struct mi_reg_num) {
117       .num = reg - (cs ? 0x2000 : 0),
118       .cs = cs,
119    };
120 #else
121    return (struct mi_reg_num) { .num = reg, };
122 #endif
123 }
124 
/* Capacity of the pending MI_MATH dword buffer; larger command lengths are
 * available on GFX_VER >= 9.
 */
#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif

/** Builder state: batch access plus GPR allocation and deferred math. */
struct mi_builder {
   const struct intel_device_info *devinfo;
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   /* Bitmask of allocated GPRs (bit i == CS_GPR i in use). */
   uint32_t gprs;
   /* Per-GPR reference counts; GPR i is freed when this drops to zero. */
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   /* ALU dwords queued for the next MI_MATH, flushed by
    * mi_builder_flush_math().
    */
   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif
};
143 
144 static inline void
mi_builder_init(struct mi_builder * b,const struct intel_device_info * devinfo,__gen_user_data * user_data)145 mi_builder_init(struct mi_builder *b,
146                 const struct intel_device_info *devinfo,
147                 __gen_user_data *user_data)
148 {
149    memset(b, 0, sizeof(*b));
150    b->devinfo = devinfo;
151    b->user_data = user_data;
152 
153 #if GFX_VERx10 >= 75
154    b->gprs = 0;
155    b->num_math_dwords = 0;
156 #endif
157 }
158 
/** Emit any queued ALU dwords as a single MI_MATH command.
 *
 * Must be called before emitting any command that reads or writes GPRs
 * outside the math queue.  No-op when nothing is queued or on gens without
 * MI_MATH.
 */
static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   if (b->num_math_dwords == 0)
      return;

   /* One dword for the MI_MATH header plus the queued ALU instructions. */
   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}
175 
/* MMIO offset of CS_GPR0; GPRs are 64-bit and spaced 8 bytes apart. */
#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16
179 
180 #if GFX_VERx10 >= 75
181 
182 static inline bool
mi_value_is_reg(struct mi_value val)183 mi_value_is_reg(struct mi_value val)
184 {
185    return val.type == MI_VALUE_TYPE_REG32 ||
186           val.type == MI_VALUE_TYPE_REG64;
187 }
188 
189 static inline bool
mi_value_is_gpr(struct mi_value val)190 mi_value_is_gpr(struct mi_value val)
191 {
192    return mi_value_is_reg(val) &&
193           val.reg >= _MI_BUILDER_GPR_BASE &&
194           val.reg < _MI_BUILDER_GPR_BASE +
195                     _MI_BUILDER_NUM_HW_GPRS * 8;
196 }
197 
198 static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)199 _mi_value_is_allocated_gpr(struct mi_value val)
200 {
201    return mi_value_is_reg(val) &&
202           val.reg >= _MI_BUILDER_GPR_BASE &&
203           val.reg < _MI_BUILDER_GPR_BASE +
204                     MI_BUILDER_NUM_ALLOC_GPRS * 8;
205 }
206 
207 static inline uint32_t
_mi_value_as_gpr(struct mi_value val)208 _mi_value_as_gpr(struct mi_value val)
209 {
210    assert(mi_value_is_gpr(val));
211    assert(val.reg % 8 == 0);
212    return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
213 }
214 
215 static inline struct mi_value
mi_new_gpr(struct mi_builder * b)216 mi_new_gpr(struct mi_builder *b)
217 {
218    unsigned gpr = ffs(~b->gprs) - 1;
219    assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
220    assert(b->gpr_refs[gpr] == 0);
221    b->gprs |= (1u << gpr);
222    b->gpr_refs[gpr] = 1;
223 
224    return (struct mi_value) {
225       .type = MI_VALUE_TYPE_REG64,
226       .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
227    };
228 }
229 #endif /* GFX_VERx10 >= 75 */
230 
231 /** Take a reference to a mi_value
232  *
233  * The MI builder uses reference counting to automatically free ALU GPRs for
234  * re-use in calculations.  All mi_* math functions consume the reference
235  * they are handed for each source and return a reference to a value which the
236  * caller must consume.  In particular, if you pas the same value into a
237  * single mi_* math function twice (say to add a number to itself), you
238  * are responsible for calling mi_value_ref() to get a second reference
239  * because the mi_* math function will consume it twice.
240  */
241 static inline struct mi_value
mi_value_ref(struct mi_builder * b,struct mi_value val)242 mi_value_ref(struct mi_builder *b, struct mi_value val)
243 {
244 #if GFX_VERx10 >= 75
245    if (_mi_value_is_allocated_gpr(val)) {
246       unsigned gpr = _mi_value_as_gpr(val);
247       assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
248       assert(b->gprs & (1u << gpr));
249       assert(b->gpr_refs[gpr] < UINT8_MAX);
250       b->gpr_refs[gpr]++;
251    }
252 #endif /* GFX_VERx10 >= 75 */
253 
254    return val;
255 }
256 
257 /** Drop a reference to a mi_value
258  *
259  * See also mi_value_ref.
260  */
261 static inline void
mi_value_unref(struct mi_builder * b,struct mi_value val)262 mi_value_unref(struct mi_builder *b, struct mi_value val)
263 {
264 #if GFX_VERx10 >= 75
265    if (_mi_value_is_allocated_gpr(val)) {
266       unsigned gpr = _mi_value_as_gpr(val);
267       assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
268       assert(b->gprs & (1u << gpr));
269       assert(b->gpr_refs[gpr] > 0);
270       if (--b->gpr_refs[gpr] == 0)
271          b->gprs &= ~(1u << gpr);
272    }
273 #endif /* GFX_VERx10 >= 75 */
274 }
275 
/** Wrap a 64-bit constant as an immediate mi_value. */
static inline struct mi_value
mi_imm(uint64_t imm)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}
284 
/** Wrap an MMIO offset as a 32-bit register value.
 *
 * Builder-managed GPRs must come from mi_new_gpr(), never from here.
 */
static inline struct mi_value
mi_reg32(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}
297 
/** Wrap an MMIO offset as a 64-bit register value (dword pair at reg/reg+4).
 *
 * Builder-managed GPRs must come from mi_new_gpr(), never from here.
 */
static inline struct mi_value
mi_reg64(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}
310 
/** Wrap a GPU address as a 32-bit memory value. */
static inline struct mi_value
mi_mem32(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}
319 
/** Wrap a GPU address as a 64-bit memory value. */
static inline struct mi_value
mi_mem64(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}
328 
329 static inline struct mi_value
mi_value_half(struct mi_value value,bool top_32_bits)330 mi_value_half(struct mi_value value, bool top_32_bits)
331 {
332    switch (value.type) {
333    case MI_VALUE_TYPE_IMM:
334       if (top_32_bits)
335          value.imm >>= 32;
336       else
337          value.imm &= 0xffffffffu;
338       return value;
339 
340    case MI_VALUE_TYPE_MEM32:
341       assert(!top_32_bits);
342       return value;
343 
344    case MI_VALUE_TYPE_MEM64:
345       if (top_32_bits)
346          value.addr = __gen_address_offset(value.addr, 4);
347       value.type = MI_VALUE_TYPE_MEM32;
348       return value;
349 
350    case MI_VALUE_TYPE_REG32:
351       assert(!top_32_bits);
352       return value;
353 
354    case MI_VALUE_TYPE_REG64:
355       if (top_32_bits)
356          value.reg += 4;
357       value.type = MI_VALUE_TYPE_REG32;
358       return value;
359    }
360 
361    unreachable("Invalid mi_value type");
362 }
363 
364 static inline void
_mi_copy_no_unref(struct mi_builder * b,struct mi_value dst,struct mi_value src)365 _mi_copy_no_unref(struct mi_builder *b,
366                   struct mi_value dst, struct mi_value src)
367 {
368 #if GFX_VERx10 >= 75
369    /* TODO: We could handle src.invert by emitting a bit of math if we really
370     * wanted to.
371     */
372    assert(!dst.invert && !src.invert);
373 #endif
374    mi_builder_flush_math(b);
375 
376    switch (dst.type) {
377    case MI_VALUE_TYPE_IMM:
378       unreachable("Cannot copy to an immediate");
379 
380    case MI_VALUE_TYPE_MEM64:
381    case MI_VALUE_TYPE_REG64:
382       switch (src.type) {
383       case MI_VALUE_TYPE_IMM:
384          if (dst.type == MI_VALUE_TYPE_REG64) {
385             uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
386                                                               GENX(MI_LOAD_REGISTER_IMM_length) + 2);
387             struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
388             mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
389                lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
390                                  GENX(MI_LOAD_REGISTER_IMM_length_bias);
391 #if GFX_VER >= 11
392                lri.AddCSMMIOStartOffset = reg.cs;
393 #endif
394             }
395             dw[1] = reg.num;
396             dw[2] = src.imm;
397             dw[3] = reg.num + 4;
398             dw[4] = src.imm >> 32;
399          } else {
400 #if GFX_VER >= 8
401             assert(dst.type == MI_VALUE_TYPE_MEM64);
402             uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
403                                                               GENX(MI_STORE_DATA_IMM_length) + 1);
404             mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
405                sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
406                                  GENX(MI_STORE_DATA_IMM_length_bias);
407                sdm.StoreQword = true;
408                sdm.Address = dst.addr;
409             }
410             dw[3] = src.imm;
411             dw[4] = src.imm >> 32;
412 #else
413          _mi_copy_no_unref(b, mi_value_half(dst, false),
414                               mi_value_half(src, false));
415          _mi_copy_no_unref(b, mi_value_half(dst, true),
416                               mi_value_half(src, true));
417 #endif
418          }
419          break;
420       case MI_VALUE_TYPE_REG32:
421       case MI_VALUE_TYPE_MEM32:
422          _mi_copy_no_unref(b, mi_value_half(dst, false),
423                               mi_value_half(src, false));
424          _mi_copy_no_unref(b, mi_value_half(dst, true),
425                               mi_imm(0));
426          break;
427       case MI_VALUE_TYPE_REG64:
428       case MI_VALUE_TYPE_MEM64:
429          _mi_copy_no_unref(b, mi_value_half(dst, false),
430                               mi_value_half(src, false));
431          _mi_copy_no_unref(b, mi_value_half(dst, true),
432                               mi_value_half(src, true));
433          break;
434       default:
435          unreachable("Invalid mi_value type");
436       }
437       break;
438 
439    case MI_VALUE_TYPE_MEM32:
440       switch (src.type) {
441       case MI_VALUE_TYPE_IMM:
442          mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
443             sdi.Address = dst.addr;
444 #if GFX_VER >= 12
445             sdi.ForceWriteCompletionCheck = true;
446 #endif
447             sdi.ImmediateData = src.imm;
448          }
449          break;
450 
451       case MI_VALUE_TYPE_MEM32:
452       case MI_VALUE_TYPE_MEM64:
453 #if GFX_VER >= 8
454          mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
455             cmm.DestinationMemoryAddress = dst.addr;
456             cmm.SourceMemoryAddress = src.addr;
457          }
458 #elif GFX_VERx10 == 75
459          {
460             struct mi_value tmp = mi_new_gpr(b);
461             _mi_copy_no_unref(b, tmp, src);
462             _mi_copy_no_unref(b, dst, tmp);
463             mi_value_unref(b, tmp);
464          }
465 #else
466          unreachable("Cannot do mem <-> mem copy on IVB and earlier");
467 #endif
468          break;
469 
470       case MI_VALUE_TYPE_REG32:
471       case MI_VALUE_TYPE_REG64:
472          mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
473             struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
474             srm.RegisterAddress = reg.num;
475 #if GFX_VER >= 11
476             srm.AddCSMMIOStartOffset = reg.cs;
477 #endif
478             srm.MemoryAddress = dst.addr;
479          }
480          break;
481 
482       default:
483          unreachable("Invalid mi_value type");
484       }
485       break;
486 
487    case MI_VALUE_TYPE_REG32:
488       switch (src.type) {
489       case MI_VALUE_TYPE_IMM:
490          mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
491             struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
492             lri.RegisterOffset = reg.num;
493 #if GFX_VER >= 11
494             lri.AddCSMMIOStartOffset = reg.cs;
495 #endif
496             lri.DataDWord = src.imm;
497          }
498          break;
499 
500       case MI_VALUE_TYPE_MEM32:
501       case MI_VALUE_TYPE_MEM64:
502 #if GFX_VER >= 7
503          mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
504             struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
505             lrm.RegisterAddress = reg.num;
506 #if GFX_VER >= 11
507             lrm.AddCSMMIOStartOffset = reg.cs;
508 #endif
509             lrm.MemoryAddress = src.addr;
510          }
511 #else
512          unreachable("Cannot load do mem -> reg copy on SNB and earlier");
513 #endif
514          break;
515 
516       case MI_VALUE_TYPE_REG32:
517       case MI_VALUE_TYPE_REG64:
518 #if GFX_VERx10 >= 75
519          if (src.reg != dst.reg) {
520             mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
521                struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
522                lrr.SourceRegisterAddress = reg.num;
523 #if GFX_VER >= 11
524                lrr.AddCSMMIOStartOffsetSource = reg.cs;
525 #endif
526                reg = mi_adjust_reg_num(dst.reg);
527                lrr.DestinationRegisterAddress = reg.num;
528 #if GFX_VER >= 11
529                lrr.AddCSMMIOStartOffsetDestination = reg.cs;
530 #endif
531             }
532          }
533 #else
534          unreachable("Cannot do reg <-> reg copy on IVB and earlier");
535 #endif
536          break;
537 
538       default:
539          unreachable("Invalid mi_value type");
540       }
541       break;
542 
543    default:
544       unreachable("Invalid mi_value type");
545    }
546 }
547 
548 #if GFX_VERx10 >= 75
549 static inline struct mi_value
550 mi_resolve_invert(struct mi_builder *b, struct mi_value src);
551 #endif
552 
553 /** Store the value in src to the value represented by dst
554  *
555  * If the bit size of src and dst mismatch, this function does an unsigned
556  * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
557  * src has fewer bits then dst, it fills the top bits with zeros.
558  *
559  * This function consumes one reference for each of src and dst.
560  */
static inline void
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* Materialize any deferred invert first; the raw copy asserts that
    * neither operand carries one.
    */
   src = mi_resolve_invert(b, src);
#endif
   _mi_copy_no_unref(b, dst, src);
   /* mi_store consumes one reference to each operand. */
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
571 
572 static inline void
mi_memset(struct mi_builder * b,__gen_address_type dst,uint32_t value,uint32_t size)573 mi_memset(struct mi_builder *b, __gen_address_type dst,
574           uint32_t value, uint32_t size)
575 {
576 #if GFX_VERx10 >= 75
577    assert(b->num_math_dwords == 0);
578 #endif
579 
580    /* This memset operates in units of dwords. */
581    assert(size % 4 == 0);
582 
583    for (uint32_t i = 0; i < size; i += 4) {
584       mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
585                       mi_imm(value));
586    }
587 }
588 
/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
static inline void
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
          __gen_address_type src, uint32_t size)
{
   /* Copy `size` bytes from src to dst, one dword at a time. */
#if GFX_VERx10 >= 75
   /* Queued MI_MATH would interleave badly with the copies below. */
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
#if GFX_VERx10 >= 75
      mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
      mi_store(b, tmp_reg, src_val);
      mi_store(b, dst_val, tmp_reg);
#endif
   }
}
616 
617 /*
618  * MI_MATH Section.  Only available on Haswell+
619  */
620 
621 #if GFX_VERx10 >= 75
622 
623 /**
624  * Perform a predicated store (assuming the condition is already loaded
625  * in the MI_PREDICATE_RESULT register) of the value in src to the memory
626  * location specified by dst.  Non-memory destinations are not supported.
627  *
628  * This function consumes one reference for each of src and dst.
629  */
static inline void
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
   /* Predicated stores cannot resolve a deferred invert. */
   assert(!dst.invert && !src.invert);

   /* Pending MI_MATH must land before commands that touch GPRs. */
   mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
          dst.type == MI_VALUE_TYPE_MEM32);

   if (src.type != MI_VALUE_TYPE_REG32 &&
       src.type != MI_VALUE_TYPE_REG64) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == MI_VALUE_TYPE_MEM64) {
      /* 64-bit destination: two predicated SRMs, one per dword of the
       * register pair.
       */
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   /* Consumes one reference to each operand (src may be the temp GPR). */
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
685 
686 static inline void
_mi_builder_push_math(struct mi_builder * b,const uint32_t * dwords,unsigned num_dwords)687 _mi_builder_push_math(struct mi_builder *b,
688                       const uint32_t *dwords,
689                       unsigned num_dwords)
690 {
691    assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
692    if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
693       mi_builder_flush_math(b);
694 
695    memcpy(&b->math_dwords[b->num_math_dwords],
696           dwords, num_dwords * sizeof(*dwords));
697    b->num_math_dwords += num_dwords;
698 }
699 
700 static inline uint32_t
_mi_pack_alu(uint32_t opcode,uint32_t operand1,uint32_t operand2)701 _mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
702 {
703    struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
704       .Operand2 = operand2,
705       .Operand1 = operand1,
706       .ALUOpcode = opcode,
707    };
708 
709    uint32_t dw;
710    GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
711 
712    return dw;
713 }
714 
715 static inline struct mi_value
mi_value_to_gpr(struct mi_builder * b,struct mi_value val)716 mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
717 {
718    if (mi_value_is_gpr(val))
719       return val;
720 
721    /* Save off the invert flag because it makes copy() grumpy */
722    bool invert = val.invert;
723    val.invert = false;
724 
725    struct mi_value tmp = mi_new_gpr(b);
726    _mi_copy_no_unref(b, tmp, val);
727    tmp.invert = invert;
728 
729    return tmp;
730 }
731 
732 static inline uint64_t
mi_value_to_u64(struct mi_value val)733 mi_value_to_u64(struct mi_value val)
734 {
735    assert(val.type == MI_VALUE_TYPE_IMM);
736    return val.invert ? ~val.imm : val.imm;
737 }
738 
/** Emit the ALU "load source" dword placing *val into slot `src`.
 *
 * All-zeros and all-ones immediates use the dedicated LOAD0/LOAD1 opcodes,
 * avoiding a GPR entirely.  Anything else is first moved into a GPR
 * (updating *val so the caller unrefs the right value) and loaded from
 * there, honoring a deferred invert via LOADINV.
 */
static inline uint32_t
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
{
   if (val->type == MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = mi_value_to_gpr(b, *val);
      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                          src, _mi_value_as_gpr(*val));
   }
}
752 
/** Emit a generic two-source MI_MATH operation.
 *
 * Queues: SRCA = src0; SRCB = src1; ACCU = SRCA <opcode> SRCB;
 * then stores store_op(store_src) into a fresh GPR (e.g. STORE of ACCU for
 * arithmetic, STORE/STOREINV of ZF or CF for comparisons).
 *
 * Consumes one reference to each source; returns an owned GPR value.
 */
static inline struct mi_value
mi_math_binop(struct mi_builder *b, uint32_t opcode,
              struct mi_value src0, struct mi_value src1,
              uint32_t store_op, uint32_t store_src)
{
   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _mi_pack_alu(opcode, 0, 0);
   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
   _mi_builder_push_math(b, dw, 4);

   mi_value_unref(b, src0);
   mi_value_unref(b, src1);

   return dst;
}
772 
773 static inline struct mi_value
mi_inot(struct mi_builder * b,struct mi_value val)774 mi_inot(struct mi_builder *b, struct mi_value val)
775 {
776    if (val.type == MI_VALUE_TYPE_IMM)
777       return mi_imm(~mi_value_to_u64(val));
778 
779    val.invert = !val.invert;
780    return val;
781 }
782 
783 static inline struct mi_value
mi_resolve_invert(struct mi_builder * b,struct mi_value src)784 mi_resolve_invert(struct mi_builder *b, struct mi_value src)
785 {
786    if (!src.invert)
787       return src;
788 
789    assert(src.type != MI_VALUE_TYPE_IMM);
790    return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
791                            MI_ALU_STORE, MI_ALU_ACCU);
792 }
793 
794 static inline struct mi_value
mi_iadd(struct mi_builder * b,struct mi_value src0,struct mi_value src1)795 mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
796 {
797    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
798       return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));
799 
800    return mi_math_binop(b, MI_ALU_ADD, src0, src1,
801                            MI_ALU_STORE, MI_ALU_ACCU);
802 }
803 
804 static inline struct mi_value
mi_iadd_imm(struct mi_builder * b,struct mi_value src,uint64_t N)805 mi_iadd_imm(struct mi_builder *b,
806                 struct mi_value src, uint64_t N)
807 {
808    if (N == 0)
809       return src;
810 
811    return mi_iadd(b, src, mi_imm(N));
812 }
813 
814 static inline struct mi_value
mi_isub(struct mi_builder * b,struct mi_value src0,struct mi_value src1)815 mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
816 {
817    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
818       return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));
819 
820    return mi_math_binop(b, MI_ALU_SUB, src0, src1,
821                            MI_ALU_STORE, MI_ALU_ACCU);
822 }
823 
824 static inline struct mi_value
mi_ieq(struct mi_builder * b,struct mi_value src0,struct mi_value src1)825 mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
826 {
827    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
828       return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);
829 
830    /* Compute "equal" by subtracting and storing the zero bit */
831    return mi_math_binop(b, MI_ALU_SUB, src0, src1,
832                             MI_ALU_STORE, MI_ALU_ZF);
833 }
834 
835 static inline struct mi_value
mi_ine(struct mi_builder * b,struct mi_value src0,struct mi_value src1)836 mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
837 {
838    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
839       return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);
840 
841    /* Compute "not equal" by subtracting and storing the inverse zero bit */
842    return mi_math_binop(b, MI_ALU_SUB, src0, src1,
843                             MI_ALU_STOREINV, MI_ALU_ZF);
844 }
845 
846 static inline struct mi_value
mi_ult(struct mi_builder * b,struct mi_value src0,struct mi_value src1)847 mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
848 {
849    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
850       return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);
851 
852    /* Compute "less than" by subtracting and storing the carry bit */
853    return mi_math_binop(b, MI_ALU_SUB, src0, src1,
854                            MI_ALU_STORE, MI_ALU_CF);
855 }
856 
/** Unsigned "greater than or equal": all-ones when src0 >= src1, else 0. */
static inline struct mi_value
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "greater than or equal" by subtracting and storing the
    * inverted carry (borrow) bit.  (Previous comment wrongly said
    * "less than".)
    */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STOREINV, MI_ALU_CF);
}
867 
868 static inline struct mi_value
mi_iand(struct mi_builder * b,struct mi_value src0,struct mi_value src1)869 mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
870 {
871    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
872       return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));
873 
874    return mi_math_binop(b, MI_ALU_AND, src0, src1,
875                            MI_ALU_STORE, MI_ALU_ACCU);
876 }
877 
878 static inline struct mi_value
mi_nz(struct mi_builder * b,struct mi_value src)879 mi_nz(struct mi_builder *b, struct mi_value src)
880 {
881    if (src.type == MI_VALUE_TYPE_IMM)
882       return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);
883 
884    return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
885                            MI_ALU_STOREINV, MI_ALU_ZF);
886 }
887 
888 static inline struct mi_value
mi_z(struct mi_builder * b,struct mi_value src)889 mi_z(struct mi_builder *b, struct mi_value src)
890 {
891    if (src.type == MI_VALUE_TYPE_IMM)
892       return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);
893 
894    return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
895                            MI_ALU_STORE, MI_ALU_ZF);
896 }
897 
898 static inline struct mi_value
mi_ior(struct mi_builder * b,struct mi_value src0,struct mi_value src1)899 mi_ior(struct mi_builder *b,
900        struct mi_value src0, struct mi_value src1)
901 {
902    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
903       return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));
904 
905    return mi_math_binop(b, MI_ALU_OR, src0, src1,
906                            MI_ALU_STORE, MI_ALU_ACCU);
907 }
908 
909 #if GFX_VERx10 >= 125
910 static inline struct mi_value
mi_ishl(struct mi_builder * b,struct mi_value src0,struct mi_value src1)911 mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
912 {
913    if (src1.type == MI_VALUE_TYPE_IMM) {
914       assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
915       assert(mi_value_to_u64(src1) <= 32);
916    }
917 
918    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
919       return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));
920 
921    return mi_math_binop(b, MI_ALU_SHL, src0, src1,
922                            MI_ALU_STORE, MI_ALU_ACCU);
923 }
924 
925 static inline struct mi_value
mi_ushr(struct mi_builder * b,struct mi_value src0,struct mi_value src1)926 mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
927 {
928    if (src1.type == MI_VALUE_TYPE_IMM) {
929       assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
930       assert(mi_value_to_u64(src1) <= 32);
931    }
932 
933    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
934       return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));
935 
936    return mi_math_binop(b, MI_ALU_SHR, src0, src1,
937                            MI_ALU_STORE, MI_ALU_ACCU);
938 }
939 
940 static inline struct mi_value
mi_ushr_imm(struct mi_builder * b,struct mi_value src,uint32_t shift)941 mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
942 {
943    if (shift == 0)
944       return src;
945 
946    if (shift >= 64)
947       return mi_imm(0);
948 
949    if (src.type == MI_VALUE_TYPE_IMM)
950       return mi_imm(mi_value_to_u64(src) >> shift);
951 
952    struct mi_value res = mi_value_to_gpr(b, src);
953 
954    /* Annoyingly, we only have power-of-two shifts */
955    while (shift) {
956       int bit = u_bit_scan(&shift);
957       assert(bit <= 5);
958       res = mi_ushr(b, res, mi_imm(1 << bit));
959    }
960 
961    return res;
962 }
963 
964 static inline struct mi_value
mi_ishr(struct mi_builder * b,struct mi_value src0,struct mi_value src1)965 mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
966 {
967    if (src1.type == MI_VALUE_TYPE_IMM) {
968       assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
969       assert(mi_value_to_u64(src1) <= 32);
970    }
971 
972    if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
973       return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));
974 
975    return mi_math_binop(b, MI_ALU_SAR, src0, src1,
976                             MI_ALU_STORE, MI_ALU_ACCU);
977 }
978 
static inline struct mi_value
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   /* Arithmetic (sign-extending) right shift by a constant.
    *
    * NOTE(review): shift >= 64 returns 0 rather than the all-ones a true
    * arithmetic shift would give a negative source — confirm callers never
    * rely on shifts that large with negative values.
    */
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* Immediates fold at build time. */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts, so decompose the shift
    * amount bit by bit (each set bit <= 5, i.e. shifts of at most 32).
    */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishr(b, res, mi_imm(1 << bit));
   }

   return res;
}
1002 #endif /* if GFX_VERx10 >= 125 */
1003 
static inline struct mi_value
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
{
   /* Multiply src by the constant N using only ALU adds.
    *
    * Immediates fold at build time; N == 0 and N == 1 are handled without
    * emitting any math.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) * N);

   if (N == 0) {
      mi_value_unref(b, src);
      return mi_imm(0);
   }

   if (N == 1)
      return src;

   src = mi_value_to_gpr(b, src);

   struct mi_value res = mi_value_ref(b, src);

   /* Classic double-and-add: starting from the top set bit of N (already
    * accounted for by res == src), double the accumulator for each lower
    * bit position and add src whenever that bit of N is set.
    */
   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = mi_iadd(b, res, mi_value_ref(b, res));
      if (N & (1 << i))
         res = mi_iadd(b, res, mi_value_ref(b, src));
   }

   mi_value_unref(b, src);

   return res;
}
1033 
static inline struct mi_value
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   /* Left shift by a constant amount.
    *
    * On XE_HP+ this uses the hardware shifter (power-of-two counts only);
    * on older parts it falls back to repeated doubling via adds.
    */
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* Immediates fold at build time. */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) << shift);

   struct mi_value res = mi_value_to_gpr(b, src);

#if GFX_VERx10 >= 125
   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishl(b, res, mi_imm(1 << bit));
   }
#else
   /* No shifter: shift left by one per iteration by adding res to itself. */
   for (unsigned i = 0; i < shift; i++)
      res = mi_iadd(b, res, mi_value_ref(b, res));
#endif

   return res;
}
1062 
static inline struct mi_value
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   /* Logical right shift that produces a 32-bit (zero-extended) result.
    * Works on all gens since it needs no hardware shifter.
    */
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);

   if (shift > 32) {
      /* First do a shift of exactly 32 by moving the high half of src into
       * the low half of a fresh GPR and zeroing its high half.
       */
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, mi_value_half(tmp, false),
                               mi_value_half(src, true));
      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
      mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   /* Left-shift by (32 - shift), then the desired result is the high half. */
   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
   struct mi_value dst = mi_new_gpr(b);
   _mi_copy_no_unref(b, mi_value_half(dst, false),
                            mi_value_half(tmp, true));
   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
   mi_value_unref(b, tmp);
   return dst;
}
1096 
static inline struct mi_value
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
{
   /* Unsigned 32-bit division of N by the constant D, implemented with
    * multiplication by a precomputed "magic" reciprocal (see
    * util_compute_fast_udiv_info).
    */
   if (N.type == MI_VALUE_TYPE_IMM) {
      assert(mi_value_to_u64(N) <= UINT32_MAX);
      return mi_imm(mi_value_to_u64(N) / D);
   }

   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      /* Power-of-two divisor: a plain shift suffices. */
      return mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = mi_ushr32_imm(b, N, m.pre_shift);

      /* 32x32 -> 64-bit multiply by the magic constant */
      N = mi_imul_imm(b, N, m.multiplier);

      /* The "increment" variant computes (N + 1) * multiplier, which is
       * N * multiplier + multiplier.
       */
      if (m.increment)
         N = mi_iadd(b, N, mi_imm(m.multiplier));

      /* Take the high 32 bits of the product... */
      N = mi_ushr32_imm(b, N, 32);

      /* ...and apply the final correction shift. */
      if (m.post_shift)
         N = mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}
1132 
1133 #endif /* MI_MATH section */
1134 
1135 /* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
1136 #if MI_BUILDER_CAN_WRITE_BATCH
1137 
/* Handle returned by mi_store_address(): records where in the batch the
 * two MI_STORE_REGISTER_MEM address fields live so that
 * _mi_resolve_address_token() can patch them later.
 */
struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};
1142 
static inline struct mi_address_token
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
{
   /* Store a 64-bit GPR to a batch location that is not yet known.
    *
    * Emits two MI_STORE_REGISTER_MEM commands (low and high 32-bit halves
    * of addr_reg) with placeholder destination addresses, and returns a
    * token pointing at those address fields for later patching.  Consumes
    * the addr_reg reference.
    */
   mi_builder_flush_math(b);

   assert(addr_reg.type == MI_VALUE_TYPE_REG64);

   struct mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         /* Each half of the 64-bit register is 4 bytes apart. */
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         /* Record where the MemoryAddress field sits inside the command
          * (_dst is the batch pointer provided by mi_builder_emit).
          */
         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   mi_value_unref(b, addr_reg);
   return token;
}
1165 
static inline void
mi_self_mod_barrier(struct mi_builder *b)
{
   /* Barrier to place between commands that modify the batch and the
    * modified commands themselves.
    */
   /* First make sure all the memory writes from previous modifying commands
    * have landed. We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gfx11+ should be able to invalidate the command
    * cache but experiments show it doesn't work properly, so for now just
    * pad past the CS prefetch window with NOOPs.
    */
   for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
      mi_builder_emit(b, GENX(MI_NOOP), noop);
}
1183 
static inline void
_mi_resolve_address_token(struct mi_builder *b,
                          struct mi_address_token token,
                          void *batch_location)
{
   /* Patch a pair of MI_STORE_REGISTER_MEM commands recorded by
    * mi_store_address() so they write to batch_location (low dword) and
    * batch_location + 4 (high dword).
    */
   __gen_address_type addr = __gen_get_batch_address(b->user_data,
                                                    batch_location);
   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
                                                  addr, 0);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}
1196 
1197 #endif /* MI_BUILDER_CAN_WRITE_BATCH */
1198 
1199 #if GFX_VERx10 >= 125
1200 
1201 /*
1202  * Indirect load/store.  Only available on XE_HP+
1203  */
1204 
MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
                     __gen_address_type addr, struct mi_value offset)
{
   /* Load a 64-bit value from memory at (addr + offset) into a fresh GPR.
    *
    * Uses the XE_HP+ LOADIND ALU op: compute the address in the
    * accumulator, then do an indirect load through it.  Consumes the
    * offset reference.
    */
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   struct mi_value dst = mi_new_gpr(b);

   /* Raw MI_MATH program: ACCU = addr + offset; dst = *ACCU; read fence. */
   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);

   return dst;
}
1227 
static inline void
mi_store_mem64_offset(struct mi_builder *b,
                          __gen_address_type addr, struct mi_value offset,
                          struct mi_value data)
{
   /* Store a 64-bit value to memory at (addr + offset).
    *
    * Uses the XE_HP+ STOREIND ALU op: compute the address in the
    * accumulator, then store data through it indirectly.  Consumes the
    * offset and data references.
    */
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   /* STOREIND needs the data in a GPR; resolve any pending invert first. */
   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));

   /* Raw MI_MATH program: ACCU = addr + offset; *ACCU = data; write fence. */
   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);
   mi_value_unref(b, data);

   /* This is the only math case which has side-effects outside of regular
    * registers to flush math afterwards so we don't confuse anyone.
    */
   mi_builder_flush_math(b);
}
1255 
1256 /*
1257  * Control-flow Section.  Only available on XE_HP+
1258  */
1259 
/* One pending forward goto: batch space reserved for an
 * MI_BATCH_BUFFER_START that gets packed once the target is placed.
 */
struct _mi_goto {
   bool predicated;
   void *mi_bbs;
};

/* A goto target: collects up to 8 forward gotos until the target address
 * is known (placed == true), after which jumps are emitted directly.
 */
struct mi_goto_target {
   bool placed;
   unsigned num_gotos;
   struct _mi_goto gotos[8];
   __gen_address_type addr;
};

#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

/* MMIO offset of the MI_PREDICATE_RESULT register */
#define MI_BUILDER_MI_PREDICATE_RESULT_num  0x2418
1275 
static inline void
mi_goto_if(struct mi_builder *b, struct mi_value cond,
           struct mi_goto_target *t)
{
   /* Emit a jump to t taken when cond is non-zero.
    *
    * If the target is already placed, a (possibly predicated)
    * MI_BATCH_BUFFER_START is emitted directly; otherwise batch space is
    * reserved and recorded in t for mi_goto_target() to patch later.
    */
   /* First, set up the predicate, if any */
   bool predicated;
   if (cond.type == MI_VALUE_TYPE_IMM) {
      /* If it's an immediate, the goto either doesn't happen or happens
       * unconditionally.
       */
      if (mi_value_to_u64(cond) == 0)
         return;

      assert(mi_value_to_u64(cond) == ~0ull);
      predicated = false;
   } else if (mi_value_is_reg(cond) &&
              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
       * provided us with
       */
      assert(cond.type == MI_VALUE_TYPE_REG32);
      predicated = true;
   } else {
      /* Otherwise copy the condition into MI_PREDICATE_RESULT. */
      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
      predicated = true;
   }

   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPOnResultClear;
      }
   }
   if (t->placed) {
      /* Target address already known: jump straight to it. */
      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.PredicationEnable         = predicated;
         bbs.AddressSpaceIndicator     = ASI_PPGTT;
         bbs.BatchBufferStartAddress   = t->addr;
      }
   } else {
      /* Forward goto: reserve NOOP'd space now, patch at placement time. */
      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
      struct _mi_goto g = {
         .predicated = predicated,
         .mi_bbs = __gen_get_batch_dwords(b->user_data,
                                          GENX(MI_BATCH_BUFFER_START_length)),
      };
      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
      t->gotos[t->num_gotos++] = g;
   }
   if (predicated) {
      /* Restore un-predicated execution for subsequent commands. */
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPNever;
      }
   }
}
1330 
static inline void
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
{
   /* Unconditional goto: an all-ones immediate condition always jumps. */
   mi_goto_if(b, mi_imm(-1), t);
}
1336 
static inline void
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
{
   /* Place the target at the current batch location and back-patch every
    * forward goto previously recorded in t.
    */
   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
      sp.PredicateEnable = NOOPNever;
      t->addr = __gen_get_batch_address(b->user_data, _dst);
   }
   t->placed = true;

   /* Pack an MI_BATCH_BUFFER_START pointing at the target into each of
    * the previously reserved goto slots.
    */
   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
   bbs.AddressSpaceIndicator     = ASI_PPGTT;
   bbs.BatchBufferStartAddress   = t->addr;

   for (unsigned i = 0; i < t->num_gotos; i++) {
      bbs.PredicationEnable = t->gotos[i].predicated;
      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
   }
}
1355 
static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder *b)
{
   /* Convenience for loop heads: create a goto target and place it at the
    * current batch location in one step.
    */
   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_target(b, &t);
   return t;
}
1363 
/* Structured loop built on the goto machinery:
 *
 *    mi_loop(b) {
 *       ...
 *       mi_break_if(b, cond);
 *    }
 *
 * __continue is placed at the top of the loop body; the for-statement's
 * increment expression emits the back-edge goto and then places __break
 * just after the loop, which also sets __break.placed and ends the single
 * C-level iteration of the for.
 */
#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
1373 
1374 #endif /* GFX_VERx10 >= 125 */
1375 
1376 #endif /* MI_BUILDER_H */
1377