1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef GEN_MI_BUILDER_H
25 #define GEN_MI_BUILDER_H
26 
27 #include "genxml/genX_bits.h"
28 #include "util/bitscan.h"
29 #include "util/fast_idiv_by_const.h"
30 #include "util/u_math.h"
31 
#ifndef GEN_MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 *
 * NOTE(review): presumably this must not exceed _GEN_MI_BUILDER_NUM_HW_GPRS
 * (16); nothing here asserts that -- confirm at override sites.
 */
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
40 
41 /** These must be defined by the user of the builder
42  *
43  * void *__gen_get_batch_dwords(__gen_user_data *user_data,
44  *                              unsigned num_dwords);
45  *
46  * __gen_address_type
47  * __gen_address_offset(__gen_address_type addr, uint64_t offset);
48  *
49  *
50  * If self-modifying batches are supported, we must be able to pass batch
51  * addresses around as void*s so pinning as well as batch chaining or some
52  * other mechanism for ensuring batch pointers remain valid during building is
53  * required. The following function must also be defined, it returns an
54  * address in canonical form:
55  *
56  * uint64_t
57  * __gen_get_batch_address(__gen_user_data *user_data, void *location);
58  *
59  * Also, __gen_combine_address must accept a location value of NULL and return
60  * a fully valid 64-bit address.
61  */
62 
/*
 * Start of the actual MI builder
 */

/* Token-pasting helpers: expand to the genxml-generated _length, _header,
 * and _pack symbols for a given command struct name.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

/* Pack a command into dst.  The single-iteration for-loop gives the caller
 * a block in which to fill out the command's fields via `name`; the command
 * is packed into dst exactly once when the block exits.
 */
#define gen_mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

/* Like gen_mi_builder_pack() but allocates the destination dwords from the
 * batch via __gen_get_batch_dwords().
 */
#define gen_mi_builder_emit(b, cmd, name)                               \
   gen_mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
79 
80 
/* The kinds of operands the MI builder can work with */
enum gen_mi_value_type {
   GEN_MI_VALUE_TYPE_IMM,    /* 64-bit immediate */
   GEN_MI_VALUE_TYPE_MEM32,  /* 32-bit value in memory */
   GEN_MI_VALUE_TYPE_MEM64,  /* 64-bit value in memory */
   GEN_MI_VALUE_TYPE_REG32,  /* 32-bit MMIO register */
   GEN_MI_VALUE_TYPE_REG64,  /* 64-bit register (pair of adjacent 32-bit regs) */
};
88 
/* A tagged operand: an immediate, a memory location, or an MMIO register */
struct gen_mi_value {
   enum gen_mi_value_type type;

   union {
      uint64_t imm;            /* GEN_MI_VALUE_TYPE_IMM */
      __gen_address_type addr; /* GEN_MI_VALUE_TYPE_MEM32/MEM64 */
      uint32_t reg;            /* GEN_MI_VALUE_TYPE_REG32/REG64 */
   };

#if GEN_GEN >= 7 || GEN_IS_HASWELL
   /* When set, MI_MATH consumers load the bitwise NOT of this value
    * (see _gen_mi_math_load_src / MI_ALU_LOADINV).
    */
   bool invert;
#endif
};
102 
/* Capacity of the pending MI_MATH payload; Gen9+ supports a larger command */
#if GEN_GEN >= 9
#define GEN_MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define GEN_MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct gen_mi_builder {
   __gen_user_data *user_data;

#if GEN_GEN >= 8 || GEN_IS_HASWELL
   /* Bitmask of currently-allocated GPRs */
   uint32_t gprs;
   /* Reference count for each allocatable GPR */
   uint8_t gpr_refs[GEN_MI_BUILDER_NUM_ALLOC_GPRS];

   /* Pending ALU dwords, lazily flushed as one MI_MATH command so that
    * back-to-back math ops share a single packet.
    */
   unsigned num_math_dwords;
   uint32_t math_dwords[GEN_MI_BUILDER_MAX_MATH_DWORDS];
#endif
};
120 
/* Initialize the builder.  Must be called before any other gen_mi_* call. */
static inline void
gen_mi_builder_init(struct gen_mi_builder *b, __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->user_data = user_data;

#if GEN_GEN >= 8 || GEN_IS_HASWELL
   /* Redundant with the memset above; kept explicit for clarity. */
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}
132 
/* Emit any pending ALU dwords as a single MI_MATH command.
 *
 * Anything that emits a non-math command (copies, predicated stores, ...)
 * must call this first so commands land in the batch in program order.
 */
static inline void
gen_mi_builder_flush_math(struct gen_mi_builder *b)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (b->num_math_dwords == 0)
      return;

   /* One header dword followed by the accumulated ALU dwords */
   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   gen_mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}
149 
/* MMIO offset of CS GPR0; each GPR is 8 bytes (two 32-bit registers) */
#define _GEN_MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _GEN_MI_BUILDER_NUM_HW_GPRS 16
153 
154 #if GEN_GEN >= 8 || GEN_IS_HASWELL
155 
156 static inline bool
gen_mi_value_is_gpr(struct gen_mi_value val)157 gen_mi_value_is_gpr(struct gen_mi_value val)
158 {
159    return (val.type == GEN_MI_VALUE_TYPE_REG32 ||
160            val.type == GEN_MI_VALUE_TYPE_REG64) &&
161           val.reg >= _GEN_MI_BUILDER_GPR_BASE &&
162           val.reg < _GEN_MI_BUILDER_GPR_BASE +
163                     _GEN_MI_BUILDER_NUM_HW_GPRS * 8;
164 }
165 
166 static inline bool
_gen_mi_value_is_allocated_gpr(struct gen_mi_value val)167 _gen_mi_value_is_allocated_gpr(struct gen_mi_value val)
168 {
169    return (val.type == GEN_MI_VALUE_TYPE_REG32 ||
170            val.type == GEN_MI_VALUE_TYPE_REG64) &&
171           val.reg >= _GEN_MI_BUILDER_GPR_BASE &&
172           val.reg < _GEN_MI_BUILDER_GPR_BASE +
173                     GEN_MI_BUILDER_NUM_ALLOC_GPRS * 8;
174 }
175 
176 static inline uint32_t
_gen_mi_value_as_gpr(struct gen_mi_value val)177 _gen_mi_value_as_gpr(struct gen_mi_value val)
178 {
179    assert(gen_mi_value_is_gpr(val));
180    assert(val.reg % 8 == 0);
181    return (val.reg - _GEN_MI_BUILDER_GPR_BASE) / 8;
182 }
183 
184 static inline struct gen_mi_value
gen_mi_new_gpr(struct gen_mi_builder * b)185 gen_mi_new_gpr(struct gen_mi_builder *b)
186 {
187    unsigned gpr = ffs(~b->gprs) - 1;
188    assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
189    assert(b->gpr_refs[gpr] == 0);
190    b->gprs |= (1u << gpr);
191    b->gpr_refs[gpr] = 1;
192 
193    return (struct gen_mi_value) {
194       .type = GEN_MI_VALUE_TYPE_REG64,
195       .reg = _GEN_MI_BUILDER_GPR_BASE + gpr * 8,
196    };
197 }
198 #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
199 
/** Take a reference to a gen_mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations.  All gen_mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume.  In particular, if you pass the same value into a
 * single gen_mi_* math function twice (say to add a number to itself), you
 * are responsible for calling gen_mi_value_ref() to get a second reference
 * because the gen_mi_* math function will consume it twice.
 */
static inline struct gen_mi_value
gen_mi_value_ref(struct gen_mi_builder *b, struct gen_mi_value val)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   /* Only builder-allocated GPRs are reference counted; immediates, memory
    * locations, and fixed registers pass through untouched.
    */
   if (_gen_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _gen_mi_value_as_gpr(val);
      assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */

   return val;
}
225 
226 /** Drop a reference to a gen_mi_value
227  *
228  * See also gen_mi_value_ref.
229  */
230 static inline void
gen_mi_value_unref(struct gen_mi_builder * b,struct gen_mi_value val)231 gen_mi_value_unref(struct gen_mi_builder *b, struct gen_mi_value val)
232 {
233 #if GEN_GEN >= 8 || GEN_IS_HASWELL
234    if (_gen_mi_value_is_allocated_gpr(val)) {
235       unsigned gpr = _gen_mi_value_as_gpr(val);
236       assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
237       assert(b->gprs & (1u << gpr));
238       assert(b->gpr_refs[gpr] > 0);
239       if (--b->gpr_refs[gpr] == 0)
240          b->gprs &= ~(1u << gpr);
241    }
242 #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
243 }
244 
245 static inline struct gen_mi_value
gen_mi_imm(uint64_t imm)246 gen_mi_imm(uint64_t imm)
247 {
248    return (struct gen_mi_value) {
249       .type = GEN_MI_VALUE_TYPE_IMM,
250       .imm = imm,
251    };
252 }
253 
254 static inline struct gen_mi_value
gen_mi_reg32(uint32_t reg)255 gen_mi_reg32(uint32_t reg)
256 {
257    struct gen_mi_value val = {
258       .type = GEN_MI_VALUE_TYPE_REG32,
259       .reg = reg,
260    };
261 #if GEN_GEN >= 8 || GEN_IS_HASWELL
262    assert(!_gen_mi_value_is_allocated_gpr(val));
263 #endif
264    return val;
265 }
266 
267 static inline struct gen_mi_value
gen_mi_reg64(uint32_t reg)268 gen_mi_reg64(uint32_t reg)
269 {
270    struct gen_mi_value val = {
271       .type = GEN_MI_VALUE_TYPE_REG64,
272       .reg = reg,
273    };
274 #if GEN_GEN >= 8 || GEN_IS_HASWELL
275    assert(!_gen_mi_value_is_allocated_gpr(val));
276 #endif
277    return val;
278 }
279 
280 static inline struct gen_mi_value
gen_mi_mem32(__gen_address_type addr)281 gen_mi_mem32(__gen_address_type addr)
282 {
283    return (struct gen_mi_value) {
284       .type = GEN_MI_VALUE_TYPE_MEM32,
285       .addr = addr,
286    };
287 }
288 
289 static inline struct gen_mi_value
gen_mi_mem64(__gen_address_type addr)290 gen_mi_mem64(__gen_address_type addr)
291 {
292    return (struct gen_mi_value) {
293       .type = GEN_MI_VALUE_TYPE_MEM64,
294       .addr = addr,
295    };
296 }
297 
298 static inline struct gen_mi_value
gen_mi_value_half(struct gen_mi_value value,bool top_32_bits)299 gen_mi_value_half(struct gen_mi_value value, bool top_32_bits)
300 {
301    switch (value.type) {
302    case GEN_MI_VALUE_TYPE_IMM:
303       if (top_32_bits)
304          value.imm >>= 32;
305       else
306          value.imm &= 0xffffffffu;
307       return value;
308 
309    case GEN_MI_VALUE_TYPE_MEM32:
310       assert(!top_32_bits);
311       return value;
312 
313    case GEN_MI_VALUE_TYPE_MEM64:
314       if (top_32_bits)
315          value.addr = __gen_address_offset(value.addr, 4);
316       value.type = GEN_MI_VALUE_TYPE_MEM32;
317       return value;
318 
319    case GEN_MI_VALUE_TYPE_REG32:
320       assert(!top_32_bits);
321       return value;
322 
323    case GEN_MI_VALUE_TYPE_REG64:
324       if (top_32_bits)
325          value.reg += 4;
326       value.type = GEN_MI_VALUE_TYPE_REG32;
327       return value;
328    }
329 
330    unreachable("Invalid gen_mi_value type");
331 }
332 
/* Copy src into dst without consuming either reference.
 *
 * Dispatches on the dst/src type pairing to the right MI command:
 * MI_STORE_DATA_IMM, MI_COPY_MEM_MEM, MI_STORE_REGISTER_MEM, or
 * MI_LOAD_REGISTER_IMM/MEM/REG.  64-bit destinations are copied as two
 * 32-bit halves; a 32-bit source zero-fills the destination's top half.
 */
static inline void
_gen_mi_copy_no_unref(struct gen_mi_builder *b,
                      struct gen_mi_value dst, struct gen_mi_value src)
{
#if GEN_GEN >= 7 || GEN_IS_HASWELL
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   /* Pending MI_MATH must land before the copy to preserve ordering */
   gen_mi_builder_flush_math(b);

   switch (dst.type) {
   case GEN_MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case GEN_MI_VALUE_TYPE_MEM64:
   case GEN_MI_VALUE_TYPE_REG64:
      /* If the destination is 64 bits, we have to copy in two halves */
      _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false),
                               gen_mi_value_half(src, false));
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
      case GEN_MI_VALUE_TYPE_MEM64:
      case GEN_MI_VALUE_TYPE_REG64:
         /* TODO: Use MI_STORE_DATA_IMM::StoreQWord when we have it */
         _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true),
                                  gen_mi_value_half(src, true));
         break;
      default:
         /* 32-bit source: unsigned-extend by zeroing the top half */
         _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true),
                                  gen_mi_imm(0));
         break;
      }
      break;

   case GEN_MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
         gen_mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GEN_GEN >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case GEN_MI_VALUE_TYPE_MEM32:
      case GEN_MI_VALUE_TYPE_MEM64:
#if GEN_GEN >= 8
         gen_mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GEN_IS_HASWELL
         {
            /* No MI_COPY_MEM_MEM on HSW; bounce through a temporary GPR */
            struct gen_mi_value tmp = gen_mi_new_gpr(b);
            _gen_mi_copy_no_unref(b, tmp, src);
            _gen_mi_copy_no_unref(b, dst, tmp);
            gen_mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case GEN_MI_VALUE_TYPE_REG32:
      case GEN_MI_VALUE_TYPE_REG64:
         gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            srm.RegisterAddress = src.reg;
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid gen_mi_value type");
      }
      break;

   case GEN_MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
         gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            lri.RegisterOffset = dst.reg;
            lri.DataDWord = src.imm;
         }
         break;

      case GEN_MI_VALUE_TYPE_MEM32:
      case GEN_MI_VALUE_TYPE_MEM64:
         gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            lrm.RegisterAddress = dst.reg;
            lrm.MemoryAddress = src.addr;
         }
         break;

      case GEN_MI_VALUE_TYPE_REG32:
      case GEN_MI_VALUE_TYPE_REG64:
#if GEN_GEN >= 8 || GEN_IS_HASWELL
         /* Skip the LRR entirely when src and dst are the same register */
         if (src.reg != dst.reg) {
            gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               lrr.SourceRegisterAddress = src.reg;
               lrr.DestinationRegisterAddress = dst.reg;
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid gen_mi_value type");
      }
      break;

   default:
      unreachable("Invalid gen_mi_value type");
   }
}
453 
/** Store the value in src to the value represented by dst
 *
 * If the bit size of src and dst mismatch, this function does an unsigned
 * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
gen_mi_store(struct gen_mi_builder *b,
             struct gen_mi_value dst, struct gen_mi_value src)
{
   _gen_mi_copy_no_unref(b, dst, src);
   /* The copy helper leaves references alone; consume both here. */
   gen_mi_value_unref(b, src);
   gen_mi_value_unref(b, dst);
}
470 
471 static inline void
gen_mi_memset(struct gen_mi_builder * b,__gen_address_type dst,uint32_t value,uint32_t size)472 gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
473               uint32_t value, uint32_t size)
474 {
475 #if GEN_GEN >= 8 || GEN_IS_HASWELL
476    assert(b->num_math_dwords == 0);
477 #endif
478 
479    /* This memset operates in units of dwords. */
480    assert(size % 4 == 0);
481 
482    for (uint32_t i = 0; i < size; i += 4) {
483       gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)),
484                       gen_mi_imm(value));
485    }
486 }
487 
/* Copy `size` bytes from src to dst, one dword at a time.
 *
 * NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX (used as the
 * bounce register, since IVB's CS has no general-purpose registers).
 */
static inline void
gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
              __gen_address_type src, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i));
      struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i));
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      gen_mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
      gen_mi_store(b, tmp_reg, src_val);
      gen_mi_store(b, dst_val, tmp_reg);
#endif
   }
}
515 
516 /*
517  * MI_MATH Section.  Only available on Haswell+
518  */
519 
520 #if GEN_GEN >= 8 || GEN_IS_HASWELL
521 
/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst.  Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
gen_mi_store_if(struct gen_mi_builder *b,
                struct gen_mi_value dst,
                struct gen_mi_value src)
{
   assert(!dst.invert && !src.invert);

   gen_mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == GEN_MI_VALUE_TYPE_MEM64 ||
          dst.type == GEN_MI_VALUE_TYPE_MEM32);

   if (src.type != GEN_MI_VALUE_TYPE_REG32 &&
       src.type != GEN_MI_VALUE_TYPE_REG64) {
      struct gen_mi_value tmp = gen_mi_new_gpr(b);
      _gen_mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == GEN_MI_VALUE_TYPE_MEM64) {
      /* 64-bit destination: two predicated SRMs, one per 32-bit half */
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = src.reg;
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = src.reg + 4;
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = src.reg;
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   gen_mi_value_unref(b, src);
   gen_mi_value_unref(b, dst);
}
574 
575 static inline void
_gen_mi_builder_push_math(struct gen_mi_builder * b,const uint32_t * dwords,unsigned num_dwords)576 _gen_mi_builder_push_math(struct gen_mi_builder *b,
577                           const uint32_t *dwords,
578                           unsigned num_dwords)
579 {
580    assert(num_dwords < GEN_MI_BUILDER_MAX_MATH_DWORDS);
581    if (b->num_math_dwords + num_dwords > GEN_MI_BUILDER_MAX_MATH_DWORDS)
582       gen_mi_builder_flush_math(b);
583 
584    memcpy(&b->math_dwords[b->num_math_dwords],
585           dwords, num_dwords * sizeof(*dwords));
586    b->num_math_dwords += num_dwords;
587 }
588 
589 static inline uint32_t
_gen_mi_pack_alu(uint32_t opcode,uint32_t operand1,uint32_t operand2)590 _gen_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
591 {
592    struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
593       .Operand2 = operand2,
594       .Operand1 = operand1,
595       .ALUOpcode = opcode,
596    };
597 
598    uint32_t dw;
599    GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
600 
601    return dw;
602 }
603 
604 static inline struct gen_mi_value
gen_mi_value_to_gpr(struct gen_mi_builder * b,struct gen_mi_value val)605 gen_mi_value_to_gpr(struct gen_mi_builder *b, struct gen_mi_value val)
606 {
607    if (gen_mi_value_is_gpr(val))
608       return val;
609 
610    /* Save off the invert flag because it makes copy() grumpy */
611    bool invert = val.invert;
612    val.invert = false;
613 
614    struct gen_mi_value tmp = gen_mi_new_gpr(b);
615    _gen_mi_copy_no_unref(b, tmp, val);
616    tmp.invert = invert;
617 
618    return tmp;
619 }
620 
621 static inline uint32_t
_gen_mi_math_load_src(struct gen_mi_builder * b,unsigned src,struct gen_mi_value * val)622 _gen_mi_math_load_src(struct gen_mi_builder *b,
623                       unsigned src, struct gen_mi_value *val)
624 {
625    if (val->type == GEN_MI_VALUE_TYPE_IMM &&
626        (val->imm == 0 || val->imm == UINT64_MAX)) {
627       uint64_t imm = val->invert ? ~val->imm : val->imm;
628       return _gen_mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
629    } else {
630       *val = gen_mi_value_to_gpr(b, *val);
631       return _gen_mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
632                               src, _gen_mi_value_as_gpr(*val));
633    }
634 }
635 
636 static inline struct gen_mi_value
gen_mi_math_binop(struct gen_mi_builder * b,uint32_t opcode,struct gen_mi_value src0,struct gen_mi_value src1,uint32_t store_op,uint32_t store_src)637 gen_mi_math_binop(struct gen_mi_builder *b, uint32_t opcode,
638                   struct gen_mi_value src0, struct gen_mi_value src1,
639                   uint32_t store_op, uint32_t store_src)
640 {
641    struct gen_mi_value dst = gen_mi_new_gpr(b);
642 
643    uint32_t dw[4];
644    dw[0] = _gen_mi_math_load_src(b, MI_ALU_SRCA, &src0);
645    dw[1] = _gen_mi_math_load_src(b, MI_ALU_SRCB, &src1);
646    dw[2] = _gen_mi_pack_alu(opcode, 0, 0);
647    dw[3] = _gen_mi_pack_alu(store_op, _gen_mi_value_as_gpr(dst), store_src);
648    _gen_mi_builder_push_math(b, dw, 4);
649 
650    gen_mi_value_unref(b, src0);
651    gen_mi_value_unref(b, src1);
652 
653    return dst;
654 }
655 
/* Logically invert a value.
 *
 * This is free: it only toggles the invert flag, which MI_MATH source loads
 * honor via MI_ALU_LOADINV.
 */
static inline struct gen_mi_value
gen_mi_inot(struct gen_mi_builder *b, struct gen_mi_value val)
{
   /* TODO These currently can't be passed into gen_mi_copy */
   val.invert = !val.invert;
   return val;
}
663 
/* Return src0 + src1.  Consumes one reference to each source. */
static inline struct gen_mi_value
gen_mi_iadd(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}
671 
672 static inline struct gen_mi_value
gen_mi_iadd_imm(struct gen_mi_builder * b,struct gen_mi_value src,uint64_t N)673 gen_mi_iadd_imm(struct gen_mi_builder *b,
674                 struct gen_mi_value src, uint64_t N)
675 {
676    if (N == 0)
677       return src;
678 
679    return gen_mi_iadd(b, src, gen_mi_imm(N));
680 }
681 
/* Return src0 - src1.  Consumes one reference to each source. */
static inline struct gen_mi_value
gen_mi_isub(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}
689 
/* Return (src0 < src1) as 1/0 (unsigned comparison). */
static inline struct gen_mi_value
gen_mi_ult(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   /* Compute "less than" by subtracting and storing the carry bit */
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_CF);
}
698 
/* Return (src0 >= src1) as 1/0 (unsigned comparison). */
static inline struct gen_mi_value
gen_mi_uge(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   /* Compute "greater than or equal" by subtracting and storing the
    * INVERSE of the carry bit (carry set means src0 < src1).
    */
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STOREINV, MI_ALU_CF);
}
707 
/* Return src0 & src1 (bitwise AND).  Consumes one reference to each source. */
static inline struct gen_mi_value
gen_mi_iand(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_AND, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}
715 
/**
 * Returns (src != 0) ? 1 : 0.
 */
static inline struct gen_mi_value
gen_mi_nz(struct gen_mi_builder *b, struct gen_mi_value src)
{
   /* Add zero so the zero flag reflects src, then store its inverse */
   return gen_mi_math_binop(b, MI_ALU_ADD, src, gen_mi_imm(0),
                            MI_ALU_STOREINV, MI_ALU_ZF);
}
725 
/**
 * Returns (src == 0) ? 1 : 0.
 */
static inline struct gen_mi_value
gen_mi_z(struct gen_mi_builder *b, struct gen_mi_value src)
{
   /* Add zero so the zero flag reflects src, then store it directly */
   return gen_mi_math_binop(b, MI_ALU_ADD, src, gen_mi_imm(0),
                            MI_ALU_STORE, MI_ALU_ZF);
}
735 
/* Return src0 | src1 (bitwise OR).  Consumes one reference to each source. */
static inline struct gen_mi_value
gen_mi_ior(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_OR, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}
743 
744 static inline struct gen_mi_value
gen_mi_imul_imm(struct gen_mi_builder * b,struct gen_mi_value src,uint32_t N)745 gen_mi_imul_imm(struct gen_mi_builder *b,
746                 struct gen_mi_value src, uint32_t N)
747 {
748    if (N == 0) {
749       gen_mi_value_unref(b, src);
750       return gen_mi_imm(0);
751    }
752 
753    if (N == 1)
754       return src;
755 
756    src = gen_mi_value_to_gpr(b, src);
757 
758    struct gen_mi_value res = gen_mi_value_ref(b, src);
759 
760    unsigned top_bit = 31 - __builtin_clz(N);
761    for (int i = top_bit - 1; i >= 0; i--) {
762       res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res));
763       if (N & (1 << i))
764          res = gen_mi_iadd(b, res, gen_mi_value_ref(b, src));
765    }
766 
767    gen_mi_value_unref(b, src);
768 
769    return res;
770 }
771 
772 static inline struct gen_mi_value
gen_mi_ishl_imm(struct gen_mi_builder * b,struct gen_mi_value src,uint32_t shift)773 gen_mi_ishl_imm(struct gen_mi_builder *b,
774                 struct gen_mi_value src, uint32_t shift)
775 {
776    struct gen_mi_value res = gen_mi_value_to_gpr(b, src);
777 
778    for (unsigned i = 0; i < shift; i++)
779       res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res));
780 
781    return res;
782 }
783 
784 static inline struct gen_mi_value
gen_mi_ushr32_imm(struct gen_mi_builder * b,struct gen_mi_value src,uint32_t shift)785 gen_mi_ushr32_imm(struct gen_mi_builder *b,
786                   struct gen_mi_value src, uint32_t shift)
787 {
788    /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
789     * of the result.  This assumes the top 32 bits are zero.
790     */
791    if (shift > 64)
792       return gen_mi_imm(0);
793 
794    if (shift > 32) {
795       struct gen_mi_value tmp = gen_mi_new_gpr(b);
796       _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, false),
797                                gen_mi_value_half(src, true));
798       _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, true), gen_mi_imm(0));
799       gen_mi_value_unref(b, src);
800       src = tmp;
801       shift -= 32;
802    }
803    assert(shift <= 32);
804    struct gen_mi_value tmp = gen_mi_ishl_imm(b, src, 32 - shift);
805    struct gen_mi_value dst = gen_mi_new_gpr(b);
806    _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false),
807                             gen_mi_value_half(tmp, true));
808    _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true), gen_mi_imm(0));
809    gen_mi_value_unref(b, tmp);
810    return dst;
811 }
812 
813 static inline struct gen_mi_value
gen_mi_udiv32_imm(struct gen_mi_builder * b,struct gen_mi_value N,uint32_t D)814 gen_mi_udiv32_imm(struct gen_mi_builder *b,
815                   struct gen_mi_value N, uint32_t D)
816 {
817    /* We implicitly assume that N is only a 32-bit value */
818    if (D == 0) {
819       /* This is invalid but we should do something */
820       return gen_mi_imm(0);
821    } else if (util_is_power_of_two_or_zero(D)) {
822       return gen_mi_ushr32_imm(b, N, util_logbase2(D));
823    } else {
824       struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
825       assert(m.multiplier <= UINT32_MAX);
826 
827       if (m.pre_shift)
828          N = gen_mi_ushr32_imm(b, N, m.pre_shift);
829 
830       /* Do the 32x32 multiply  into gpr0 */
831       N = gen_mi_imul_imm(b, N, m.multiplier);
832 
833       if (m.increment)
834          N = gen_mi_iadd(b, N, gen_mi_imm(m.multiplier));
835 
836       N = gen_mi_ushr32_imm(b, N, 32);
837 
838       if (m.post_shift)
839          N = gen_mi_ushr32_imm(b, N, m.post_shift);
840 
841       return N;
842    }
843 }
844 
845 #endif /* MI_MATH section */
846 
847 /* This assumes addresses of strictly more than 32bits (aka. Gen8+). */
848 #if GEN_MI_BUILDER_CAN_WRITE_BATCH
849 
/* Handle returned by gen_mi_store_address() and later fixed up with the
 * real destination address by _gen_mi_resolve_address_token().
 */
struct gen_mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};
854 
/* Emit two MI_STORE_REGISTER_MEMs that write addr_reg to a not-yet-known
 * memory location, and return pointers to the commands' address fields so
 * they can be patched later (self-modifying batch).
 *
 * Consumes the addr_reg reference.
 */
static inline struct gen_mi_address_token
gen_mi_store_address(struct gen_mi_builder *b,
                     struct gen_mi_value addr_reg)
{
   gen_mi_builder_flush_math(b);

   assert(addr_reg.type == GEN_MI_VALUE_TYPE_REG64);

   struct gen_mi_address_token token = {};

   /* One SRM per 32-bit half of the 64-bit register */
   for (unsigned i = 0; i < 2; i++) {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         /* _dst is the macro-provided pointer to this command in the batch;
          * record where its MemoryAddress field lives (byte offset).
          */
         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   gen_mi_value_unref(b, addr_reg);
   return token;
}
878 
/* Emit padding so that commands patched after this point are re-read by the
 * command streamer rather than executed from its prefetch.
 */
static inline void
gen_mi_self_mod_barrier(struct gen_mi_builder *b)
{
   /* Documentation says Gen11+ should be able to invalidate the command cache
    * but experiment show it doesn't work properly, so for now just get over
    * the CS prefetch.
    */
   for (uint32_t i = 0; i < 128; i++)
      gen_mi_builder_emit(b, GENX(MI_NOOP), noop);
}
889 
890 static inline void
_gen_mi_resolve_address_token(struct gen_mi_builder * b,struct gen_mi_address_token token,void * batch_location)891 _gen_mi_resolve_address_token(struct gen_mi_builder *b,
892                               struct gen_mi_address_token token,
893                               void *batch_location)
894 {
895    uint64_t addr_addr_u64 = __gen_get_batch_address(b->user_data,
896                                                     batch_location);
897    *(token.ptrs[0]) = addr_addr_u64;
898    *(token.ptrs[1]) = addr_addr_u64 + 4;
899 }
900 
901 #endif /* GEN_MI_BUILDER_CAN_WRITE_BATCH */
902 
903 #endif /* GEN_MI_BUILDER_H */
904