/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef GEN_MI_BUILDER_H
#define GEN_MI_BUILDER_H

#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef GEN_MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
/** These must be defined by the user of the builder
 *
 * void *
 * __gen_get_batch_dwords(__gen_user_data *user_data, unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring batch pointers remain valid during building is
 * required. The following function must also be defined; it returns an
 * address in canonical form:
 *
 * uint64_t
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
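
/* Example (illustrative sketch only, not part of this header): a driver
 * built on a simple growable batch might provide the required hooks along
 * these lines, where "struct example_batch" and its fields are hypothetical:
 *
 *    static void *
 *    __gen_get_batch_dwords(struct example_batch *batch, unsigned num_dwords)
 *    {
 *       uint32_t *ptr = batch->next;
 *       batch->next += num_dwords;
 *       assert(batch->next <= batch->end);
 *       return ptr;
 *    }
 *
 *    static struct example_address
 *    __gen_address_offset(struct example_address addr, uint64_t offset)
 *    {
 *       addr.offset += offset;
 *       return addr;
 *    }
 */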

/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define gen_mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define gen_mi_builder_emit(b, cmd, name) \
   gen_mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
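
/* Example usage (illustrative): the pack macro yields a for-scoped struct
 * whose fields are filled in the braced block and packed into the batch on
 * exit. This is exactly how the helpers below emit commands, e.g. with
 * hypothetical "reg" and "imm" values:
 *
 *    gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
 *       lri.RegisterOffset = reg;
 *       lri.DataDWord = imm;
 *    }
 */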


enum gen_mi_value_type {
   GEN_MI_VALUE_TYPE_IMM,
   GEN_MI_VALUE_TYPE_MEM32,
   GEN_MI_VALUE_TYPE_MEM64,
   GEN_MI_VALUE_TYPE_REG32,
   GEN_MI_VALUE_TYPE_REG64,
};

struct gen_mi_value {
   enum gen_mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GEN_GEN >= 7 || GEN_IS_HASWELL
   bool invert;
#endif
};

#if GEN_GEN >= 9
#define GEN_MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define GEN_MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct gen_mi_builder {
   __gen_user_data *user_data;

#if GEN_GEN >= 8 || GEN_IS_HASWELL
   uint32_t gprs;
   uint8_t gpr_refs[GEN_MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[GEN_MI_BUILDER_MAX_MATH_DWORDS];
#endif
};

static inline void
gen_mi_builder_init(struct gen_mi_builder *b, __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->user_data = user_data;

#if GEN_GEN >= 8 || GEN_IS_HASWELL
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}
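
/* Example (illustrative): a typical builder lifetime in a driver, where
 * "cmd_buffer->batch" stands in for whatever __gen_user_data the driver
 * uses and "dst" is a hypothetical address:
 *
 *    struct gen_mi_builder b;
 *    gen_mi_builder_init(&b, &cmd_buffer->batch);
 *    gen_mi_store(&b, gen_mi_mem32(dst), gen_mi_imm(42));
 *    gen_mi_builder_flush_math(&b);
 */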

static inline void
gen_mi_builder_flush_math(struct gen_mi_builder *b)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   gen_mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

#define _GEN_MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _GEN_MI_BUILDER_NUM_HW_GPRS 16

#if GEN_GEN >= 8 || GEN_IS_HASWELL

static inline bool
gen_mi_value_is_gpr(struct gen_mi_value val)
{
   return (val.type == GEN_MI_VALUE_TYPE_REG32 ||
           val.type == GEN_MI_VALUE_TYPE_REG64) &&
          val.reg >= _GEN_MI_BUILDER_GPR_BASE &&
          val.reg < _GEN_MI_BUILDER_GPR_BASE +
                    _GEN_MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_gen_mi_value_is_allocated_gpr(struct gen_mi_value val)
{
   return (val.type == GEN_MI_VALUE_TYPE_REG32 ||
           val.type == GEN_MI_VALUE_TYPE_REG64) &&
          val.reg >= _GEN_MI_BUILDER_GPR_BASE &&
          val.reg < _GEN_MI_BUILDER_GPR_BASE +
                    GEN_MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_gen_mi_value_as_gpr(struct gen_mi_value val)
{
   assert(gen_mi_value_is_gpr(val));
   assert(val.reg % 8 == 0);
   return (val.reg - _GEN_MI_BUILDER_GPR_BASE) / 8;
}

static inline struct gen_mi_value
gen_mi_new_gpr(struct gen_mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_REG64,
      .reg = _GEN_MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */

/** Take a reference to a gen_mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations. All gen_mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume. In particular, if you pass the same value into a
 * single gen_mi_* math function twice (say, to add a number to itself), you
 * are responsible for calling gen_mi_value_ref() to get a second reference
 * because the gen_mi_* math function will consume it twice.
 */
static inline struct gen_mi_value
gen_mi_value_ref(struct gen_mi_builder *b, struct gen_mi_value val)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (_gen_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _gen_mi_value_as_gpr(val);
      assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */

   return val;
}
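
/* Example (illustrative): using a value twice in one calculation requires an
 * extra reference, because each use consumes one. This only matters for
 * allocated GPRs; ref/unref are no-ops for all other value types. With a
 * hypothetical address "addr":
 *
 *    struct gen_mi_value v = gen_mi_value_to_gpr(b, gen_mi_mem32(addr));
 *    struct gen_mi_value twice = gen_mi_iadd(b, v, gen_mi_value_ref(b, v));
 */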

/** Drop a reference to a gen_mi_value
 *
 * See also gen_mi_value_ref.
 */
static inline void
gen_mi_value_unref(struct gen_mi_builder *b, struct gen_mi_value val)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (_gen_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _gen_mi_value_as_gpr(val);
      assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
}

static inline struct gen_mi_value
gen_mi_imm(uint64_t imm)
{
   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct gen_mi_value
gen_mi_reg32(uint32_t reg)
{
   struct gen_mi_value val = {
      .type = GEN_MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(!_gen_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct gen_mi_value
gen_mi_reg64(uint32_t reg)
{
   struct gen_mi_value val = {
      .type = GEN_MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(!_gen_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct gen_mi_value
gen_mi_mem32(__gen_address_type addr)
{
   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct gen_mi_value
gen_mi_mem64(__gen_address_type addr)
{
   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}

static inline struct gen_mi_value
gen_mi_value_half(struct gen_mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case GEN_MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case GEN_MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case GEN_MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = GEN_MI_VALUE_TYPE_MEM32;
      return value;

   case GEN_MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case GEN_MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = GEN_MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid gen_mi_value type");
}
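
/* For example, the top half of a REG64 at GPR0 (0x2600) is the REG32 at
 * 0x2604, and the top half of a MEM64 at addr is the MEM32 at addr + 4;
 * IMM values are simply shifted or masked. Taking the top half of a 32-bit
 * value is not allowed.
 */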

static inline void
_gen_mi_copy_no_unref(struct gen_mi_builder *b,
                      struct gen_mi_value dst, struct gen_mi_value src)
{
#if GEN_GEN >= 7 || GEN_IS_HASWELL
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   gen_mi_builder_flush_math(b);

   switch (dst.type) {
   case GEN_MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case GEN_MI_VALUE_TYPE_MEM64:
   case GEN_MI_VALUE_TYPE_REG64:
      /* If the destination is 64 bits, we have to copy in two halves */
      _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false),
                            gen_mi_value_half(src, false));
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
      case GEN_MI_VALUE_TYPE_MEM64:
      case GEN_MI_VALUE_TYPE_REG64:
         /* TODO: Use MI_STORE_DATA_IMM::StoreQWord when we have it */
         _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true),
                               gen_mi_value_half(src, true));
         break;
      default:
         _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true),
                               gen_mi_imm(0));
         break;
      }
      break;

   case GEN_MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
         gen_mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GEN_GEN >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case GEN_MI_VALUE_TYPE_MEM32:
      case GEN_MI_VALUE_TYPE_MEM64:
#if GEN_GEN >= 8
         gen_mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GEN_IS_HASWELL
         {
            struct gen_mi_value tmp = gen_mi_new_gpr(b);
            _gen_mi_copy_no_unref(b, tmp, src);
            _gen_mi_copy_no_unref(b, dst, tmp);
            gen_mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case GEN_MI_VALUE_TYPE_REG32:
      case GEN_MI_VALUE_TYPE_REG64:
         gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            srm.RegisterAddress = src.reg;
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid gen_mi_value type");
      }
      break;

   case GEN_MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
         gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            lri.RegisterOffset = dst.reg;
            lri.DataDWord = src.imm;
         }
         break;

      case GEN_MI_VALUE_TYPE_MEM32:
      case GEN_MI_VALUE_TYPE_MEM64:
         gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            lrm.RegisterAddress = dst.reg;
            lrm.MemoryAddress = src.addr;
         }
         break;

      case GEN_MI_VALUE_TYPE_REG32:
      case GEN_MI_VALUE_TYPE_REG64:
#if GEN_GEN >= 8 || GEN_IS_HASWELL
         if (src.reg != dst.reg) {
            gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               lrr.SourceRegisterAddress = src.reg;
               lrr.DestinationRegisterAddress = dst.reg;
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid gen_mi_value type");
      }
      break;

   default:
      unreachable("Invalid gen_mi_value type");
   }
}
/** Store the value in src to the value represented by dst
 *
 * If the bit sizes of src and dst mismatch, this function does an unsigned
 * integer cast. If src has more bits than dst, it takes the bottom bits.
 * If src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
gen_mi_store(struct gen_mi_builder *b,
             struct gen_mi_value dst, struct gen_mi_value src)
{
   _gen_mi_copy_no_unref(b, dst, src);
   gen_mi_value_unref(b, src);
   gen_mi_value_unref(b, dst);
}
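
/* Example (illustrative): on platforms with MI_MATH (HSW and Gen8+), an
 * in-memory counter at a hypothetical address "addr" can be incremented
 * with:
 *
 *    gen_mi_store(b, gen_mi_mem32(addr),
 *                 gen_mi_iadd(b, gen_mi_mem32(addr), gen_mi_imm(1)));
 */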

static inline void
gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
              uint32_t value, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)),
                   gen_mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */
static inline void
gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
              __gen_address_type src, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i));
      struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i));
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      gen_mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general-purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
      gen_mi_store(b, tmp_reg, src_val);
      gen_mi_store(b, dst_val, tmp_reg);
#endif
   }
}
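
/* Example (illustrative): copy a 64-byte block and clear another, where
 * "dst", "src", and "scratch" stand in for driver-provided addresses:
 *
 *    gen_mi_memcpy(b, dst, src, 64);
 *    gen_mi_memset(b, scratch, 0, 64);
 */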

/*
 * MI_MATH Section. Only available on Haswell+
 */

#if GEN_GEN >= 8 || GEN_IS_HASWELL

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst. Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
gen_mi_store_if(struct gen_mi_builder *b,
                struct gen_mi_value dst,
                struct gen_mi_value src)
{
   assert(!dst.invert && !src.invert);

   gen_mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == GEN_MI_VALUE_TYPE_MEM64 ||
          dst.type == GEN_MI_VALUE_TYPE_MEM32);

   if (src.type != GEN_MI_VALUE_TYPE_REG32 &&
       src.type != GEN_MI_VALUE_TYPE_REG64) {
      struct gen_mi_value tmp = gen_mi_new_gpr(b);
      _gen_mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == GEN_MI_VALUE_TYPE_MEM64) {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = src.reg;
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = src.reg + 4;
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = src.reg;
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   gen_mi_value_unref(b, src);
   gen_mi_value_unref(b, dst);
}
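
/* Example (illustrative): conditionally latch a comparison result, assuming
 * the driver has already loaded MI_PREDICATE_RESULT (e.g. via MI_PREDICATE
 * commands it emits itself) and "a_addr", "b_addr", and "result_addr" are
 * hypothetical addresses. gen_mi_ult() is defined further below:
 *
 *    struct gen_mi_value cmp = gen_mi_ult(b, gen_mi_mem32(a_addr),
 *                                         gen_mi_mem32(b_addr));
 *    gen_mi_store_if(b, gen_mi_mem32(result_addr), cmp);
 */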

static inline void
_gen_mi_builder_push_math(struct gen_mi_builder *b,
                          const uint32_t *dwords,
                          unsigned num_dwords)
{
   assert(num_dwords < GEN_MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > GEN_MI_BUILDER_MAX_MATH_DWORDS)
      gen_mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_gen_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}

static inline struct gen_mi_value
gen_mi_value_to_gpr(struct gen_mi_builder *b, struct gen_mi_value val)
{
   if (gen_mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because it makes copy() grumpy */
   bool invert = val.invert;
   val.invert = false;

   struct gen_mi_value tmp = gen_mi_new_gpr(b);
   _gen_mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint32_t
_gen_mi_math_load_src(struct gen_mi_builder *b,
                      unsigned src, struct gen_mi_value *val)
{
   if (val->type == GEN_MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _gen_mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = gen_mi_value_to_gpr(b, *val);
      return _gen_mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                              src, _gen_mi_value_as_gpr(*val));
   }
}

static inline struct gen_mi_value
gen_mi_math_binop(struct gen_mi_builder *b, uint32_t opcode,
                  struct gen_mi_value src0, struct gen_mi_value src1,
                  uint32_t store_op, uint32_t store_src)
{
   struct gen_mi_value dst = gen_mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _gen_mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _gen_mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _gen_mi_pack_alu(opcode, 0, 0);
   dw[3] = _gen_mi_pack_alu(store_op, _gen_mi_value_as_gpr(dst), store_src);
   _gen_mi_builder_push_math(b, dw, 4);

   gen_mi_value_unref(b, src0);
   gen_mi_value_unref(b, src1);

   return dst;
}
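
/* Illustration: each binop expands to a four-dword ALU sequence. For
 * example, gen_mi_iadd(b, x, y) with x in GPR1, y in GPR2, and the result
 * allocated in GPR0 pushes the equivalent of:
 *
 *    LOAD  SRCA, R1
 *    LOAD  SRCB, R2
 *    ADD
 *    STORE R0,   ACCU
 */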

static inline struct gen_mi_value
gen_mi_inot(struct gen_mi_builder *b, struct gen_mi_value val)
{
   /* TODO These currently can't be passed into gen_mi_copy */
   val.invert = !val.invert;
   return val;
}

static inline struct gen_mi_value
gen_mi_iadd(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct gen_mi_value
gen_mi_iadd_imm(struct gen_mi_builder *b,
                struct gen_mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return gen_mi_iadd(b, src, gen_mi_imm(N));
}

static inline struct gen_mi_value
gen_mi_isub(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}
static inline struct gen_mi_value
gen_mi_ult(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   /* Compute "less than" by subtracting and storing the carry bit */
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_CF);
}

static inline struct gen_mi_value
gen_mi_uge(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   /* Compute "greater than or equal" by subtracting and storing the
    * inverted carry bit
    */
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STOREINV, MI_ALU_CF);
}

static inline struct gen_mi_value
gen_mi_iand(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_AND, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

/**
 * Returns (src != 0) ? 1 : 0.
 */
static inline struct gen_mi_value
gen_mi_nz(struct gen_mi_builder *b, struct gen_mi_value src)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src, gen_mi_imm(0),
                            MI_ALU_STOREINV, MI_ALU_ZF);
}

/**
 * Returns (src == 0) ? 1 : 0.
 */
static inline struct gen_mi_value
gen_mi_z(struct gen_mi_builder *b, struct gen_mi_value src)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src, gen_mi_imm(0),
                            MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct gen_mi_value
gen_mi_ior(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_OR, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct gen_mi_value
gen_mi_imul_imm(struct gen_mi_builder *b,
                struct gen_mi_value src, uint32_t N)
{
   if (N == 0) {
      gen_mi_value_unref(b, src);
      return gen_mi_imm(0);
   }

   if (N == 1)
      return src;

   src = gen_mi_value_to_gpr(b, src);

   struct gen_mi_value res = gen_mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res));
      if (N & (1 << i))
         res = gen_mi_iadd(b, res, gen_mi_value_ref(b, src));
   }

   gen_mi_value_unref(b, src);

   return res;
}
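
/* MI_MATH has no multiply, so gen_mi_imul_imm walks N's bits from the top:
 * it doubles the accumulator at each step and adds src wherever a bit is
 * set. For example, N = 5 (0b101): res = src; res += res (2*src);
 * res += res (4*src); res += src (5*src).
 */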

static inline struct gen_mi_value
gen_mi_ishl_imm(struct gen_mi_builder *b,
                struct gen_mi_value src, uint32_t shift)
{
   struct gen_mi_value res = gen_mi_value_to_gpr(b, src);

   for (unsigned i = 0; i < shift; i++)
      res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res));

   return res;
}

static inline struct gen_mi_value
gen_mi_ushr32_imm(struct gen_mi_builder *b,
                  struct gen_mi_value src, uint32_t shift)
{
   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result. This assumes the top 32 bits are zero.
    */
   if (shift > 64) {
      /* Consume the src reference even though the result is constant. */
      gen_mi_value_unref(b, src);
      return gen_mi_imm(0);
   }

   if (shift > 32) {
      struct gen_mi_value tmp = gen_mi_new_gpr(b);
      _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, false),
                            gen_mi_value_half(src, true));
      _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, true), gen_mi_imm(0));
      gen_mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct gen_mi_value tmp = gen_mi_ishl_imm(b, src, 32 - shift);
   struct gen_mi_value dst = gen_mi_new_gpr(b);
   _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false),
                         gen_mi_value_half(tmp, true));
   _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true), gen_mi_imm(0));
   gen_mi_value_unref(b, tmp);
   return dst;
}
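
/* Illustration: a right shift of a 32-bit value by 8 becomes a left shift
 * by 24 (via repeated self-adds of the full 64-bit GPR) followed by taking
 * the top 32 bits of the result.
 */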

static inline struct gen_mi_value
gen_mi_udiv32_imm(struct gen_mi_builder *b,
                  struct gen_mi_value N, uint32_t D)
{
   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return gen_mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return gen_mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = gen_mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into a fresh GPR */
      N = gen_mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = gen_mi_iadd(b, N, gen_mi_imm(m.multiplier));

      N = gen_mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = gen_mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}
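
/* Worked example (illustrative): for D = 3, util_compute_fast_udiv_info
 * typically yields multiplier 0xAAAAAAAB with post_shift 1 and no pre-shift
 * or increment, so N / 3 is computed as (N * 0xAAAAAAAB) >> 33 using the
 * multiply and shift helpers above.
 */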

#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32 bits (i.e. Gen8+). */
#if GEN_MI_BUILDER_CAN_WRITE_BATCH

struct gen_mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};

static inline struct gen_mi_address_token
gen_mi_store_address(struct gen_mi_builder *b,
                     struct gen_mi_value addr_reg)
{
   gen_mi_builder_flush_math(b);

   assert(addr_reg.type == GEN_MI_VALUE_TYPE_REG64);

   struct gen_mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   gen_mi_value_unref(b, addr_reg);
   return token;
}

static inline void
gen_mi_self_mod_barrier(struct gen_mi_builder *b)
{
   /* Documentation says Gen11+ should be able to invalidate the command
    * cache, but experiments show it doesn't work properly, so for now we
    * just emit enough MI_NOOPs to get past the CS prefetch.
    */
   for (uint32_t i = 0; i < 128; i++)
      gen_mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_gen_mi_resolve_address_token(struct gen_mi_builder *b,
                              struct gen_mi_address_token token,
                              void *batch_location)
{
   uint64_t addr_addr_u64 = __gen_get_batch_address(b->user_data,
                                                    batch_location);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}
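
/* Example (illustrative) self-modifying-batch flow: emit the stores, put a
 * barrier between the write and the instruction it patches, then resolve
 * the token once the patched location is known. "addr_gpr" is a
 * hypothetical REG64 value holding the address to store:
 *
 *    struct gen_mi_address_token t = gen_mi_store_address(b, addr_gpr);
 *    gen_mi_self_mod_barrier(b);
 *    ... emit the command whose address field will be rewritten ...
 *    _gen_mi_resolve_address_token(b, t, location_of_that_field);
 */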

#endif /* GEN_MI_BUILDER_CAN_WRITE_BATCH */

#endif /* GEN_MI_BUILDER_H */