/*
 * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"
#include "midgard_ops.h"
#include "midgard_quirks.h"

static midgard_int_mod
mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
{
        if (!half) {
                assert(!shift);
                /* Doesn't matter, src mods are only used when expanding */
                return midgard_int_sign_extend;
        }

        if (shift)
                return midgard_int_left_shift;

        if (nir_alu_type_get_base_type(T) == nir_type_int)
                return midgard_int_sign_extend;
        else
                return midgard_int_zero_extend;
}

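/* The UBO index immediate has no field of its own, so it is scattered across
 * load/store fields that UBO reads don't otherwise seem to need: bits 1:0 go
 * to arg_comp, bits 4:2 to arg_reg, bit 5 to bitsize_toggle and bits 7:6 to
 * index_format. */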
void
midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index)
{
        word->arg_comp = index & 0x3;
        word->arg_reg = (index >> 2) & 0x7;
        word->bitsize_toggle = (index >> 5) & 0x1;
        word->index_format = (index >> 6) & 0x3;
}

void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p)
{
        /* Currently these parameters are not supported. */
        assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0);

        unsigned u;
        memcpy(&u, &p, sizeof(p));

        word->signed_offset |= u & 0x1FF;
}

midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word)
{
        unsigned params = word.signed_offset & 0x1FF;

        midgard_varying_params p;
        memcpy(&p, &params, sizeof(p));

        return p;
}

unsigned
mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar)
{
        bool integer = midgard_is_integer_op(ins->op);
        unsigned base_size = max_bitsize_for_alu(ins);
        unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
        bool half = (sz == (base_size >> 1));

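        /* Float sources encode abs in bit 0 and negate in bit 1; integer
         * sources instead carry a shift/extension modifier. */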
        return integer ?
                mir_get_imod(ins->src_shift[i], ins->src_types[i], half, scalar) :
                ((ins->src_abs[i] << 0) |
                 ((ins->src_neg[i] << 1)));
}

/* Midgard IR only knows vector ALU types, but we sometimes need to actually
 * use scalar ALU instructions, for functional or performance reasons. To do
 * this, we just demote vector ALU payloads to scalar. */

static int
component_from_mask(unsigned mask)
{
        for (int c = 0; c < 8; ++c) {
                if (mask & (1 << c))
                        return c;
        }

        assert(0);
        return 0;
}

static unsigned
mir_pack_scalar_source(unsigned mod, bool is_full, unsigned component)
{
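        /* Full (32-bit) sources sit on even 16-bit component slots, hence the
         * extra shift; see the matching output_component handling below. */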
        midgard_scalar_alu_src s = {
                .mod = mod,
                .full = is_full,
                .component = component << (is_full ? 1 : 0)
        };

        unsigned o;
        memcpy(&o, &s, sizeof(s));

        return o & ((1 << 6) - 1);
}

static midgard_scalar_alu
vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins)
{
        bool is_full = nir_alu_type_get_type_size(ins->dest_type) == 32;

        bool half_0 = nir_alu_type_get_type_size(ins->src_types[0]) == 16;
        bool half_1 = nir_alu_type_get_type_size(ins->src_types[1]) == 16;
        unsigned comp = component_from_mask(ins->mask);

        unsigned packed_src[2] = {
                mir_pack_scalar_source(mir_pack_mod(ins, 0, true), !half_0, ins->swizzle[0][comp]),
                mir_pack_scalar_source(mir_pack_mod(ins, 1, true), !half_1, ins->swizzle[1][comp])
        };

        /* The output component is from the mask */
        midgard_scalar_alu s = {
                .op = v.op,
                .src1 = packed_src[0],
                .src2 = packed_src[1],
                .outmod = v.outmod,
                .output_full = is_full,
                .output_component = comp
        };

        /* Full components are physically spaced out */
        if (is_full) {
                assert(s.output_component < 4);
                s.output_component <<= 1;
        }

        /* Inline constant is passed along rather than trying to extract it
         * from v */

        if (ins->has_inline_constant) {
                uint16_t imm = 0;
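                /* Scramble the constant into the scalar src2 layout: bits 10:9
                 * land in imm 1:0, bit 8 in imm 2, bits 7:5 in imm 5:3 and
                 * bits 5:0 in imm 11:6 (matching the shifts below). */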
                int lower_11 = ins->inline_constant & ((1 << 12) - 1);
                imm |= (lower_11 >> 9) & 3;
                imm |= (lower_11 >> 6) & 4;
                imm |= (lower_11 >> 2) & 0x38;
                imm |= (lower_11 & 63) << 6;

                s.src2 = imm;
        }

        return s;
}

/* 64-bit swizzles are super easy since there are 2 components of 2 components
 * in an 8-bit field ... lots of duplication to go around!
 *
 * Swizzles of 32-bit vectors accessed from 64-bit instructions are a little
 * funny -- pack them *as if* they were native 64-bit, using rep_* flags to
 * flag upper. For instance, xy would become 64-bit XY but that's just xyzw
 * native. Likewise, zz would become 64-bit XX with rep* so it would be xyxy
 * with rep. Pretty nifty, huh? */

static unsigned
mir_pack_swizzle_64(unsigned *swizzle, unsigned max_component)
{
        unsigned packed = 0;

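        /* Each 64-bit component selects a pair of 32-bit components: even
         * picks XY, odd picks ZW. A .yx swizzle thus packs ZW into the low
         * nibble and XY into the high one, giving 0x4e. */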
        for (unsigned i = 0; i < 2; ++i) {
                assert(swizzle[i] <= max_component);

                unsigned a = (swizzle[i] & 1) ?
                        (COMPONENT_W << 2) | COMPONENT_Z :
                        (COMPONENT_Y << 2) | COMPONENT_X;

                packed |= a << (i * 4);
        }

        return packed;
}

static void
mir_pack_mask_alu(midgard_instruction *ins, midgard_vector_alu *alu)
{
        unsigned effective = ins->mask;

        /* If we have a destination override, we need to figure out whether to
         * override to the lower or upper half, shifting the effective mask in
         * the latter, so AAAA.... becomes AAAA */

        unsigned inst_size = max_bitsize_for_alu(ins);
        signed upper_shift = mir_upper_override(ins, inst_size);

        if (upper_shift >= 0) {
                effective >>= upper_shift;
                alu->shrink_mode = upper_shift ?
                        midgard_shrink_mode_upper :
                        midgard_shrink_mode_lower;
        } else {
                alu->shrink_mode = midgard_shrink_mode_none;
        }

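        /* The hardware mask appears to be per 16-bit lane, so wider types get
         * each component bit expanded to cover all of the lanes it spans. */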
        if (inst_size == 32)
                alu->mask = expand_writemask(effective, 2);
        else if (inst_size == 64)
                alu->mask = expand_writemask(effective, 1);
        else
                alu->mask = effective;
}

static unsigned
mir_pack_swizzle(unsigned mask, unsigned *swizzle,
                 unsigned sz, unsigned base_size,
                 bool op_channeled, midgard_src_expand_mode *expand_mode)
{
        unsigned packed = 0;

        *expand_mode = midgard_src_passthrough;

        midgard_reg_mode reg_mode = reg_mode_for_bitsize(base_size);

        if (reg_mode == midgard_reg_mode_64) {
                assert(sz == 64 || sz == 32);
                unsigned components = (sz == 32) ? 4 : 2;

                packed = mir_pack_swizzle_64(swizzle, components);

                if (sz == 32) {
                        bool lo = swizzle[0] >= COMPONENT_Z;
                        bool hi = swizzle[1] >= COMPONENT_Z;

                        assert(!(mask & ~0xf));
                        assert(!(mask & 0x3) || !(mask & 0xc));

                        if (mask > 3)
                                mask >>= 2;

                        if (mask & 0x1) {
                                /* We can't mix halves... */
                                if (mask & 2)
                                        assert(lo == hi);

                                *expand_mode = lo ? midgard_src_expand_high :
                                                    midgard_src_expand_low;
                        } else {
                                *expand_mode = hi ? midgard_src_expand_high :
                                                    midgard_src_expand_low;
                        }
                } else if (sz < 32) {
                        unreachable("Cannot encode 8/16 swizzle in 64-bit");
                }
        } else {
                /* For 32-bit, swizzle packing is stupid-simple. For 16-bit,
                 * the strategy is to check whether the nibble we're on is
                 * upper or lower. We need all components to be on the same
                 * "side"; that much is enforced by the ISA and should have
                 * been lowered. TODO: 8-bit packing. TODO: vec8 */

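                /* Use the first enabled component to decide which half of the
                 * register the source lives in. */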
                unsigned first = mask ? ffs(mask) - 1 : 0;
                bool upper = swizzle[first] > 3;

                if (upper && mask)
                        assert(sz <= 16);

                bool dest_up = !op_channeled && (first >= 4);

                for (unsigned c = (dest_up ? 4 : 0); c < (dest_up ? 8 : 4); ++c) {
                        unsigned v = swizzle[c];

                        ASSERTED bool t_upper = v > (sz == 8 ? 7 : 3);

                        /* Ensure we're doing something sane */

                        if (mask & (1 << c)) {
                                assert(t_upper == upper);
                                assert(v <= (sz == 8 ? 15 : 7));
                        }

                        /* Use the non-upper part */
                        v &= 0x3;

                        packed |= v << (2 * (c % 4));
                }


                /* Replicate for now... should really pick a side for
                 * dot products */

                if (reg_mode == midgard_reg_mode_16 && sz == 16) {
                        *expand_mode = upper ? midgard_src_rep_high :
                                               midgard_src_rep_low;
                } else if (reg_mode == midgard_reg_mode_16 && sz == 8) {
                        if (base_size == 16) {
                                *expand_mode = upper ? midgard_src_expand_high :
                                                       midgard_src_expand_low;
                        } else if (upper) {
                                *expand_mode = midgard_src_swap;
                        }
                } else if (reg_mode == midgard_reg_mode_32 && sz == 16) {
                        *expand_mode = upper ? midgard_src_expand_high :
                                               midgard_src_expand_low;
                } else if (reg_mode == midgard_reg_mode_8) {
                        unreachable("Unhandled reg mode");
                }
        }

        return packed;
}

static void
mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu)
{
        bool channeled = GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props);

        unsigned base_size = max_bitsize_for_alu(ins);

        for (unsigned i = 0; i < 2; ++i) {
                if (ins->has_inline_constant && (i == 1))
                        continue;

                if (ins->src[i] == ~0)
                        continue;

                unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
                assert((sz == base_size) || (sz == base_size / 2));

                /* Promote 8-bit moves to 16-bit ones so we can support any swizzles. */
                if (sz == 8 && base_size == 8 && ins->op == midgard_alu_op_imov) {
                        ins->outmod = midgard_outmod_keeplo;
                        base_size = 16;
                }

                midgard_src_expand_mode expand_mode = midgard_src_passthrough;
                unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i],
                                                    sz, base_size, channeled,
                                                    &expand_mode);

                midgard_vector_alu_src pack = {
                        .mod = mir_pack_mod(ins, i, false),
                        .expand_mode = expand_mode,
                        .swizzle = swizzle
                };

                unsigned p = vector_alu_srco_unsigned(pack);

                if (i == 0)
                        alu->src1 = p;
                else
                        alu->src2 = p;
        }
}

static void
mir_pack_swizzle_ldst(midgard_instruction *ins)
{
        unsigned compsz = OP_IS_STORE(ins->op) ?
                          nir_alu_type_get_type_size(ins->src_types[0]) :
                          nir_alu_type_get_type_size(ins->dest_type);
        unsigned maxcomps = 128 / compsz;
        unsigned step = DIV_ROUND_UP(32, compsz);

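        /* The load/store swizzle is expressed in 32-bit slots, so narrow
         * components are grouped per 32-bit chunk (step) while 64-bit
         * components take two adjacent slot entries each. */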
        for (unsigned c = 0; c < maxcomps; c += step) {
                unsigned v = ins->swizzle[0][c];

                /* Make sure the component index doesn't exceed the maximum
                 * number of components. */
                assert(v <= maxcomps);

                if (compsz <= 32)
                        ins->load_store.swizzle |= (v / step) << (2 * (c / step));
                else
                        ins->load_store.swizzle |= ((v / step) << (4 * c)) |
                                                   (((v / step) + 1) << ((4 * c) + 2));
        }

        /* TODO: arg_1/2 */
}

static void
mir_pack_swizzle_tex(midgard_instruction *ins)
{
        for (unsigned i = 0; i < 2; ++i) {
                unsigned packed = 0;

                for (unsigned c = 0; c < 4; ++c) {
                        unsigned v = ins->swizzle[i][c];

                        /* Check vec4 */
                        assert(v <= 3);

                        packed |= v << (2 * c);
                }

                if (i == 0)
                        ins->texture.swizzle = packed;
                else
                        ins->texture.in_reg_swizzle = packed;
        }

        /* TODO: bias component */
}

/* Up to 3 { ALU, LDST } bundles can execute in parallel with a texture op.
 * Given a texture op, look ahead to see how many such bundles we can flag for
 * OoO execution */

static bool
mir_can_run_ooo(midgard_block *block, midgard_bundle *bundle,
                unsigned dependency)
{
        /* Don't read out of bounds */
        if (bundle >= (midgard_bundle *) ((char *) block->bundles.data + block->bundles.size))
                return false;

        /* Texture ops can't execute with other texture ops */
        if (!IS_ALU(bundle->tag) && bundle->tag != TAG_LOAD_STORE_4)
                return false;

        /* Ensure there is no read-after-write dependency */

        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                midgard_instruction *ins = bundle->instructions[i];

                mir_foreach_src(ins, s) {
                        if (ins->src[s] == dependency)
                                return false;
                }
        }

        /* Otherwise, we're okay */
        return true;
}

static void
mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instruction *ins)
{
        unsigned count = 0;

        for (count = 0; count < 3; ++count) {
                if (!mir_can_run_ooo(block, bundle + count + 1, ins->dest))
                        break;
        }

        ins->texture.out_of_order = count;
}

/* Load/store masks are 4 bits. Load/store ops pack for that.
 * For most operations, vec4 is the natural mask width; vec8 is constrained to
 * be in pairs, vec2 is duplicated. TODO: 8-bit?
 * For common stores (i.e. ST.*), each bit masks a single byte in the 32-bit
 * case, 2 bytes in the 64-bit case and 4 bytes in the 128-bit case.
 */

static unsigned
midgard_pack_common_store_mask(midgard_instruction *ins) {
        ASSERTED unsigned comp_sz = nir_alu_type_get_type_size(ins->src_types[0]);
        unsigned bytemask = mir_bytemask(ins);
        unsigned packed = 0;

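        /* Per the comment above, one mask bit covers one byte for 32-bit
         * stores, two bytes for ST.64 and four bytes for ST.128. */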
        switch (ins->op) {
        case midgard_op_st_u8:
                return mir_bytemask(ins) & 1;
        case midgard_op_st_u16:
                return mir_bytemask(ins) & 3;
        case midgard_op_st_32:
                return mir_bytemask(ins);
        case midgard_op_st_64:
                assert(comp_sz >= 16);
                for (unsigned i = 0; i < 4; i++) {
                        if (bytemask & (3 << (i * 2)))
                                packed |= 1 << i;
                }
                return packed;
        case midgard_op_st_128:
                assert(comp_sz >= 32);
                for (unsigned i = 0; i < 4; i++) {
                        if (bytemask & (0xf << (i * 4)))
                                packed |= 1 << i;
                }
                return packed;
        default:
                unreachable("unexpected ldst opcode");
        }
}

static void
mir_pack_ldst_mask(midgard_instruction *ins)
{
        unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
        unsigned packed = ins->mask;

        if (OP_IS_COMMON_STORE(ins->op)) {
                packed = midgard_pack_common_store_mask(ins);
        } else {
                if (sz == 64) {
                        packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
                                ((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
                } else if (sz < 32) {
                        unsigned comps_per_32b = 32 / sz;

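                        /* One packed bit per 32-bit slot: e.g. for 16-bit
                         * components, a 0b0011 component mask packs to 0b0001. */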
                        packed = 0;

                        for (unsigned i = 0; i < 4; ++i) {
                                unsigned submask = (ins->mask >> (i * comps_per_32b)) &
                                                   BITFIELD_MASK(comps_per_32b);

                                /* Make sure we're duplicated */
                                assert(submask == 0 || submask == BITFIELD_MASK(comps_per_32b));
                                packed |= (submask != 0) << i;
                        }
                } else {
                        assert(sz == 32);
                }
        }

        ins->load_store.mask = packed;
}

static void
mir_lower_inverts(midgard_instruction *ins)
{
        bool inv[3] = {
                ins->src_invert[0],
                ins->src_invert[1],
                ins->src_invert[2]
        };

        switch (ins->op) {
        case midgard_alu_op_iand:
                /* a & ~b = iandnot(a, b) */
                /* ~a & ~b = ~(a | b) = inor(a, b) */

                if (inv[0] && inv[1])
                        ins->op = midgard_alu_op_inor;
                else if (inv[1])
                        ins->op = midgard_alu_op_iandnot;

                break;
        case midgard_alu_op_ior:
                /*  a | ~b = iornot(a, b) */
                /* ~a | ~b = ~(a & b) = inand(a, b) */

                if (inv[0] && inv[1])
                        ins->op = midgard_alu_op_inand;
                else if (inv[1])
                        ins->op = midgard_alu_op_iornot;

                break;

        case midgard_alu_op_ixor:
                /* ~a ^ b = a ^ ~b = ~(a ^ b) = inxor(a, b) */
                /* ~a ^ ~b = a ^ b */

                if (inv[0] ^ inv[1])
                        ins->op = midgard_alu_op_inxor;

                break;

        default:
                break;
        }
}

/* Opcodes with ROUNDS are the base (rte/0) type so we can just add */

static void
mir_lower_roundmode(midgard_instruction *ins)
{
        if (alu_opcode_props[ins->op].props & MIDGARD_ROUNDS) {
                assert(ins->roundmode <= 0x3);
                ins->op += ins->roundmode;
        }
}

static midgard_load_store_word
load_store_from_instr(midgard_instruction *ins)
{
        midgard_load_store_word ldst = ins->load_store;
        ldst.op = ins->op;

        if (OP_IS_STORE(ldst.op)) {
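                /* Stores source their data from an implicit register
                 * (presumably r26/r27, as with the atomics below), so only the
                 * low select bit matters here. */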
                ldst.reg = SSA_REG_FROM_FIXED(ins->src[0]) & 1;
        } else {
                ldst.reg = SSA_REG_FROM_FIXED(ins->dest);
        }

        /* Atomic opcode swizzles have a special meaning:
         *   - The first two bits say which component of the implicit register should be used
         *   - The next two bits say if the implicit register is r26 or r27 */
        if (OP_IS_ATOMIC(ins->op)) {
                ldst.swizzle = 0;
                ldst.swizzle |= ins->swizzle[3][0] & 3;
                ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2;
        }

        if (ins->src[1] != ~0) {
                ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE;
                unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
                ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz);
        }

        if (ins->src[2] != ~0) {
                ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE;
                unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]);
                ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz);
        }

        return ldst;
}

static midgard_texture_word
texture_word_from_instr(midgard_instruction *ins)
{
        midgard_texture_word tex = ins->texture;
        tex.op = ins->op;

        unsigned src1 = ins->src[1] == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->src[1]);
        tex.in_reg_select = src1 & 1;

        unsigned dest = ins->dest == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->dest);
        tex.out_reg_select = dest & 1;

        if (ins->src[2] != ~0) {
                midgard_tex_register_select sel = {
                        .select = SSA_REG_FROM_FIXED(ins->src[2]) & 1,
                        .full = 1,
                        .component = ins->swizzle[2][0]
                };
                uint8_t packed;
                memcpy(&packed, &sel, sizeof(packed));
                tex.bias = packed;
        }

        if (ins->src[3] != ~0) {
                unsigned x = ins->swizzle[3][0];
                unsigned y = x + 1;
                unsigned z = x + 2;

                /* Check range, TODO: half-registers */
                assert(z < 4);

                unsigned offset_reg = SSA_REG_FROM_FIXED(ins->src[3]);
                tex.offset =
                        (1)                   | /* full */
                        (offset_reg & 1) << 1 | /* select */
                        (0 << 2)              | /* upper */
                        (x << 3)              | /* swizzle */
                        (y << 5)              | /* swizzle */
                        (z << 7);               /* swizzle */
        }

        return tex;
}

static midgard_vector_alu
vector_alu_from_instr(midgard_instruction *ins)
{
        midgard_vector_alu alu = {
                .op = ins->op,
                .outmod = ins->outmod,
                .reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins))
        };

        if (ins->has_inline_constant) {
                /* Encode inline 16-bit constant. See disassembler for
                 * where the algorithm is from */

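                /* Bits 10:8 of the constant land in imm 2:0 and bits 7:0 in
                 * imm 10:3, then the whole value is shifted past the low two
                 * bits of the src2 field. */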
                int lower_11 = ins->inline_constant & ((1 << 12) - 1);
                uint16_t imm = ((lower_11 >> 8) & 0x7) |
                               ((lower_11 & 0xFF) << 3);

                alu.src2 = imm << 2;
        }

        return alu;
}

static midgard_branch_extended
midgard_create_branch_extended( midgard_condition cond,
                                midgard_jmp_writeout_op op,
                                unsigned dest_tag,
                                signed quadword_offset)
{
        /* The condition code is actually a LUT describing a function to
         * combine multiple condition codes. However, we only support a single
         * condition code at the moment, so we just duplicate it a bunch of
         * times. */

        uint16_t duplicated_cond =
                (cond << 14) |
                (cond << 12) |
                (cond << 10) |
                (cond << 8) |
                (cond << 6) |
                (cond << 4) |
                (cond << 2) |
                (cond << 0);

        midgard_branch_extended branch = {
                .op = op,
                .dest_tag = dest_tag,
                .offset = quadword_offset,
                .cond = duplicated_cond
        };

        return branch;
}

static void
emit_branch(midgard_instruction *ins,
            compiler_context *ctx,
            midgard_block *block,
            midgard_bundle *bundle,
            struct util_dynarray *emission)
{
        /* Parse some basic branch info */
        bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT;
        bool is_conditional = ins->branch.conditional;
        bool is_inverted = ins->branch.invert_conditional;
        bool is_discard = ins->branch.target_type == TARGET_DISCARD;
        bool is_tilebuf_wait = ins->branch.target_type == TARGET_TILEBUF_WAIT;
        bool is_special = is_discard || is_tilebuf_wait;
        bool is_writeout = ins->writeout;

        /* Determine the block we're jumping to */
        int target_number = ins->branch.target_block;

        /* Report the destination tag */
        int dest_tag = is_discard ? 0 :
                is_tilebuf_wait ? bundle->tag :
                midgard_get_first_tag_from_block(ctx, target_number);

        /* Count up the number of quadwords we're
         * jumping over = number of quadwords until
         * (br_block_idx, target_number) */

        int quadword_offset = 0;

        if (is_discard) {
                /* Fixed encoding, not actually an offset */
                quadword_offset = 0x2;
        } else if (is_tilebuf_wait) {
                quadword_offset = -1;
        } else if (target_number > block->base.name) {
                /* Jump forward */

                for (int idx = block->base.name+1; idx < target_number; ++idx) {
                        midgard_block *blk = mir_get_block(ctx, idx);
                        assert(blk);

                        quadword_offset += blk->quadword_count;
                }
        } else {
                /* Jump backwards */

                for (int idx = block->base.name; idx >= target_number; --idx) {
                        midgard_block *blk = mir_get_block(ctx, idx);
                        assert(blk);

                        quadword_offset -= blk->quadword_count;
                }
        }

        /* Unconditional extended branches (far jumps)
         * have issues, so we always use a conditional
         * branch, setting the condition to always for
         * unconditional. For compact unconditional
         * branches, cond isn't used so it doesn't
         * matter what we pick. */

        midgard_condition cond =
                !is_conditional ? midgard_condition_always :
                is_inverted ? midgard_condition_false :
                midgard_condition_true;

        midgard_jmp_writeout_op op =
                is_discard ? midgard_jmp_writeout_op_discard :
                is_tilebuf_wait ? midgard_jmp_writeout_op_tilebuffer_pending :
                is_writeout ? midgard_jmp_writeout_op_writeout :
                (is_compact && !is_conditional) ?
                midgard_jmp_writeout_op_branch_uncond :
                midgard_jmp_writeout_op_branch_cond;

        if (is_compact) {
                unsigned size = sizeof(midgard_branch_cond);

                if (is_conditional || is_special) {
                        midgard_branch_cond branch = {
                                .op = op,
                                .dest_tag = dest_tag,
                                .offset = quadword_offset,
                                .cond = cond
                        };
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
                } else {
                        assert(op == midgard_jmp_writeout_op_branch_uncond);
                        midgard_branch_uncond branch = {
                                .op = op,
                                .dest_tag = dest_tag,
                                .offset = quadword_offset,
                                .call_mode = midgard_call_mode_default
                        };
                        assert(branch.offset == quadword_offset);
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
                }
        } else { /* `ins->compact_branch`,  misnomer */
                unsigned size = sizeof(midgard_branch_extended);

                midgard_branch_extended branch =
                        midgard_create_branch_extended(
                                        cond, op,
                                        dest_tag,
                                        quadword_offset);

                memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
        }
}

static void
emit_alu_bundle(compiler_context *ctx,
                midgard_block *block,
                midgard_bundle *bundle,
                struct util_dynarray *emission,
                unsigned lookahead)
{
        /* Emit the control word */
        util_dynarray_append(emission, uint32_t, bundle->control | lookahead);

        /* Next up, emit register words */
        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                midgard_instruction *ins = bundle->instructions[i];

                /* Check if this instruction has registers */
                if (ins->compact_branch) continue;

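                /* With an inline constant there is no src2 register, so its
                 * field carries the constant's upper bits instead. */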
                unsigned src2_reg = REGISTER_UNUSED;
                if (ins->has_inline_constant)
                        src2_reg = ins->inline_constant >> 11;
                else if (ins->src[1] != ~0)
                        src2_reg = SSA_REG_FROM_FIXED(ins->src[1]);

                /* Otherwise, just emit the registers */
                uint16_t reg_word = 0;
                midgard_reg_info registers = {
                        .src1_reg = (ins->src[0] == ~0 ?
                                        REGISTER_UNUSED :
                                        SSA_REG_FROM_FIXED(ins->src[0])),
                        .src2_reg = src2_reg,
                        .src2_imm = ins->has_inline_constant,
                        .out_reg = (ins->dest == ~0 ?
                                        REGISTER_UNUSED :
                                        SSA_REG_FROM_FIXED(ins->dest)),
                };
                memcpy(&reg_word, &registers, sizeof(uint16_t));
                util_dynarray_append(emission, uint16_t, reg_word);
        }

        /* Now, we emit the body itself */
        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                midgard_instruction *ins = bundle->instructions[i];

                if (!ins->compact_branch) {
                        mir_lower_inverts(ins);
                        mir_lower_roundmode(ins);
                }

                if (midgard_is_branch_unit(ins->unit)) {
                        emit_branch(ins, ctx, block, bundle, emission);
                } else if (ins->unit & UNITS_ANY_VECTOR) {
                        midgard_vector_alu source = vector_alu_from_instr(ins);
                        mir_pack_mask_alu(ins, &source);
                        mir_pack_vector_srcs(ins, &source);
                        unsigned size = sizeof(source);
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size);
                } else {
                        midgard_scalar_alu source = vector_to_scalar_alu(vector_alu_from_instr(ins), ins);
                        unsigned size = sizeof(source);
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size);
                }
        }

        /* Emit padding (all zero) */
        if (bundle->padding) {
                memset(util_dynarray_grow_bytes(emission, bundle->padding, 1),
                                0, bundle->padding);
        }

        /* Tack on constants */

        if (bundle->has_embedded_constants)
                util_dynarray_append(emission, midgard_constants, bundle->constants);
}

/* Shift applied to the immediate used as an offset. Probably this is papering
 * over some other semantic distinction as well, but it unifies things in the
 * compiler so I don't mind. */

static void
mir_ldst_pack_offset(midgard_instruction *ins, int offset)
{
        /* These opcodes don't support offsets */
        assert(!OP_IS_REG2REG_LDST(ins->op) ||
               ins->op == midgard_op_lea    ||
               ins->op == midgard_op_lea_image);

        if (OP_IS_UBO_READ(ins->op))
                ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset);
        else if (OP_IS_IMAGE(ins->op))
                ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset);
        else if (OP_IS_SPECIAL(ins->op))
                ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset);
        else
                ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset);
}

static enum mali_sampler_type
midgard_sampler_type(nir_alu_type t) {
        switch (nir_alu_type_get_base_type(t))
        {
        case nir_type_float:
                return MALI_SAMPLER_FLOAT;
        case nir_type_int:
                return MALI_SAMPLER_SIGNED;
        case nir_type_uint:
                return MALI_SAMPLER_UNSIGNED;
        default:
                unreachable("Unknown sampler type");
        }
}

/* After everything is scheduled, emit whole bundles at a time */

void
emit_binary_bundle(compiler_context *ctx,
                   midgard_block *block,
                   midgard_bundle *bundle,
                   struct util_dynarray *emission,
                   int next_tag)
{
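        /* The tag of the following bundle goes in the upper nibble of the
         * control word, presumably so the hardware can prefetch it. */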
        int lookahead = next_tag << 4;

        switch (bundle->tag) {
        case TAG_ALU_4:
        case TAG_ALU_8:
        case TAG_ALU_12:
        case TAG_ALU_16:
        case TAG_ALU_4 + 4:
        case TAG_ALU_8 + 4:
        case TAG_ALU_12 + 4:
        case TAG_ALU_16 + 4:
                emit_alu_bundle(ctx, block, bundle, emission, lookahead);
                break;

        case TAG_LOAD_STORE_4: {
                /* One or two composing instructions */

                uint64_t current64, next64 = LDST_NOP;

                /* Copy masks */

                for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                        midgard_instruction *ins = bundle->instructions[i];
                        mir_pack_ldst_mask(ins);

                        /* Atomic ops don't use this swizzle the same way as other ops */
                        if (!OP_IS_ATOMIC(ins->op))
                                mir_pack_swizzle_ldst(ins);

                        /* Apply a constant offset */
                        unsigned offset = ins->constants.u32[0];
                        if (offset)
                                mir_ldst_pack_offset(ins, offset);
                }

                midgard_load_store_word ldst0 =
                        load_store_from_instr(bundle->instructions[0]);
                memcpy(&current64, &ldst0, sizeof(current64));

                if (bundle->instruction_count == 2) {
                        midgard_load_store_word ldst1 =
                                load_store_from_instr(bundle->instructions[1]);
                        memcpy(&next64, &ldst1, sizeof(next64));
                }

                midgard_load_store instruction = {
                        .type = bundle->tag,
                        .next_type = next_tag,
                        .word1 = current64,
                        .word2 = next64
                };

                util_dynarray_append(emission, midgard_load_store, instruction);

                break;
        }

        case TAG_TEXTURE_4:
        case TAG_TEXTURE_4_VTX:
        case TAG_TEXTURE_4_BARRIER: {
                /* Texture instructions are easy, since there is no pipelining
                 * nor VLIW to worry about. We may need to set .cont/.last
                 * flags. */

                midgard_instruction *ins = bundle->instructions[0];

                ins->texture.type = bundle->tag;
                ins->texture.next_type = next_tag;
                ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_NONE; /* default */

                /* Nothing else to pack for barriers */
                if (ins->op == midgard_tex_op_barrier) {
                        ins->texture.op = ins->op;
                        util_dynarray_append(emission, midgard_texture_word, ins->texture);
                        return;
                }

                signed override = mir_upper_override(ins, 32);

                ins->texture.mask = override > 0 ?
                        ins->mask >> override :
                        ins->mask;

                mir_pack_swizzle_tex(ins);

                if (!(ctx->quirks & MIDGARD_NO_OOO))
                        mir_pack_tex_ooo(block, bundle, ins);

                unsigned osz = nir_alu_type_get_type_size(ins->dest_type);
                unsigned isz = nir_alu_type_get_type_size(ins->src_types[1]);

                assert(osz == 32 || osz == 16);
                assert(isz == 32 || isz == 16);

                ins->texture.out_full = (osz == 32);
                ins->texture.out_upper = override > 0;
                ins->texture.in_reg_full = (isz == 32);
                ins->texture.sampler_type = midgard_sampler_type(ins->dest_type);
                ins->texture.outmod = ins->outmod;

                if (mir_op_computes_derivatives(ctx->stage, ins->op)) {
                        if (ins->helper_terminate)
                                ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_KILL;
                        else if (!ins->helper_execute)
                                ins->texture.exec = MIDGARD_PARTIAL_EXECUTION_SKIP;
                }

                midgard_texture_word texture = texture_word_from_instr(ins);
                util_dynarray_append(emission, midgard_texture_word, texture);
                break;
        }

        default:
                unreachable("Unknown midgard instruction type\n");
        }
}