1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/ralloc.h"
26 #include "util/half_float.h"
27 #include "util/bitscan.h"
28 
29 #include "ppir.h"
30 #include "codegen.h"
31 #include "lima_context.h"
32 
/*
 * Pack a four-entry swizzle into consecutive 2-bit fields.
 *
 * Each entry has `shift` added to it and is truncated to two bits; entry i
 * is placed in the bit pair at position (i + dest_shift).
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int chan = 0; chan < 4; chan++, bit_pos += 2) {
      unsigned sel = (swizzle[chan] + shift) & 0x3;
      packed |= sel << bit_pos;
   }

   return packed;
}
40 
/*
 * Scalar register index for one component of a source operand: the index
 * reported by ppir_target_get_src_reg_index() plus the swizzle entry that
 * `component` selects.
 */
static int get_scl_reg_index(ppir_src *src, int component)
{
   return ppir_target_get_src_reg_index(src) + src->swizzle[component];
}
47 
/*
 * Encode a load node into the varying field pointed to by `code`.
 *
 * ppir_op_load_coords_reg fills the `reg` layout of the field; every other
 * accepted load op (see the assert below) fills the `imm` layout.
 */
static void ppir_codegen_encode_varying(ppir_node *node, void *code)
{
   ppir_codegen_field_varying *f = code;
   ppir_load_node *load = ppir_node_to_load(node);
   ppir_dest *dest = &load->dest;
   int dest_index = ppir_target_get_dest_reg_index(dest);
   int num_components = load->num_components;

   if (node->op == ppir_op_load_coords_reg) {
      f->reg.dest = dest_index >> 2;
      f->reg.mask = dest->write_mask << (dest_index & 0x3);

      /* Without a source operand only dest/mask are encoded. */
      if (!load->num_src)
         return;

      /* num_components == 3 implies cubemap as we don't support 3D textures */
      if (num_components == 3) {
         f->reg.source_type = 2;
         f->reg.perspective = 1;
      } else {
         f->reg.source_type = 1;
      }

      ppir_src *src = &load->src;
      int src_index = ppir_target_get_src_reg_index(src);
      f->reg.source = src_index >> 2;
      f->reg.swizzle = encode_swizzle(src->swizzle, src_index & 0x3, 0);
      f->reg.absolute = src->absolute;
      f->reg.negate = src->negate;
      return;
   }

   assert(node->op == ppir_op_load_varying ||
          node->op == ppir_op_load_coords ||
          node->op == ppir_op_load_fragcoord ||
          node->op == ppir_op_load_pointcoord ||
          node->op == ppir_op_load_frontface);

   f->imm.dest = dest_index >> 2;
   f->imm.mask = dest->write_mask << (dest_index & 0x3);

   /* Three-component loads use alignment code 3, others use count - 1. */
   int alignment = (num_components == 3) ? 3 : num_components - 1;
   f->imm.alignment = alignment;

   if (load->num_src) {
      int offset_index = ppir_target_get_src_reg_index(&load->src);
      f->imm.offset_vector = offset_index >> 2;
      f->imm.offset_scalar = offset_index & 0x3;
   } else {
      /* 0xf is written when the load has no offset source. */
      f->imm.offset_vector = 0xf;
   }

   /* Alignment code 3 scales the index like code 2 does. */
   f->imm.index = load->index >> (alignment == 3 ? 2 : alignment);

   if (node->op == ppir_op_load_fragcoord) {
      f->imm.source_type = 2;
      f->imm.perspective = 3;
   } else if (node->op == ppir_op_load_pointcoord) {
      f->imm.source_type = 3;
   } else if (node->op == ppir_op_load_frontface) {
      f->imm.source_type = 3;
      f->imm.perspective = 1;
   } else if (node->op == ppir_op_load_coords) {
      /* num_components == 3 implies cubemap as we don't support 3D textures */
      f->imm.source_type = (num_components == 3) ? 2 : 0;
   }
}
122 
/*
 * Encode a texture load into the sampler field pointed to by `code`.
 *
 * The field's type is only written for the 2D-like and cube sampler
 * dimensions; any other dimension leaves it as it was on entry.
 */
static void ppir_codegen_encode_texld(ppir_node *node, void *code)
{
   ppir_codegen_field_sampler *f = code;
   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);

   f->index = ldtex->sampler;
   f->offset_en = 0;
   f->unknown_2 = 0x39001;

   f->explicit_lod = ldtex->explicit_lod;
   f->lod_bias_en = ldtex->lod_bias_en;
   /* The LOD bias register comes from the second source operand. */
   if (ldtex->lod_bias_en)
      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);

   if (ldtex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      f->type = ppir_codegen_sampler_type_cube;
   } else if (ldtex->sampler_dim == GLSL_SAMPLER_DIM_2D ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_RECT ||
              ldtex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL) {
      f->type = ppir_codegen_sampler_type_2d;
   }
}
151 
/*
 * Encode a uniform or temporary load into the uniform field pointed to by
 * `code`.  Any other op trips the assert.
 */
static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
{
   ppir_codegen_field_uniform *f = code;
   ppir_load_node *load = ppir_node_to_load(node);

   if (node->op == ppir_op_load_uniform)
      f->source = ppir_codegen_uniform_src_uniform;
   else if (node->op == ppir_op_load_temp)
      f->source = ppir_codegen_uniform_src_temporary;
   else
      assert(0);

   f->index = load->index;
   /* Uniforms are always aligned to vec4 boundary */
   f->alignment = 2;

   /* An optional source operand supplies a register offset. */
   if (load->num_src) {
      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
      f->offset_en = 1;
   }
}
177 
/*
 * Map a shift amount in [-3, 3] to an op code: non-negative shifts map to
 * themselves, negative shifts map to shift + 8 (i.e. 5..7).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
183 
/*
 * Encode an ALU node into the vec4 multiply field pointed to by `code`.
 *
 * Ops not listed in the switch leave the field's op untouched.
 */
static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /*
    * For a pipeline destination neither dest nor mask is written and the
    * operand swizzles are encoded with a zero destination shift.
    */
   int dest_shift = 0;
   if (dest->type != ppir_target_pipeline) {
      int dest_index = ppir_target_get_dest_reg_index(dest);

      dest_shift = dest_index & 0x3;
      f->dest = dest_index >> 2;
      f->mask = dest->write_mask << dest_shift;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   /* A multiply encodes its shift amount as the op code. */
   case ppir_op_mul: f->op = shift_to_op(alu->shift);        break;
   case ppir_op_mov: f->op = ppir_codegen_vec4_mul_op_mov;   break;
   case ppir_op_max: f->op = ppir_codegen_vec4_mul_op_max;   break;
   case ppir_op_min: f->op = ppir_codegen_vec4_mul_op_min;   break;
   case ppir_op_and: f->op = ppir_codegen_vec4_mul_op_and;   break;
   case ppir_op_or:  f->op = ppir_codegen_vec4_mul_op_or;    break;
   case ppir_op_xor: f->op = ppir_codegen_vec4_mul_op_xor;   break;
   case ppir_op_gt:  f->op = ppir_codegen_vec4_mul_op_gt;    break;
   case ppir_op_ge:  f->op = ppir_codegen_vec4_mul_op_ge;    break;
   case ppir_op_eq:  f->op = ppir_codegen_vec4_mul_op_eq;    break;
   case ppir_op_ne:  f->op = ppir_codegen_vec4_mul_op_ne;    break;
   case ppir_op_not: f->op = ppir_codegen_vec4_mul_op_not;   break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   int arg0_index = ppir_target_get_src_reg_index(arg0);
   f->arg0_source = arg0_index >> 2;
   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_negate = arg0->negate;
   f->arg0_absolute = arg0->absolute;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   int arg1_index = ppir_target_get_src_reg_index(arg1);
   f->arg1_source = arg1_index >> 2;
   f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
   f->arg1_negate = arg1->negate;
   f->arg1_absolute = arg1->absolute;
}
256 
/*
 * Encode an ALU node into the scalar (float) multiply field pointed to by
 * `code`.
 *
 * Ops not listed in the switch leave the field's op untouched.
 */
static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
{
   ppir_codegen_field_float_mul *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* The lowest set bit of the write mask selects the component used. */
   int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   /* Output is only enabled for non-pipeline destinations. */
   if (dest->type != ppir_target_pipeline) {
      f->dest = ppir_target_get_dest_reg_index(dest) + component;
      f->output_en = true;
   }
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   /* A multiply encodes its shift amount as the op code. */
   case ppir_op_mul: f->op = shift_to_op(alu->shift);         break;
   case ppir_op_mov: f->op = ppir_codegen_float_mul_op_mov;   break;
   case ppir_op_max: f->op = ppir_codegen_float_mul_op_max;   break;
   case ppir_op_min: f->op = ppir_codegen_float_mul_op_min;   break;
   case ppir_op_and: f->op = ppir_codegen_float_mul_op_and;   break;
   case ppir_op_or:  f->op = ppir_codegen_float_mul_op_or;    break;
   case ppir_op_xor: f->op = ppir_codegen_float_mul_op_xor;   break;
   case ppir_op_gt:  f->op = ppir_codegen_float_mul_op_gt;    break;
   case ppir_op_ge:  f->op = ppir_codegen_float_mul_op_ge;    break;
   case ppir_op_eq:  f->op = ppir_codegen_float_mul_op_eq;    break;
   case ppir_op_ne:  f->op = ppir_codegen_float_mul_op_ne;    break;
   case ppir_op_not: f->op = ppir_codegen_float_mul_op_not;   break;
   default:
      break;
   }

   ppir_src *arg0 = &alu->src[0];
   f->arg0_source = get_scl_reg_index(arg0, component);
   f->arg0_negate = arg0->negate;
   f->arg0_absolute = arg0->absolute;

   if (alu->num_src != 2)
      return;

   ppir_src *arg1 = &alu->src[1];
   f->arg1_source = get_scl_reg_index(arg1, component);
   f->arg1_negate = arg1->negate;
   f->arg1_absolute = arg1->absolute;
}
325 
/*
 * Encode an ALU node into the vec4 accumulate (add) field pointed to by
 * `code`.
 *
 * Ops not listed in the switch leave the field's op untouched.
 */
static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
{
   ppir_codegen_field_vec4_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   int dest_index = ppir_target_get_dest_reg_index(dest);
   int dest_shift = dest_index & 0x3;

   f->dest = dest_index >> 2;
   f->mask = dest->write_mask << dest_shift;
   f->dest_modifier = dest->modifier;

   switch (node->op) {
   case ppir_op_add:    f->op = ppir_codegen_vec4_acc_op_add;    break;
   case ppir_op_mov:    f->op = ppir_codegen_vec4_acc_op_mov;    break;
   case ppir_op_floor:  f->op = ppir_codegen_vec4_acc_op_floor;  break;
   case ppir_op_ceil:   f->op = ppir_codegen_vec4_acc_op_ceil;   break;
   case ppir_op_fract:  f->op = ppir_codegen_vec4_acc_op_fract;  break;
   case ppir_op_gt:     f->op = ppir_codegen_vec4_acc_op_gt;     break;
   case ppir_op_ge:     f->op = ppir_codegen_vec4_acc_op_ge;     break;
   case ppir_op_eq:     f->op = ppir_codegen_vec4_acc_op_eq;     break;
   case ppir_op_ne:     f->op = ppir_codegen_vec4_acc_op_ne;     break;
   case ppir_op_select: f->op = ppir_codegen_vec4_acc_op_sel;    break;
   case ppir_op_max:    f->op = ppir_codegen_vec4_acc_op_max;    break;
   case ppir_op_min:    f->op = ppir_codegen_vec4_acc_op_min;    break;
   case ppir_op_ddx:    f->op = ppir_codegen_vec4_acc_op_dFdx;   break;
   case ppir_op_ddy:    f->op = ppir_codegen_vec4_acc_op_dFdy;   break;
   /* The sum ops encode their operand swizzles without a destination shift. */
   case ppir_op_sum3:
      f->op = ppir_codegen_vec4_acc_op_sum3;
      dest_shift = 0;
      break;
   case ppir_op_sum4:
      f->op = ppir_codegen_vec4_acc_op_sum4;
      dest_shift = 0;
      break;
   default:
      break;
   }

   /* For select, encoding starts at the second source; the first is skipped. */
   int first = (node->op == ppir_op_select) ? 1 : 0;

   ppir_src *arg0 = &alu->src[first];
   int arg0_index = ppir_target_get_src_reg_index(arg0);

   /* An operand coming from the vmul pipeline register sets mul_in instead
    * of a source register. */
   if (arg0->type == ppir_target_pipeline &&
       arg0->pipeline == ppir_pipeline_reg_vmul)
      f->mul_in = true;
   else
      f->arg0_source = arg0_index >> 2;

   f->arg0_swizzle = encode_swizzle(arg0->swizzle, arg0_index & 0x3, dest_shift);
   f->arg0_negate = arg0->negate;
   f->arg0_absolute = arg0->absolute;

   if (first + 1 < alu->num_src) {
      ppir_src *arg1 = &alu->src[first + 1];
      int arg1_index = ppir_target_get_src_reg_index(arg1);

      f->arg1_source = arg1_index >> 2;
      f->arg1_swizzle = encode_swizzle(arg1->swizzle, arg1_index & 0x3, dest_shift);
      f->arg1_negate = arg1->negate;
      f->arg1_absolute = arg1->absolute;
   }
}
414 
/*
 * Encode an ALU node into the scalar (float) accumulate field pointed to by
 * `code`.
 *
 * Ops not listed in the switch leave the field's op untouched.
 */
static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
{
   ppir_codegen_field_float_acc *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_dest *dest = &alu->dest;

   /* The lowest set bit of the write mask selects the component used. */
   int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   f->dest = ppir_target_get_dest_reg_index(dest) + component;
   f->dest_modifier = dest->modifier;
   f->output_en = true;

   switch (node->op) {
   /* An add encodes its shift amount as the op code. */
   case ppir_op_add:    f->op = shift_to_op(alu->shift);           break;
   case ppir_op_mov:    f->op = ppir_codegen_float_acc_op_mov;     break;
   case ppir_op_max:    f->op = ppir_codegen_float_acc_op_max;     break;
   case ppir_op_min:    f->op = ppir_codegen_float_acc_op_min;     break;
   case ppir_op_floor:  f->op = ppir_codegen_float_acc_op_floor;   break;
   case ppir_op_ceil:   f->op = ppir_codegen_float_acc_op_ceil;    break;
   case ppir_op_fract:  f->op = ppir_codegen_float_acc_op_fract;   break;
   case ppir_op_gt:     f->op = ppir_codegen_float_acc_op_gt;      break;
   case ppir_op_ge:     f->op = ppir_codegen_float_acc_op_ge;      break;
   case ppir_op_eq:     f->op = ppir_codegen_float_acc_op_eq;      break;
   case ppir_op_ne:     f->op = ppir_codegen_float_acc_op_ne;      break;
   case ppir_op_select: f->op = ppir_codegen_float_acc_op_sel;     break;
   case ppir_op_ddx:    f->op = ppir_codegen_float_acc_op_dFdx;    break;
   case ppir_op_ddy:    f->op = ppir_codegen_float_acc_op_dFdy;    break;
   default:
      break;
   }

   /* For select, encoding starts at the second source; the first is skipped. */
   int first = (node->op == ppir_op_select) ? 1 : 0;

   ppir_src *arg0 = &alu->src[first];

   /* An operand coming from the fmul pipeline register sets mul_in instead
    * of a source register. */
   if (arg0->type == ppir_target_pipeline &&
       arg0->pipeline == ppir_pipeline_reg_fmul)
      f->mul_in = true;
   else
      f->arg0_source = get_scl_reg_index(arg0, component);

   f->arg0_negate = arg0->negate;
   f->arg0_absolute = arg0->absolute;

   if (first + 1 < alu->num_src) {
      ppir_src *arg1 = &alu->src[first + 1];

      f->arg1_source = get_scl_reg_index(arg1, component);
      f->arg1_negate = arg1->negate;
      f->arg1_absolute = arg1->absolute;
   }
}
490 
/*
 * Encode an ALU node into the combine field pointed to by `code`.
 *
 * Only the scalar ops listed in the switch are handled; for any other op
 * the field is left untouched.
 */
static void ppir_codegen_encode_combine(ppir_node *node, void *code)
{
   ppir_codegen_field_combine *f = code;
   ppir_alu_node *alu = ppir_node_to_alu(node);
   int scalar_op;

   switch (node->op) {
   case ppir_op_rsqrt: scalar_op = ppir_codegen_combine_scalar_op_rsqrt; break;
   case ppir_op_log2:  scalar_op = ppir_codegen_combine_scalar_op_log2;  break;
   case ppir_op_exp2:  scalar_op = ppir_codegen_combine_scalar_op_exp2;  break;
   case ppir_op_rcp:   scalar_op = ppir_codegen_combine_scalar_op_rcp;   break;
   case ppir_op_sqrt:  scalar_op = ppir_codegen_combine_scalar_op_sqrt;  break;
   case ppir_op_sin:   scalar_op = ppir_codegen_combine_scalar_op_sin;   break;
   case ppir_op_cos:   scalar_op = ppir_codegen_combine_scalar_op_cos;   break;
   default:
      /* Not one of the scalar ops: nothing is encoded. */
      return;
   }

   f->scalar.dest_vec = false;
   f->scalar.arg1_en = false;

   /* The lowest set bit of the write mask selects the component used. */
   ppir_dest *dest = &alu->dest;
   int component = ffs(dest->write_mask) - 1;
   assert(component >= 0);

   f->scalar.dest = ppir_target_get_dest_reg_index(dest) + component;
   f->scalar.dest_modifier = dest->modifier;

   ppir_src *arg0 = &alu->src[0];
   f->scalar.arg0_src = get_scl_reg_index(arg0, component);
   f->scalar.arg0_negate = arg0->negate;
   f->scalar.arg0_absolute = arg0->absolute;

   f->scalar.op = scalar_op;
}
550 
/*
 * Encode a ppir_op_store_temp node into the temp-write field pointed to by
 * `code`.
 */
static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_store_temp);

   ppir_codegen_field_temp_write *f = code;
   ppir_store_node *store = ppir_node_to_store(node);

   /* Four components use alignment code 2, otherwise count - 1. */
   int alignment;
   if (store->num_components == 4)
      alignment = 2;
   else
      alignment = store->num_components - 1;

   f->temp_write.dest = 0x03; /* 11 - temporary */
   f->temp_write.source = store->src.reg->index;
   f->temp_write.alignment = alignment;

   /* The store index is scaled up by the complement of the alignment code. */
   f->temp_write.index = store->index << (2 - alignment);
   f->temp_write.offset_reg = store->index >> 2;
}
568 
/*
 * Convert the first constant->num values of `constant` to half floats and
 * store them in `code`, one 16-bit entry per value.
 */
static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
{
   int idx = 0;

   while (idx < constant->num) {
      code[idx] = _mesa_float_to_half(constant->value[idx].f);
      idx++;
   }
}
574 
/*
 * Encode a ppir_op_discard node: the branch field is filled with the three
 * fixed PPIR_CODEGEN_DISCARD_WORD* values.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   assert(node->op == ppir_op_discard);

   ppir_codegen_field_branch *field = code;

   field->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   field->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   field->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
584 
/*
 * Encode a branch-slot node into the branch field pointed to by `code`.
 *
 * A discard node is handed to ppir_codegen_encode_discard(); otherwise the
 * node must be a ppir_op_branch with either two sources (conditional) or
 * none (unconditional).
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   assert(node->op == ppir_op_branch);

   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.unknown_1 = 0x0;

   switch (branch->num_src) {
   case 2:
      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
      b->branch.cond_gt = branch->cond_gt;
      b->branch.cond_eq = branch->cond_eq;
      b->branch.cond_lt = branch->cond_lt;
      break;
   case 0:
      /* Unconditional branch */
      b->branch.arg0_source = 0;
      b->branch.arg1_source = 0;
      b->branch.cond_gt = true;
      b->branch.cond_eq = true;
      b->branch.cond_lt = true;
      break;
   default:
      assert(false);
      break;
   }

   /* Skip forward over target blocks that contain no instructions. */
   ppir_block *target = branch->target;
   while (list_is_empty(&target->instr_list) && target->list.next)
      target = LIST_ENTRY(ppir_block, target->list.next, list);

   assert(!list_is_empty(&target->instr_list));

   /* The target is encoded relative to this node's own instruction. */
   ppir_instr *first_instr =
      list_first_entry(&target->instr_list, ppir_instr, list);
   b->branch.target = first_instr->offset - node->instr->offset;
   b->branch.next_count = first_instr->encode_size;
}
632 
633 typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
634 
635 static const ppir_codegen_instr_slot_encode_func
636 ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
637    [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
638    [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
639    [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
640    [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
641    [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
642    [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
643    [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
644    [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
645    [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
646    [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
647 };
648 
649 static const int ppir_codegen_field_size[] = {
650    34, 62, 41, 43, 30, 44, 31, 30, 41, 73
651 };
652 
/* Number of 32-bit words needed to hold `size` bits (rounded up). */
static inline int align_to_word(int size)
{
   int rounded_up = size + 0x1f;

   return rounded_up >> 5;
}
657 
/*
 * Size of an encoded instruction in 32-bit words: the bit sizes of every
 * occupied slot plus 64 bits per non-empty constant, rounded up to whole
 * words, plus one extra word.
 */
static int get_instr_encode_size(ppir_instr *instr)
{
   int total_bits = 0;

   for (int slot = 0; slot < PPIR_INSTR_SLOT_NUM; slot++) {
      if (!instr->slots[slot])
         continue;
      total_bits += ppir_codegen_field_size[slot];
   }

   for (int c = 0; c < 2; c++) {
      if (!instr->constant[c].num)
         continue;
      total_bits += 64;
   }

   /* The additional word matches the control word encode_instr() writes
    * ahead of the fields. */
   return 1 + align_to_word(total_bits);
}
674 
/*
 * OR `src_size` bits from `src` into `dst`, starting `dst_offset` bits into
 * `dst`.  Both buffers are accessed as arrays of 32-bit words.
 *
 * A word-aligned destination offset is handled with a plain memcpy of the
 * size rounded up to whole words (overwriting rather than OR-ing).
 * Otherwise each source word is split across two adjacent destination
 * words.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   uint32_t *out = dst;
   uint32_t *in = src;
   int low_shift = dst_offset & 0x1f;

   out += dst_offset >> 5;

   if (!low_shift) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   int high_shift = 32 - low_shift;
   int copied = 0;

   for (;;) {
      /* Low part of the source word fills the rest of this dest word. */
      *out |= *in << low_shift;
      out++;
      copied += high_shift;
      if (copied >= src_size)
         return;

      /* Remaining high part spills into the start of the next dest word. */
      *out |= *in >> high_shift;
      in++;
      copied += low_shift;
      if (copied >= src_size)
         return;
   }
}
704 
/*
 * Encode one instruction at `code` and return its size in 32-bit words.
 *
 * The first word is the control word; the fields of every occupied slot,
 * followed by any non-empty constants, are bit-packed right after it.
 * When `last_code` is non-NULL, the previously encoded instruction's
 * control word is updated with this instruction's size and its prefetch
 * flag is set.
 *
 * The scratch buffers are declared with 32-bit element types because
 * bitcopy() reads its source through a uint32_t pointer; a uint8_t or
 * uint16_t array carries no guarantee of 32-bit alignment.
 */
static int encode_instr(ppir_instr *instr, void *code, void *last_code)
{
   int size = 0;
   ppir_codegen_ctrl *ctrl = code;

   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
      if (!instr->slots[i])
         continue;

      /* max field size (73 bits) rounded up to whole 32-bit words */
      uint32_t output[3] = {0};

      ppir_codegen_encode_slot[i](instr->slots[i], output);
      bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);

      size += ppir_codegen_field_size[i];
      ctrl->fields |= 1 << i;
   }

   /* Texture loads and derivative ops set the sync flag. */
   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
      ctrl->sync = true;

   if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
         ctrl->sync = true;
   }

   for (int i = 0; i < 2; i++) {
      if (!instr->constant[i].num)
         continue;

      /* Written as four half floats, then copied out as two 32-bit words. */
      union {
         uint16_t half[4];
         uint32_t word[2];
      } output = { .half = {0} };

      ppir_codegen_encode_const(instr->constant + i, output.half);
      bitcopy(ctrl + 1, size, output.word, instr->constant[i].num * 16);

      /* A constant always occupies 64 bits, however many values it holds. */
      size += 64;
      ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
   }

   /* Convert bits to words and account for the control word itself. */
   size = align_to_word(size) + 1;

   ctrl->count = size;
   if (instr->is_end)
      ctrl->stop = true;

   if (last_code) {
      ppir_codegen_ctrl *last_ctrl = last_code;
      last_ctrl->next_count = size;
      last_ctrl->prefetch = true;
   }

   return size;
}
764 
/*
 * Dump the encoded shader to stdout: for every instruction, its index and
 * offset, its raw words in hex (six per line), then its disassembly.
 */
static void ppir_codegen_print_prog(ppir_compiler *comp)
{
   uint32_t *cursor = comp->prog->shader;
   unsigned word_offset = 0;

   printf("========ppir codegen========\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         /* The low five bits of the first word give the word count. */
         int num_words = cursor[0] & 0x1f;

         printf("%03d (@%6d): ", instr->index, instr->offset);
         for (int w = 0; w < num_words; w++) {
            if (w != 0 && w % 6 == 0)
               printf("\n    ");
            printf("%08x ", cursor[w]);
         }
         printf("\n");
         ppir_disassemble_instr(cursor, word_offset, stdout);

         cursor += num_words;
         word_offset += num_words;
      }
   }
   printf("-----------------------\n");
}
788 
/*
 * Generate the binary for the whole program held by `comp`.
 *
 * A first pass assigns every instruction its word offset and encoded size;
 * a second pass encodes the instructions back to back into a freshly
 * allocated buffer, which then replaces comp->prog->shader.
 *
 * Returns false if the buffer cannot be allocated, true otherwise.
 */
bool ppir_codegen_prog(ppir_compiler *comp)
{
   int total_words = 0;

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         instr->offset = total_words;
         instr->encode_size = get_instr_encode_size(instr);
         total_words += instr->encode_size;
      }
   }

   uint32_t *prog = rzalloc_size(comp->prog, total_words * sizeof(uint32_t));
   if (!prog)
      return false;

   uint32_t *prev = NULL;
   uint32_t *cur = prog;
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         /* encode_instr() also patches the previous control word. */
         int words = encode_instr(instr, cur, prev);

         prev = cur;
         cur += words;
      }
   }

   /* Release any previously generated shader before installing the new one. */
   if (comp->prog->shader)
      ralloc_free(comp->prog->shader);

   comp->prog->shader = prog;
   comp->prog->state.shader_size = total_words * sizeof(uint32_t);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_codegen_print_prog(comp);

   return true;
}
824