1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "compiler.h"
25 #include "bi_builder.h"
26 
27 /* This optimization pass, intended to run once after code emission but before
28  * copy propagation, analyzes direct word-aligned UBO reads and promotes a
29  * subset to moves from FAU. It is the sole populator of the UBO push data
30  * structure returned back to the command stream. */
31 
32 static bool
bi_is_ubo(bi_instr * ins)33 bi_is_ubo(bi_instr *ins)
34 {
35         return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
36                 (ins->seg == BI_SEG_UBO);
37 }
38 
39 static bool
bi_is_direct_aligned_ubo(bi_instr * ins)40 bi_is_direct_aligned_ubo(bi_instr *ins)
41 {
42         return bi_is_ubo(ins) &&
43                 (ins->src[0].type == BI_INDEX_CONSTANT) &&
44                 (ins->src[1].type == BI_INDEX_CONSTANT) &&
45                 ((ins->src[0].value & 0x3) == 0);
46 }
47 
/* Represents use data for a single UBO. Both members are indexed by the base
 * word of a load, that is, its constant byte offset divided by 4. */

/* Number of base words tracked per UBO. Loads whose base word is at or above
 * this bound are skipped by the analysis and are never selected for pushing. */
#define MAX_UBO_WORDS (65536 / 16)

struct bi_ubo_block {
        /* Bit set for each base word whose range was selected for pushing */
        BITSET_DECLARE(pushed, MAX_UBO_WORDS);

        /* Largest channel count of any direct aligned load based at each
         * word, or 0 if no such load was seen */
        uint8_t range[MAX_UBO_WORDS];
};
56 
/* Result of analyzing the direct aligned UBO loads of a shader */
struct bi_ubo_analysis {
        /* Per block analysis: blocks is a heap-allocated array of nr_blocks
         * entries, indexed by UBO index */
        unsigned nr_blocks;
        struct bi_ubo_block *blocks;
};
62 
63 static struct bi_ubo_analysis
bi_analyze_ranges(bi_context * ctx)64 bi_analyze_ranges(bi_context *ctx)
65 {
66         struct bi_ubo_analysis res = {
67                 .nr_blocks = ctx->nir->info.num_ubos + 1,
68         };
69 
70         res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
71 
72         bi_foreach_instr_global(ctx, ins) {
73                 if (!bi_is_direct_aligned_ubo(ins)) continue;
74 
75                 unsigned ubo = ins->src[1].value;
76                 unsigned word = ins->src[0].value / 4;
77                 unsigned channels = bi_opcode_props[ins->op].sr_count;
78 
79                 assert(ubo < res.nr_blocks);
80                 assert(channels > 0 && channels <= 4);
81 
82                 if (word >= MAX_UBO_WORDS) continue;
83 
84                 /* Must use max if the same base is read with different channel
85                  * counts, which is possible with nir_opt_shrink_vectors */
86                 uint8_t *range = res.blocks[ubo].range;
87                 range[word] = MAX2(range[word], channels);
88         }
89 
90         return res;
91 }
92 
93 /* Select UBO words to push. A sophisticated implementation would consider the
94  * number of uses and perhaps the control flow to estimate benefit. This is not
95  * sophisticated. Select from the last UBO first to prioritize sysvals. */
96 
97 static void
bi_pick_ubo(struct panfrost_ubo_push * push,struct bi_ubo_analysis * analysis)98 bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
99 {
100         for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
101                 struct bi_ubo_block *block = &analysis->blocks[ubo];
102 
103                 for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
104                         unsigned range = block->range[r];
105 
106                         /* Don't push something we don't access */
107                         if (range == 0) continue;
108 
109                         /* Don't push more than possible */
110                         if (push->count > PAN_MAX_PUSH - range)
111                                 return;
112 
113                         for (unsigned offs = 0; offs < range; ++offs) {
114                                 struct panfrost_ubo_word word = {
115                                         .ubo = ubo,
116                                         .offset = (r + offs) * 4
117                                 };
118 
119                                 push->words[push->count++] = word;
120                         }
121 
122                         /* Mark it as pushed so we can rewrite */
123                         BITSET_SET(block->pushed, r);
124                 }
125         }
126 }
127 
128 void
bi_opt_push_ubo(bi_context * ctx)129 bi_opt_push_ubo(bi_context *ctx)
130 {
131         /* This pass only runs once */
132         assert(ctx->info->push.count == 0);
133 
134         struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
135         bi_pick_ubo(&ctx->info->push, &analysis);
136 
137         ctx->ubo_mask = 0;
138 
139         bi_foreach_instr_global_safe(ctx, ins) {
140                 if (!bi_is_ubo(ins)) continue;
141 
142                 unsigned ubo = ins->src[1].value;
143                 unsigned offset = ins->src[0].value;
144 
145                 if (!bi_is_direct_aligned_ubo(ins)) {
146                         /* The load can't be pushed, so this UBO needs to be
147                          * uploaded conventionally */
148                         if (ins->src[1].type == BI_INDEX_CONSTANT)
149                                 ctx->ubo_mask |= BITSET_BIT(ubo);
150                         else
151                                 ctx->ubo_mask = ~0;
152 
153                         continue;
154                 }
155 
156                 /* Check if we decided to push this */
157                 assert(ubo < analysis.nr_blocks);
158                 if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {
159                         ctx->ubo_mask |= BITSET_BIT(ubo);
160                         continue;
161                 }
162 
163                 /* Replace the UBO load with moves from FAU */
164                 bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
165 
166                 unsigned channels = bi_opcode_props[ins->op].sr_count;
167 
168                 for (unsigned w = 0; w < channels; ++w) {
169                         /* FAU is grouped in pairs (2 x 4-byte) */
170                         unsigned base =
171                                 pan_lookup_pushed_ubo(&ctx->info->push, ubo,
172                                                       (offset + 4 * w));
173 
174                         unsigned fau_idx = (base >> 1);
175                         unsigned fau_hi = (base & 1);
176 
177                         bi_mov_i32_to(&b,
178                                 bi_word(ins->dest[0], w),
179                                 bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi));
180                 }
181 
182                 bi_remove_instruction(ins);
183         }
184 
185         free(analysis.blocks);
186 }
187