1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file
26  *
27  * Validates the QPU instruction sequence after register allocation and
28  * scheduling.
29  */
30 
31 #include <assert.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
36 
/* Bookkeeping carried from one instruction to the next while the validator
 * walks the program.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instruction stream is being validated. */
        struct v3d_compile *c;

        /* Instruction visited just before the current one (NULL until the
         * first instruction has been processed).
         */
        const struct v3d_qpu_instr *last;

        /* Index of the instruction currently being validated. */
        int ip;

        /* ip of the most recent instruction that wrote an SFU register. */
        int last_sfu_write;

        /* ip of the most recent branch instruction. */
        int last_branch_ip;

        /* ip of the most recent THRSW that was counted in thrsw_count. */
        int last_thrsw_ip;

        /* True once the last-THRSW signal has been seen, or from the start
         * when the program runs in single-segment mode.
         */
        bool last_thrsw_found;

        /* True once a THRSW following the last-THRSW has been seen. */
        bool thrend_found;

        /* Number of THRSWs seen, not counting the back-to-back THRSW that
         * only marks the last thread switch.
         */
        int thrsw_count;
};
55 
56 static void
fail_instr(struct v3d_qpu_validate_state * state,const char * msg)57 fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
58 {
59         struct v3d_compile *c = state->c;
60 
61         fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
62 
63         int dump_ip = 0;
64         vir_for_each_inst_inorder(inst, c) {
65                 v3d_qpu_dump(c->devinfo, &inst->qpu);
66 
67                 if (dump_ip++ == state->ip)
68                         fprintf(stderr, " *** ERROR ***");
69 
70                 fprintf(stderr, "\n");
71         }
72 
73         fprintf(stderr, "\n");
74         abort();
75 }
76 
77 static bool
in_branch_delay_slots(struct v3d_qpu_validate_state * state)78 in_branch_delay_slots(struct v3d_qpu_validate_state *state)
79 {
80         return (state->ip - state->last_branch_ip) < 3;
81 }
82 
83 static bool
in_thrsw_delay_slots(struct v3d_qpu_validate_state * state)84 in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
85 {
86         return (state->ip - state->last_thrsw_ip) < 3;
87 }
88 
89 static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr * inst,bool (* predicate)(enum v3d_qpu_waddr waddr))90 qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
91                         bool (*predicate)(enum v3d_qpu_waddr waddr))
92 {
93         if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
94                 return false;
95 
96         if (inst->alu.add.op != V3D_QPU_A_NOP &&
97             inst->alu.add.magic_write &&
98             predicate(inst->alu.add.waddr))
99                 return true;
100 
101         if (inst->alu.mul.op != V3D_QPU_M_NOP &&
102             inst->alu.mul.magic_write &&
103             predicate(inst->alu.mul.waddr))
104                 return true;
105 
106         return false;
107 }
108 
109 static void
qpu_validate_inst(struct v3d_qpu_validate_state * state,struct qinst * qinst)110 qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
111 {
112         const struct v3d_device_info *devinfo = state->c->devinfo;
113         const struct v3d_qpu_instr *inst = &qinst->qpu;
114 
115         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
116                 return;
117 
118         /* LDVARY writes r5 two instructions later and LDUNIF writes
119          * r5 one instruction later, which is illegal to have
120          * together.
121          */
122         if (state->last && state->last->sig.ldvary &&
123             (inst->sig.ldunif || inst->sig.ldunifa)) {
124                 fail_instr(state, "LDUNIF after a LDVARY");
125         }
126 
127         /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
128          *
129          * FIXME: This would not check correctly for V3D 4.2 versions lower
130          * than V3D 4.2.14, but that is not a real issue because the simulator
131          * will still catch this, and we are not really targetting any such
132          * versions anyway.
133          */
134         if (state->c->devinfo->ver < 42) {
135                 bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
136                                                           state->last->sig.ldunifrf));
137                 bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
138                                                            state->last->sig.ldunifarf));
139                 bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
140                 bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
141                 if ((last_reads_ldunif && reads_ldunifa) ||
142                     (last_reads_ldunifa && reads_ldunif)) {
143                         fail_instr(state,
144                                    "LDUNIF and LDUNIFA can't be next to each other");
145                 }
146         }
147 
148         int tmu_writes = 0;
149         int sfu_writes = 0;
150         int vpm_writes = 0;
151         int tlb_writes = 0;
152         int tsy_writes = 0;
153 
154         if (inst->alu.add.op != V3D_QPU_A_NOP) {
155                 if (inst->alu.add.magic_write) {
156                         if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
157                                                        inst->alu.add.waddr)) {
158                                 tmu_writes++;
159                         }
160                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
161                                 sfu_writes++;
162                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
163                                 vpm_writes++;
164                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
165                                 tlb_writes++;
166                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
167                                 tsy_writes++;
168                 }
169         }
170 
171         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
172                 if (inst->alu.mul.magic_write) {
173                         if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
174                                                        inst->alu.mul.waddr)) {
175                                 tmu_writes++;
176                         }
177                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
178                                 sfu_writes++;
179                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
180                                 vpm_writes++;
181                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
182                                 tlb_writes++;
183                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
184                                 tsy_writes++;
185                 }
186         }
187 
188         if (in_thrsw_delay_slots(state)) {
189                 /* There's no way you want to start SFU during the THRSW delay
190                  * slots, since the result would land in the other thread.
191                  */
192                 if (sfu_writes) {
193                         fail_instr(state,
194                                    "SFU write started during THRSW delay slots ");
195                 }
196 
197                 if (inst->sig.ldvary)
198                         fail_instr(state, "LDVARY during THRSW delay slots");
199         }
200 
201         (void)qpu_magic_waddr_matches; /* XXX */
202 
203         /* SFU r4 results come back two instructions later.  No doing
204          * r4 read/writes or other SFU lookups until it's done.
205          */
206         if (state->ip - state->last_sfu_write < 2) {
207                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
208                         fail_instr(state, "R4 read too soon after SFU");
209 
210                 if (v3d_qpu_writes_r4(devinfo, inst))
211                         fail_instr(state, "R4 write too soon after SFU");
212 
213                 if (sfu_writes)
214                         fail_instr(state, "SFU write too soon after SFU");
215         }
216 
217         /* XXX: The docs say VPM can happen with the others, but the simulator
218          * disagrees.
219          */
220         if (tmu_writes +
221             sfu_writes +
222             vpm_writes +
223             tlb_writes +
224             tsy_writes +
225             inst->sig.ldtmu +
226             inst->sig.ldtlb +
227             inst->sig.ldvpm +
228             inst->sig.ldtlbu > 1) {
229                 fail_instr(state,
230                            "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
231         }
232 
233         if (sfu_writes)
234                 state->last_sfu_write = state->ip;
235 
236         if (inst->sig.thrsw) {
237                 if (in_branch_delay_slots(state))
238                         fail_instr(state, "THRSW in a branch delay slot.");
239 
240                 if (state->last_thrsw_found)
241                         state->thrend_found = true;
242 
243                 if (state->last_thrsw_ip == state->ip - 1) {
244                         /* If it's the second THRSW in a row, then it's just a
245                          * last-thrsw signal.
246                          */
247                         if (state->last_thrsw_found)
248                                 fail_instr(state, "Two last-THRSW signals");
249                         state->last_thrsw_found = true;
250                 } else {
251                         if (in_thrsw_delay_slots(state)) {
252                                 fail_instr(state,
253                                            "THRSW too close to another THRSW.");
254                         }
255                         state->thrsw_count++;
256                         state->last_thrsw_ip = state->ip;
257                 }
258         }
259 
260         if (state->thrend_found &&
261             state->last_thrsw_ip - state->ip <= 2 &&
262             inst->type == V3D_QPU_INSTR_TYPE_ALU) {
263                 if ((inst->alu.add.op != V3D_QPU_A_NOP &&
264                      !inst->alu.add.magic_write)) {
265                         fail_instr(state, "RF write after THREND");
266                 }
267 
268                 if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
269                      !inst->alu.mul.magic_write)) {
270                         fail_instr(state, "RF write after THREND");
271                 }
272 
273                 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
274                     !inst->sig_magic) {
275                         fail_instr(state, "RF write after THREND");
276                 }
277 
278                 /* GFXH-1625: No TMUWT in the last instruction */
279                 if (state->last_thrsw_ip - state->ip == 2 &&
280                     inst->alu.add.op == V3D_QPU_A_TMUWT)
281                         fail_instr(state, "TMUWT in last instruction");
282         }
283 
284         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
285                 if (in_branch_delay_slots(state))
286                         fail_instr(state, "branch in a branch delay slot.");
287                 if (in_thrsw_delay_slots(state))
288                         fail_instr(state, "branch in a THRSW delay slot.");
289                 state->last_branch_ip = state->ip;
290         }
291 }
292 
293 static void
qpu_validate_block(struct v3d_qpu_validate_state * state,struct qblock * block)294 qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
295 {
296         vir_for_each_inst(qinst, block) {
297                 qpu_validate_inst(state, qinst);
298 
299                 state->last = &qinst->qpu;
300                 state->ip++;
301         }
302 }
303 
304 /**
305  * Checks for the instruction restrictions from page 37 ("Summary of
306  * Instruction Restrictions").
307  */
308 void
qpu_validate(struct v3d_compile * c)309 qpu_validate(struct v3d_compile *c)
310 {
311         /* We don't want to do validation in release builds, but we want to
312          * keep compiling the validation code to make sure it doesn't get
313          * broken.
314          */
315 #ifndef DEBUG
316         return;
317 #endif
318 
319         struct v3d_qpu_validate_state state = {
320                 .c = c,
321                 .last_sfu_write = -10,
322                 .last_thrsw_ip = -10,
323                 .last_branch_ip = -10,
324                 .ip = 0,
325 
326                 .last_thrsw_found = !c->last_thrsw,
327         };
328 
329         vir_for_each_block(block, c) {
330                 qpu_validate_block(&state, block);
331         }
332 
333         if (state.thrsw_count > 1 && !state.last_thrsw_found) {
334                 fail_instr(&state,
335                            "thread switch found without last-THRSW in program");
336         }
337 
338         if (!state.thrend_found)
339                 fail_instr(&state, "No program-end THRSW found");
340 }
341