/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

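/* Number of registers we track live values for: 32 in register file A,
 * 32 in register file B, and the four accumulators r0-r3.
 */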
#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found.  This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch.  If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction that violates the criteria for a
	 * threaded shader, which are:
	 *	- only write the lower half of the register space
	 *	- signal the last thread switch at the end
	 * So we track the usage of the thread switches and the register
	 * usage.
	 */
	bool all_registers_used;
};

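/* Maps a write address to the index used for live-value tracking: 0-31 for
 * register file A, 32-63 for register file B, and 64-67 for the accumulators
 * r0-r3.  Returns ~0 for write addresses we don't track.
 */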
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

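/* Maps the A argument of the instruction's ADD operation to a live-register
 * index, or ~0 if it reads something we don't track (such as a small
 * immediate).
 */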
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

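/* Returns true if the live-register index refers to the upper half of a
 * register file (ra16-ra31 or rb16-rb31), which threaded shaders must not
 * use.
 */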
static bool live_reg_is_upper_half(uint32_t lri)
{
	return	(lri >= 16 && lri < 32) ||
		(lri >= 32 + 16 && lri < 32 + 32);
}

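/* Writing the TMU "S" coordinate register is what submits a texture fetch,
 * so it marks the end of a TMU setup sequence.
 */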
static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

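/* Saves the current TMU setup as a texture sample to be relocated later, and
 * clears the per-TMU setup state for the next sample.
 */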
static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

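/* Validates a write to one of the TMU setup registers, tracking where the
 * texture parameters land in the uniform stream and, for direct texture
 * reads, that the address written is a clamped offset added to a uniform
 * (the UBO base address).
 */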
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

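/* Records the current position in the uniform stream as one where a uniforms
 * address must be supplied, so the kernel can patch it in at submit time.
 */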
static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

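/* Validates a write to the uniforms address register: it must be an
 * unconditional, unpacked ADD of an immediate offset (pointing just past the
 * uniforms base address in the stream) to a uniform read.
 */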
static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side.  However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */

	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
		DRM_ERROR("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_ERROR("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_ERROR("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_ERROR("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_ERROR("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_ERROR("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

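/* Validates the register written by either the ADD or MUL half of an
 * instruction, tracking live immediates and upper-half register use, and
 * rejecting writes (VPM DMA address, mutex release, etc.) that the validator
 * doesn't allow.
 */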
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_ERROR("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

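/* Tracks which registers currently hold values that have been clamped with
 * MAX(x, 0) and then MIN()ed against a uniform, so that direct TMU reads can
 * later be proven to stay within their UBO.
 */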
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

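/* Validates both write destinations of an ALU instruction, then updates the
 * clamp-tracking state for the registers it wrote.
 */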
static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

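/* Validates a branch instruction: note backwards branches (which require a
 * uniforms address to still be present in the stream) and reject branches
 * that also write registers.
 */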
static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_ERROR("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

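/* Counts uniform reads (to size the uniform stream), rejects reads made while
 * the uniforms address is undefined, and notes reads of the upper
 * register-file halves for the threaded-shader check.
 */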
static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}

/* Make sure that all branches are absolute and point within the shader, and
 * note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	int ip;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished.  validation_state->max_ip is the
			 * instruction after the last valid instruction in the
			 * program.
			 */
			validation_state->max_ip = ip + 3;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction.  Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);
	}

	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

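/* Returns true if either TMU has received setup writes that haven't been
 * submitted yet.
 */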
static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

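/* Called at each instruction that is a branch target: rejects targets that
 * land in the middle of TMU setup, and resets the live-value tracking since
 * the basic block may be entered from multiple predecessors.
 */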
static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_ERROR("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read.  We require that after any branch point, the
	 * next uniform to be loaded is a uniform address offset.  That
	 * uniform's offset will be marked by the uniform address register
	 * write validation, or by the one-off check at the end of the
	 * program.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}

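/**
 * vc4_validate_shader() - Validates the QPU code in a shader BO.
 * @shader_obj: GEM BO containing the shader instructions.
 *
 * Walks every instruction, checking its reads and writes, and returns a
 * freshly allocated vc4_validated_shader_info describing the uniform stream
 * and texture samples the shader uses, or NULL if validation fails.
 */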
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t last_thread_switch_ip = -3;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
			/* Reset r0-r3 live clamp data */
			int i;

			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_ERROR("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_ERROR("Branch in thread switch at ip %d",
					  ip);
				goto fail;
			}

			break;
		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Might corrupt other thread */
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_ERROR("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}