1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "util/u_math.h"
30 
31 #include "radeon_dataflow.h"
32 
33 #include "radeon_compiler.h"
34 #include "radeon_compiler_util.h"
35 #include "radeon_list.h"
36 #include "radeon_swizzle.h"
37 #include "radeon_variable.h"
38 
/**
 * User data for src_clobbered_reads_cb().  It describes one register write
 * (file, index, channel mask) as filled in by is_src_clobbered_scan_write(),
 * plus the reader data whose AbortOnRead field the callback may set.
 */
struct src_clobbered_reads_cb_data {
	rc_register_file File;	/* file of the register being written */
	unsigned int Index;	/* index of the register being written */
	unsigned int Mask;	/* channel mask of the write */
	struct rc_reader_data * ReaderData;	/* reader data to flag */
};
45 
/**
 * Callback invoked by presub_helper() once for every matching reader source.
 * It is passed the instruction whose result is being replaced, the reader
 * instruction, and the index of the reader's source register to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
						struct rc_instruction *,
						unsigned int);
49 
chain_srcregs(struct rc_src_register outer,struct rc_src_register inner)50 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
51 {
52 	struct rc_src_register combine;
53 	combine.File = inner.File;
54 	combine.Index = inner.Index;
55 	combine.RelAddr = inner.RelAddr;
56 	if (outer.Abs) {
57 		combine.Abs = 1;
58 		combine.Negate = outer.Negate;
59 	} else {
60 		combine.Abs = inner.Abs;
61 		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
62 		combine.Negate ^= outer.Negate;
63 	}
64 	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
65 	return combine;
66 }
67 
copy_propagate_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)68 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
69 						struct rc_src_register * src)
70 {
71 	rc_register_file file = src->File;
72 	struct rc_reader_data * reader_data = data;
73 
74 	if(!rc_inst_can_use_presub(inst,
75 				reader_data->Writer->U.I.PreSub.Opcode,
76 				rc_swizzle_to_writemask(src->Swizzle),
77 				src,
78 				&reader_data->Writer->U.I.PreSub.SrcReg[0],
79 				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
80 		reader_data->Abort = 1;
81 		return;
82 	}
83 
84 	/* XXX This could probably be handled better. */
85 	if (file == RC_FILE_ADDRESS) {
86 		reader_data->Abort = 1;
87 		return;
88 	}
89 
90 	/* These instructions cannot read from the constants file.
91 	 * see radeonTransformTEX()
92 	 */
93 	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
94 			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
95 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
96 				inst->U.I.Opcode == RC_OPCODE_TXB ||
97 				inst->U.I.Opcode == RC_OPCODE_TXP ||
98 				inst->U.I.Opcode == RC_OPCODE_TXD ||
99 				inst->U.I.Opcode == RC_OPCODE_TXL ||
100 				inst->U.I.Opcode == RC_OPCODE_KIL)){
101 		reader_data->Abort = 1;
102 		return;
103 	}
104 }
105 
src_clobbered_reads_cb(void * data,struct rc_instruction * inst,struct rc_src_register * src)106 static void src_clobbered_reads_cb(
107 	void * data,
108 	struct rc_instruction * inst,
109 	struct rc_src_register * src)
110 {
111 	struct src_clobbered_reads_cb_data * sc_data = data;
112 
113 	if (src->File == sc_data->File
114 	    && src->Index == sc_data->Index
115 	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
116 
117 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118 	}
119 
120 	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
121 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
122 	}
123 }
124 
/**
 * Write callback passed to rc_get_readers().
 *
 * Packs the reported write into a src_clobbered_reads_cb_data and runs
 * src_clobbered_reads_cb() over every source of the writer instruction.
 *
 * @param data  Pointer to the struct rc_reader_data of the current scan.
 * @param inst  Instruction performing the write (not used here).
 * @param file  Register file being written.
 * @param index Register index being written.
 * @param mask  Channel mask being written.
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * reader_data = data;
	struct src_clobbered_reads_cb_data write_info = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = reader_data,
	};

	rc_for_all_reads_src(reader_data->Writer,
					src_clobbered_reads_cb, &write_info);
}
141 
copy_propagate(struct radeon_compiler * c,struct rc_instruction * inst_mov)142 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
143 {
144 	struct rc_reader_data reader_data;
145 	unsigned int i;
146 
147 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
148 	    inst_mov->U.I.WriteALUResult)
149 		return;
150 
151 	/* Get a list of all the readers of this MOV instruction. */
152 	reader_data.ExitOnAbort = 1;
153 	rc_get_readers(c, inst_mov, &reader_data,
154 		       copy_propagate_scan_read, NULL,
155 		       is_src_clobbered_scan_write);
156 
157 	if (reader_data.Abort || reader_data.ReaderCount == 0)
158 		return;
159 
160 	/* We can propagate SaturateMode if all the readers are MOV instructions
161 	 * without a presubtract operation, source negation and absolute.
162 	 * In that case, we just move SaturateMode to all readers. */
163         if (inst_mov->U.I.SaturateMode) {
164 		for (i = 0; i < reader_data.ReaderCount; i++) {
165 			struct rc_instruction * inst = reader_data.Readers[i].Inst;
166 
167 			if (inst->U.I.Opcode != RC_OPCODE_MOV ||
168 			    inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
169 			    inst->U.I.SrcReg[0].Abs ||
170 			    inst->U.I.SrcReg[0].Negate) {
171 				return;
172 			}
173 		}
174 	}
175 
176 	/* Propagate the MOV instruction. */
177 	for (i = 0; i < reader_data.ReaderCount; i++) {
178 		struct rc_instruction * inst = reader_data.Readers[i].Inst;
179 		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
180 
181 		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
182 			inst->U.I.PreSub = inst_mov->U.I.PreSub;
183 		if (!inst->U.I.SaturateMode)
184 			inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
185 	}
186 
187 	/* Finally, remove the original MOV instruction */
188 	rc_remove_instruction(inst_mov);
189 }
190 
191 /**
192  * Check if a source register is actually always the same
193  * swizzle constant.
194  */
is_src_uniform_constant(struct rc_src_register src,rc_swizzle * pswz,unsigned int * pnegate)195 static int is_src_uniform_constant(struct rc_src_register src,
196 		rc_swizzle * pswz, unsigned int * pnegate)
197 {
198 	int have_used = 0;
199 
200 	if (src.File != RC_FILE_NONE) {
201 		*pswz = 0;
202 		return 0;
203 	}
204 
205 	for(unsigned int chan = 0; chan < 4; ++chan) {
206 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
207 		if (swz < 4) {
208 			*pswz = 0;
209 			return 0;
210 		}
211 		if (swz == RC_SWIZZLE_UNUSED)
212 			continue;
213 
214 		if (!have_used) {
215 			*pswz = swz;
216 			*pnegate = GET_BIT(src.Negate, chan);
217 			have_used = 1;
218 		} else {
219 			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
220 				*pswz = 0;
221 				return 0;
222 			}
223 		}
224 	}
225 
226 	return 1;
227 }
228 
constant_folding_mad(struct rc_instruction * inst)229 static void constant_folding_mad(struct rc_instruction * inst)
230 {
231 	rc_swizzle swz = 0;
232 	unsigned int negate= 0;
233 
234 	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
235 		if (swz == RC_SWIZZLE_ZERO) {
236 			inst->U.I.Opcode = RC_OPCODE_MUL;
237 			return;
238 		}
239 	}
240 
241 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
242 		if (swz == RC_SWIZZLE_ONE) {
243 			inst->U.I.Opcode = RC_OPCODE_ADD;
244 			if (negate)
245 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
246 			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
247 			return;
248 		} else if (swz == RC_SWIZZLE_ZERO) {
249 			inst->U.I.Opcode = RC_OPCODE_MOV;
250 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
251 			return;
252 		}
253 	}
254 
255 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
256 		if (swz == RC_SWIZZLE_ONE) {
257 			inst->U.I.Opcode = RC_OPCODE_ADD;
258 			if (negate)
259 				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
260 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
261 			return;
262 		} else if (swz == RC_SWIZZLE_ZERO) {
263 			inst->U.I.Opcode = RC_OPCODE_MOV;
264 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
265 			return;
266 		}
267 	}
268 }
269 
constant_folding_mul(struct rc_instruction * inst)270 static void constant_folding_mul(struct rc_instruction * inst)
271 {
272 	rc_swizzle swz = 0;
273 	unsigned int negate = 0;
274 
275 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
276 		if (swz == RC_SWIZZLE_ONE) {
277 			inst->U.I.Opcode = RC_OPCODE_MOV;
278 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
279 			if (negate)
280 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
281 			return;
282 		} else if (swz == RC_SWIZZLE_ZERO) {
283 			inst->U.I.Opcode = RC_OPCODE_MOV;
284 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
285 			return;
286 		}
287 	}
288 
289 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
290 		if (swz == RC_SWIZZLE_ONE) {
291 			inst->U.I.Opcode = RC_OPCODE_MOV;
292 			if (negate)
293 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
294 			return;
295 		} else if (swz == RC_SWIZZLE_ZERO) {
296 			inst->U.I.Opcode = RC_OPCODE_MOV;
297 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
298 			return;
299 		}
300 	}
301 }
302 
constant_folding_add(struct rc_instruction * inst)303 static void constant_folding_add(struct rc_instruction * inst)
304 {
305 	rc_swizzle swz = 0;
306 	unsigned int negate = 0;
307 
308 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
309 		if (swz == RC_SWIZZLE_ZERO) {
310 			inst->U.I.Opcode = RC_OPCODE_MOV;
311 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
312 			return;
313 		}
314 	}
315 
316 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
317 		if (swz == RC_SWIZZLE_ZERO) {
318 			inst->U.I.Opcode = RC_OPCODE_MOV;
319 			return;
320 		}
321 	}
322 }
323 
/**
 * Replace 0.0, 1.0 and 0.5 immediate constants by their
 * respective swizzles. Simplify instructions like ADD dst, src, 0;
 *
 * The function works in three steps: it rewrites the source operands of
 * @p inst, hands MAD, MUL and ADD instructions to the matching
 * constant_folding_*() helper, and finally zeroes every source slot that the
 * (possibly changed) opcode no longer uses.
 *
 * @param c    Compiler context; supplies the constant table, the
 *             has_half_swizzles flag and the swizzle capabilities.
 * @param inst Instruction to simplify, modified in place.
 */
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned int i;

	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
		struct rc_constant * constant;
		struct rc_src_register newsrc;
		int have_real_reference;
		unsigned int chan;

		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
		for (chan = 0; chan < 4; ++chan)
			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
				break;
		/* chan == 4 means no channel selects a register component. */
		if (chan == 4) {
			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
			continue;
		}

		/* Convert immediates to swizzles. */
		/* Only directly addressed, in-range constant-file sources
		 * can be looked up in the constant table. */
		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
		    inst->U.I.SrcReg[src].RelAddr ||
		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
			continue;

		constant =
			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];

		if (constant->Type != RC_CONSTANT_IMMEDIATE)
			continue;

		/* Build the replacement in a copy so it can still be rejected. */
		newsrc = inst->U.I.SrcReg[src];
		have_real_reference = 0;
		for (chan = 0; chan < 4; ++chan) {
			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
			unsigned int newswz;
			float imm;
			float baseimm;

			/* Channels that do not select a component stay as is. */
			if (swz >= 4)
				continue;

			/* baseimm is the magnitude of the immediate. */
			imm = constant->u.Immediate[swz];
			baseimm = imm;
			if (imm < 0.0)
				baseimm = -baseimm;

			if (baseimm == 0.0) {
				newswz = RC_SWIZZLE_ZERO;
			} else if (baseimm == 1.0) {
				newswz = RC_SWIZZLE_ONE;
			} else if (baseimm == 0.5 && c->has_half_swizzles) {
				newswz = RC_SWIZZLE_HALF;
			} else {
				/* This channel still needs the constant itself. */
				have_real_reference = 1;
				continue;
			}

			SET_SWZ(newsrc.Swizzle, chan, newswz);
			/* A negative immediate is expressed through the negate
			 * bit, unless the operand takes the absolute value. */
			if (imm < 0.0 && !newsrc.Abs)
				newsrc.Negate ^= 1 << chan;
		}

		/* No channel reads the constant any more: drop the reference. */
		if (!have_real_reference) {
			newsrc.File = RC_FILE_NONE;
			newsrc.Index = 0;
		}

		/* don't make the swizzle worse */
		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
			continue;

		inst->U.I.SrcReg[src] = newsrc;
	}

	/* Simplify instructions based on constants */
	if (inst->U.I.Opcode == RC_OPCODE_MAD)
		constant_folding_mad(inst);

	/* note: MAD can simplify to MUL or ADD */
	if (inst->U.I.Opcode == RC_OPCODE_MUL)
		constant_folding_mul(inst);
	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
		constant_folding_add(inst);

	/* In case this instruction has been converted, make sure all of the
	 * registers that are no longer used are empty. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	for(i = opcode->NumSrcRegs; i < 3; i++) {
		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
	}
}
423 
424 /**
425  * If src and dst use the same register, this function returns a writemask that
426  * indicates which components are read by src.  Otherwise zero is returned.
427  */
src_reads_dst_mask(struct rc_src_register src,struct rc_dst_register dst)428 static unsigned int src_reads_dst_mask(struct rc_src_register src,
429 						struct rc_dst_register dst)
430 {
431 	if (dst.File != src.File || dst.Index != src.Index) {
432 		return 0;
433 	}
434 	return rc_swizzle_to_writemask(src.Swizzle);
435 }
436 
437 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
438  * in any of its channels.  Return 0 otherwise. */
src_has_const_swz(struct rc_src_register src)439 static int src_has_const_swz(struct rc_src_register src) {
440 	int chan;
441 	for(chan = 0; chan < 4; chan++) {
442 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
443 		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
444 						|| swz == RC_SWIZZLE_ONE) {
445 			return 1;
446 		}
447 	}
448 	return 0;
449 }
450 
presub_scan_read(void * data,struct rc_instruction * inst,struct rc_src_register * src)451 static void presub_scan_read(
452 	void * data,
453 	struct rc_instruction * inst,
454 	struct rc_src_register * src)
455 {
456 	struct rc_reader_data * reader_data = data;
457 	rc_presubtract_op * presub_opcode = reader_data->CbData;
458 
459 	if (!rc_inst_can_use_presub(inst, *presub_opcode,
460 			reader_data->Writer->U.I.DstReg.WriteMask,
461 			src,
462 			&reader_data->Writer->U.I.SrcReg[0],
463 			&reader_data->Writer->U.I.SrcReg[1])) {
464 		reader_data->Abort = 1;
465 		return;
466 	}
467 }
468 
/**
 * Rewrite all readers of an ADD so that they use a presubtract operation.
 *
 * The readers are collected with rc_get_readers(); when the scan aborts or
 * finds no reader nothing is changed.  Otherwise @p presub_replace is called
 * for every reader source that refers to the ADD's result.
 *
 * @param c              Compiler context.
 * @param inst_add       The ADD instruction whose readers are rewritten.
 * @param presub_opcode  Presubtract operation checked against each reader.
 * @param presub_replace Callback performing the actual rewrite.
 * @return 1 if the readers were rewritten, 0 if nothing was changed.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data reader_data;
	unsigned int r;

	/* presub_scan_read() reads the opcode through CbData. */
	reader_data.CbData = &presub_opcode;
	reader_data.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
						is_src_clobbered_scan_write);

	if (reader_data.Abort || reader_data.ReaderCount == 0)
		return 0;

	for (r = 0; r < reader_data.ReaderCount; r++) {
		const struct rc_reader * reader = &reader_data.Readers[r];
		struct rc_instruction * reader_inst = reader->Inst;
		const struct rc_opcode_info * info =
				rc_get_opcode_info(reader_inst->U.I.Opcode);
		unsigned int slot;

		/* Find which source slot of the reader is the recorded one. */
		for (slot = 0; slot < info->NumSrcRegs; slot++) {
			if (reader->U.I.Src != &reader_inst->U.I.SrcReg[slot])
				continue;
			presub_replace(inst_add, reader_inst, slot);
		}
	}
	return 1;
}
500 
501 /* This function assumes that inst_add->U.I.SrcReg[0] and
502  * inst_add->U.I.SrcReg[1] aren't both negative. */
presub_replace_add(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)503 static void presub_replace_add(
504 	struct rc_instruction * inst_add,
505 	struct rc_instruction * inst_reader,
506 	unsigned int src_index)
507 {
508 	rc_presubtract_op presub_opcode;
509 	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
510 		presub_opcode = RC_PRESUB_SUB;
511 	else
512 		presub_opcode = RC_PRESUB_ADD;
513 
514 	if (inst_add->U.I.SrcReg[1].Negate) {
515 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
516 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
517 	} else {
518 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
519 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
520 	}
521 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
522 	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
523 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
524 	inst_reader->U.I.SrcReg[src_index] =
525 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
526 					inst_reader->U.I.PreSub.SrcReg[0]);
527 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
528 	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
529 }
530 
is_presub_candidate(struct radeon_compiler * c,struct rc_instruction * inst)531 static int is_presub_candidate(
532 	struct radeon_compiler * c,
533 	struct rc_instruction * inst)
534 {
535 	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
536 	unsigned int i;
537 	unsigned int is_constant[2] = {0, 0};
538 
539 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
540 
541 	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
542 			|| inst->U.I.SaturateMode
543 			|| inst->U.I.WriteALUResult
544 			|| inst->U.I.Omod) {
545 		return 0;
546 	}
547 
548 	/* If both sources use a constant swizzle, then we can't convert it to
549 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
550 	 * operations neither source can contain a constant swizzle.  This
551 	 * specific case is checked in peephole_add_presub_add() when
552 	 * we make sure the swizzles for both sources are equal, so we
553 	 * don't need to worry about it here. */
554 	for (i = 0; i < 2; i++) {
555 		int chan;
556 		for (chan = 0; chan < 4; chan++) {
557 			rc_swizzle swz =
558 				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
559 			if (swz == RC_SWIZZLE_ONE
560 					|| swz == RC_SWIZZLE_ZERO
561 					|| swz == RC_SWIZZLE_HALF) {
562 				is_constant[i] = 1;
563 			}
564 		}
565 	}
566 	if (is_constant[0] && is_constant[1])
567 		return 0;
568 
569 	for(i = 0; i < info->NumSrcRegs; i++) {
570 		struct rc_src_register src = inst->U.I.SrcReg[i];
571 		if (src_reads_dst_mask(src, inst->U.I.DstReg))
572 			return 0;
573 
574 		src.File = RC_FILE_PRESUB;
575 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
576 			return 0;
577 	}
578 	return 1;
579 }
580 
peephole_add_presub_add(struct radeon_compiler * c,struct rc_instruction * inst_add)581 static int peephole_add_presub_add(
582 	struct radeon_compiler * c,
583 	struct rc_instruction * inst_add)
584 {
585 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
586         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
587         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
588 
589 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
590 		return 0;
591 
592 	/* src0 and src1 can't have absolute values */
593 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
594 	        return 0;
595 
596 	/* presub_replace_add() assumes only one is negative */
597 	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
598 	        return 0;
599 
600         /* if src0 is negative, at least all bits of dstmask have to be set */
601         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
602 	        return 0;
603 
604         /* if src1 is negative, at least all bits of dstmask have to be set */
605         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
606 	        return 0;
607 
608 	if (!is_presub_candidate(c, inst_add))
609 		return 0;
610 
611 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
612 		rc_remove_instruction(inst_add);
613 		return 1;
614 	}
615 	return 0;
616 }
617 
presub_replace_inv(struct rc_instruction * inst_add,struct rc_instruction * inst_reader,unsigned int src_index)618 static void presub_replace_inv(
619 	struct rc_instruction * inst_add,
620 	struct rc_instruction * inst_reader,
621 	unsigned int src_index)
622 {
623 	/* We must be careful not to modify inst_add, since it
624 	 * is possible it will remain part of the program.*/
625 	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
626 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
627 	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
628 	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
629 						inst_reader->U.I.PreSub.SrcReg[0]);
630 
631 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
632 	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
633 }
634 
635 /**
636  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
637  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
638  * of the add instruction must have the constatnt 1 swizzle.  This function
639  * does not check const registers to see if their value is 1.0, so it should
640  * be called after the constant_folding optimization.
641  * @return
642  * 	0 if the ADD instruction is still part of the program.
643  * 	1 if the ADD instruction is no longer part of the program.
644  */
peephole_add_presub_inv(struct radeon_compiler * c,struct rc_instruction * inst_add)645 static int peephole_add_presub_inv(
646 	struct radeon_compiler * c,
647 	struct rc_instruction * inst_add)
648 {
649 	unsigned int i, swz;
650 
651 	if (!is_presub_candidate(c, inst_add))
652 		return 0;
653 
654 	/* Check if src0 is 1. */
655 	/* XXX It would be nice to use is_src_uniform_constant here, but that
656 	 * function only works if the register's file is RC_FILE_NONE */
657 	for(i = 0; i < 4; i++ ) {
658 		if (!(inst_add->U.I.DstReg.WriteMask & (1 << i)))
659 			continue;
660 
661 		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
662 		if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i))
663 			return 0;
664 	}
665 
666 	/* Check src1. */
667 	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
668 						inst_add->U.I.DstReg.WriteMask
669 		|| inst_add->U.I.SrcReg[1].Abs
670 		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
671 			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
672 		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
673 
674 		return 0;
675 	}
676 
677 	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
678 		rc_remove_instruction(inst_add);
679 		return 1;
680 	}
681 	return 0;
682 }
683 
/**
 * User data shared by omod_filter_reader_cb() and omod_filter_writer_cb()
 * while peephole_mul_omod() scans the instructions in front of a MUL.
 */
struct peephole_mul_cb_data {
	struct rc_dst_register * Writer;	/* destination register to watch */
	unsigned int Clobbered;	/* set to 1 by the callbacks on a conflict */
};
688 
/**
 * Read callback for peephole_mul_omod().
 *
 * Marks the callback data as clobbered when rc_src_reads_dst_mask() reports
 * that the read overlaps the watched destination register.
 *
 * @param userdata Pointer to a struct peephole_mul_cb_data.
 * @param inst     Instruction performing the read (not used here).
 * @param file     Register file being read.
 * @param index    Register index being read.
 * @param mask     Channel mask being read.
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * watched = cb->Writer;

	if (!rc_src_reads_dst_mask(file, mask, index,
		watched->File, watched->Index, watched->WriteMask))
		return;

	cb->Clobbered = 1;
}
703 
/**
 * Write callback for peephole_mul_omod().
 *
 * Marks the callback data as clobbered when the write targets the watched
 * destination register (same file and index) on at least one of its
 * channels.
 *
 * @param userdata Pointer to a struct peephole_mul_cb_data.
 * @param inst     Instruction performing the write (not used here).
 * @param file     Register file being written.
 * @param index    Register index being written.
 * @param mask     Channel mask being written.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * watched = cb->Writer;

	if (file != watched->File || index != watched->Index)
		return;
	if (!(mask & watched->WriteMask))
		return;

	cb->Clobbered = 1;
}
717 
/**
 * Try to fold a MUL by a constant into the output modifier (Omod) of the
 * instructions that produce its temporary operand.
 *
 * The MUL must have exactly one temporary source and one constant source.
 * The constant source must not be negated, all of its used channels must
 * select the same swizzle, it must be an immediate, and its value must be
 * one of 2, 4, 8, 1/2, 1/4 or 1/8.  The writers of the temporary are obtained
 * with rc_variable_list_get_writers_one_reader(); none of them may be a
 * texture instruction or have a saturate mode, and no instruction between a
 * writer and the MUL may read or write the MUL's destination.
 *
 * On success every writer gets the output modifier, the MUL's destination
 * register and the MUL's saturate mode, and the MUL is removed.
 *
 * @param c        Compiler context.
 * @param inst_mul The MUL instruction.
 * @param var_list Variable list used to look up the writers.
 * @return 1 if the MUL was removed from the program, 0 if nothing changed.
 */
static int peephole_mul_omod(
	struct radeon_compiler * c,
	struct rc_instruction * inst_mul,
	struct rc_list * var_list)
{
	unsigned int chan = 0, swz, i;
	int const_index = -1;
	int temp_index = -1;
	float const_value;
	rc_omod_op omod_op = RC_OMOD_DISABLE;
	struct rc_list * writer_list;
	struct rc_variable * var;
	struct peephole_mul_cb_data cb_data;
	unsigned writemask_sum;

	/* Classify the two sources: one must be a temporary, the other a
	 * constant.  Any other combination makes the function bail out. */
	for (i = 0; i < 2; i++) {
		unsigned int j;
		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
			return 0;
		}
		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
			if (temp_index != -1) {
				/* The instruction has two temp sources */
				return 0;
			} else {
				temp_index = i;
				continue;
			}
		}
		/* If we get this far Src[i] must be a constant src */
		if (inst_mul->U.I.SrcReg[i].Negate) {
			return 0;
		}
		/* The constant src needs to read from the same swizzle */
		/* chan remembers the first used channel; it is the one whose
		 * value is fetched further down. */
		swz = RC_SWIZZLE_UNUSED;
		chan = 0;
		for (j = 0; j < 4; j++) {
			unsigned int j_swz =
				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
			if (j_swz == RC_SWIZZLE_UNUSED) {
				continue;
			}
			if (swz == RC_SWIZZLE_UNUSED) {
				swz = j_swz;
				chan = j;
			} else if (j_swz != swz) {
				return 0;
			}
		}

		if (const_index != -1) {
			/* The instruction has two constant sources */
			return 0;
		} else {
			const_index = i;
		}
	}

	/* Only immediates have a value that can be inspected here. */
	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
				inst_mul->U.I.SrcReg[const_index].Index)) {
		return 0;
	}
	const_value = rc_get_constant_value(c,
			inst_mul->U.I.SrcReg[const_index].Index,
			inst_mul->U.I.SrcReg[const_index].Swizzle,
			inst_mul->U.I.SrcReg[const_index].Negate,
			chan);

	/* Map the factor to an output modifier; other factors are rejected. */
	if (const_value == 2.0f) {
		omod_op = RC_OMOD_MUL_2;
	} else if (const_value == 4.0f) {
		omod_op = RC_OMOD_MUL_4;
	} else if (const_value == 8.0f) {
		omod_op = RC_OMOD_MUL_8;
	} else if (const_value == (1.0f / 2.0f)) {
		omod_op = RC_OMOD_DIV_2;
	} else if (const_value == (1.0f / 4.0f)) {
		omod_op = RC_OMOD_DIV_4;
	} else if (const_value == (1.0f / 8.0f)) {
		omod_op = RC_OMOD_DIV_8;
	} else {
		return 0;
	}

	writer_list = rc_variable_list_get_writers_one_reader(var_list,
		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);

	if (!writer_list) {
		return 0;
	}

	/* Check every writer and the instructions between it and the MUL. */
	cb_data.Clobbered = 0;
	cb_data.Writer = &inst_mul->U.I.DstReg;
	for (var = writer_list->Item; var; var = var->Friend) {
		struct rc_instruction * inst;
		const struct rc_opcode_info * info = rc_get_opcode_info(
				var->Inst->U.I.Opcode);
		if (info->HasTexture) {
			return 0;
		}
		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
			return 0;
		}
		/* Walk backwards from the MUL to the writer; the callbacks set
		 * Clobbered when the MUL's destination is read or written. */
		for (inst = inst_mul->Prev; inst != var->Inst;
							inst = inst->Prev) {
			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
								&cb_data);
			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
								&cb_data);
			if (cb_data.Clobbered) {
				break;
			}
		}
	}

	if (cb_data.Clobbered) {
		return 0;
	}

	writemask_sum = rc_variable_writemask_sum(writer_list->Item);

	/* rc_normal_rewrite_writemask can't expand a previous writemask to store
	 * more channels replicated.
	 */
	if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask))
		return 0;

	/* Rewrite the instructions */
	for (var = writer_list->Item; var; var = var->Friend) {
		struct rc_variable * writer = var;
		unsigned conversion_swizzle = rc_make_conversion_swizzle(
					writemask_sum,
					inst_mul->U.I.DstReg.WriteMask);
		/* The writer now produces the MUL's result directly. */
		writer->Inst->U.I.Omod = omod_op;
		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
	}

	rc_remove_instruction(inst_mul);

	return 1;
}
863 
864 /**
865  * @return
866  * 	0 if inst is still part of the program.
867  * 	1 if inst is no longer part of the program.
868  */
peephole(struct radeon_compiler * c,struct rc_instruction * inst)869 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
870 {
871 	switch(inst->U.I.Opcode){
872 	case RC_OPCODE_ADD:
873 		if (c->has_presub) {
874 			if(peephole_add_presub_inv(c, inst))
875 				return 1;
876 			if(peephole_add_presub_add(c, inst))
877 				return 1;
878 		}
879 		break;
880 	default:
881 		break;
882 	}
883 	return 0;
884 }
885 
rc_optimize(struct radeon_compiler * c,void * user)886 void rc_optimize(struct radeon_compiler * c, void *user)
887 {
888 	struct rc_instruction * inst = c->Program.Instructions.Next;
889 	struct rc_list * var_list;
890 	while(inst != &c->Program.Instructions) {
891 		struct rc_instruction * cur = inst;
892 		inst = inst->Next;
893 
894 		constant_folding(c, cur);
895 
896 		if(peephole(c, cur))
897 			continue;
898 
899 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
900 			copy_propagate(c, cur);
901 			/* cur may no longer be part of the program */
902 		}
903 	}
904 
905 	if (!c->has_omod) {
906 		return;
907 	}
908 
909 	inst = c->Program.Instructions.Next;
910 	while(inst != &c->Program.Instructions) {
911 		struct rc_instruction * cur = inst;
912 		inst = inst->Next;
913 		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
914 			var_list = rc_get_variables(c);
915 			peephole_mul_omod(c, cur, var_list);
916 		}
917 	}
918 }
919