1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "radeon_program_pair.h"
30 
31 #include <stdio.h>
32 
33 #include "main/glheader.h"
34 #include "util/register_allocate.h"
35 #include "util/u_memory.h"
36 #include "util/ralloc.h"
37 
38 #include "r300_fragprog_swizzle.h"
39 #include "radeon_compiler.h"
40 #include "radeon_compiler_util.h"
41 #include "radeon_dataflow.h"
42 #include "radeon_list.h"
43 #include "radeon_regalloc.h"
44 #include "radeon_variable.h"
45 
/* Compile-time switch for the debug tracing in this file; 0 disables it. */
#define VERBOSE 0

/* printf-style trace helper: forwards its arguments to fprintf(stderr, ...)
 * only when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
49 
50 
51 
/**
 * Allocation bookkeeping for one input or temporary register.
 */
struct register_info {
	/* One live interval per channel; indexed by the bit position of the
	 * channel in a read mask (see scan_read_callback). */
	struct live_intervals Live[4];

	/* Set when a read of this register has been seen. */
	unsigned int Used:1;
	/* Set once File/Index hold an assignment; remap_register only rewrites
	 * registers that carry this flag. */
	unsigned int Allocated:1;
	/* Register file of the assignment. */
	unsigned int File:3;
	/* Register index that references to this register are rewritten to. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
	/* For inputs: one bit per channel whose live interval is in use. */
	unsigned int Writemask;
};
61 
/**
 * Working state shared by the allocation passes and their callbacks.
 */
struct regalloc_state {
	/* Compiler whose program is being processed. */
	struct radeon_compiler * C;

	/* Per-input bookkeeping, NumInputs entries. */
	struct register_info * Input;
	unsigned int NumInputs;

	/* Per-temporary bookkeeping, NumTemporaries entries. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/* Non-zero when the inputs-only allocation is in use; remap_register
	 * then rewrites temporaries as well as inputs. */
	unsigned int Simple;
	/* Largest IP of a loop end seen so far while scanning instructions;
	 * input live intervals are extended to at least this IP. */
	int LoopEnd;
};
74 
/**
 * Description of one register class: its identifier and the writemasks
 * that belong to it.
 */
struct rc_class {
	/* Identifier of the class (RC_REG_CLASS_*). */
	enum rc_reg_class ID;

	/* Number of entries of Writemasks that are registered for the class. */
	unsigned int WritemaskCount;

	/** List of writemasks that belong to this class */
	unsigned int Writemasks[3];


};
85 
/**
 * Table of all register classes.  Each entry gives the class ID, the number
 * of writemasks in the class, and the writemasks themselves; unused slots
 * are filled with RC_MASK_NONE.
 */
static const struct rc_class rc_class_list [] = {
	{RC_REG_CLASS_SINGLE, 3,
		{RC_MASK_X,
		 RC_MASK_Y,
		 RC_MASK_Z}},
	{RC_REG_CLASS_DOUBLE, 3,
		{RC_MASK_X | RC_MASK_Y,
		 RC_MASK_X | RC_MASK_Z,
		 RC_MASK_Y | RC_MASK_Z}},
	{RC_REG_CLASS_TRIPLE, 1,
		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
		 RC_MASK_NONE,
		 RC_MASK_NONE}},
	{RC_REG_CLASS_ALPHA, 1,
		{RC_MASK_W,
		 RC_MASK_NONE,
		 RC_MASK_NONE}},
	{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
		{RC_MASK_X | RC_MASK_W,
		 RC_MASK_Y | RC_MASK_W,
		 RC_MASK_Z | RC_MASK_W}},
	{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
	{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	/* The remaining classes each pin one specific writemask. */
	{RC_REG_CLASS_X, 1,
		{RC_MASK_X,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_Y, 1,
		{RC_MASK_Y,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_Z, 1,
		{RC_MASK_Z,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_XY, 1,
		{RC_MASK_X | RC_MASK_Y,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_YZ, 1,
		{RC_MASK_Y | RC_MASK_Z,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_XZ, 1,
		{RC_MASK_X | RC_MASK_Z,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_XW, 1,
		{RC_MASK_X | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_YW, 1,
		{RC_MASK_Y | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_ZW, 1,
		{RC_MASK_Z | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_XYW, 1,
		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_YZW, 1,
		{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}},
	{RC_REG_CLASS_XZW, 1,
		{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
		RC_MASK_NONE,
		RC_MASK_NONE}}
};
164 
print_live_intervals(struct live_intervals * src)165 static void print_live_intervals(struct live_intervals * src)
166 {
167 	if (!src || !src->Used) {
168 		DBG("(null)");
169 		return;
170 	}
171 
172 	DBG("(%i,%i)", src->Start, src->End);
173 }
174 
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)175 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
176 {
177 	if (VERBOSE) {
178 		DBG("overlap_live_intervals: ");
179 		print_live_intervals(a);
180 		DBG(" to ");
181 		print_live_intervals(b);
182 		DBG("\n");
183 	}
184 
185 	if (!a->Used || !b->Used) {
186 		DBG("    unused interval\n");
187 		return 0;
188 	}
189 
190 	if (a->Start > b->Start) {
191 		if (a->Start < b->End) {
192 			DBG("    overlap\n");
193 			return 1;
194 		}
195 	} else if (b->Start > a->Start) {
196 		if (b->Start < a->End) {
197 			DBG("    overlap\n");
198 			return 1;
199 		}
200 	} else { /* a->Start == b->Start */
201 		if (a->Start != a->End && b->Start != b->End) {
202 			DBG("    overlap\n");
203 			return 1;
204 		}
205 	}
206 
207 	DBG("    no overlap\n");
208 
209 	return 0;
210 }
211 
/**
 * Read callback used to build the live intervals of input registers.
 *
 * Reads from files other than RC_FILE_INPUT are ignored.  For an input, the
 * register is marked used and every channel present in @p mask gets an
 * interval that starts at 0 and ends at the later of the reading
 * instruction's IP and the current LoopEnd.
 *
 * @param data  The struct regalloc_state being filled in.
 * @param inst  Instruction performing the read.
 * @param file  Register file being read.
 * @param index Register index within @p file.
 * @param mask  Channels read; bit i selects channel i.
 */
static void scan_read_callback(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state * state = data;
	struct register_info * input;
	unsigned int chan;

	if (file != RC_FILE_INPUT)
		return;

	input = &state->Input[index];
	input->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		struct live_intervals * live = &input->Live[chan];

		if (mask & (1u << chan)) {
			live->Used = 1;
			live->Start = 0;
			live->End = state->LoopEnd > inst->IP ?
						state->LoopEnd : inst->IP;
		}
	}
}
235 
/**
 * Register remapping callback: replaces *index with the allocated index.
 *
 * Inputs are always considered; temporaries only when the simple
 * (inputs-only) allocation is active.  Registers without an allocation and
 * registers from any other file are left untouched.  *file is never written.
 *
 * @param data  The struct regalloc_state holding the allocation tables.
 * @param inst  Instruction being rewritten (unused).
 * @param file  Register file of the operand.
 * @param index In: current register index.  Out: allocated index, if any.
 */
static void remap_register(void * data, struct rc_instruction * inst,
		rc_register_file * file, unsigned int * index)
{
	struct regalloc_state * state = data;
	const struct register_info * table;

	if (*file == RC_FILE_INPUT) {
		table = state->Input;
	} else if (*file == RC_FILE_TEMPORARY && state->Simple) {
		table = state->Temporary;
	} else {
		return;
	}

	if (!table[*index].Allocated)
		return;

	*index = table[*index].Index;
}
253 
alloc_input_simple(void * data,unsigned int input,unsigned int hwreg)254 static void alloc_input_simple(void * data, unsigned int input,
255 							unsigned int hwreg)
256 {
257 	struct regalloc_state * s = data;
258 
259 	if (input >= s->NumInputs)
260 		return;
261 
262 	s->Input[input].Allocated = 1;
263 	s->Input[input].File = RC_FILE_TEMPORARY;
264 	s->Input[input].Index = hwreg;
265 }
266 
267 /* This functions offsets the temporary register indices by the number
268  * of input registers, because input registers are actually temporaries and
269  * should not occupy the same space.
270  *
271  * This pass is supposed to be used to maintain correct allocation of inputs
272  * if the standard register allocation is disabled. */
do_regalloc_inputs_only(struct regalloc_state * s)273 static void do_regalloc_inputs_only(struct regalloc_state * s)
274 {
275 	for (unsigned i = 0; i < s->NumTemporaries; i++) {
276 		s->Temporary[i].Allocated = 1;
277 		s->Temporary[i].File = RC_FILE_TEMPORARY;
278 		s->Temporary[i].Index = i + s->NumInputs;
279 	}
280 }
281 
/** Return non-zero if @p op is one of the derivative opcodes (DDX or DDY). */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
286 
find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)287 static int find_class(
288 	const struct rc_class * classes,
289 	unsigned int writemask,
290 	unsigned int max_writemask_count)
291 {
292 	unsigned int i;
293 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
294 		unsigned int j;
295 		if (classes[i].WritemaskCount > max_writemask_count) {
296 			continue;
297 		}
298 		for (j = 0; j < 3; j++) {
299 			if (classes[i].Writemasks[j] == writemask) {
300 				return i;
301 			}
302 		}
303 	}
304 	return -1;
305 }
306 
/**
 * User data handed to variable_get_class_read_cb.
 */
struct variable_get_class_cb_data {
	/* Flag owned by the caller; the callback clears it to 0 when a
	 * converted swizzle is not native. */
	unsigned int * can_change_writemask;
	/* Swizzle passed to rc_adjust_channels together with each argument's
	 * own swizzle. */
	unsigned int conversion_swizzle;
};
311 
variable_get_class_read_cb(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)312 static void variable_get_class_read_cb(
313 	void * userdata,
314 	struct rc_instruction * inst,
315 	struct rc_pair_instruction_arg * arg,
316 	struct rc_pair_instruction_source * src)
317 {
318 	struct variable_get_class_cb_data * d = userdata;
319 	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
320 							d->conversion_swizzle);
321 	if (!r300_swizzle_is_native_basic(new_swizzle)) {
322 		*d->can_change_writemask = 0;
323 	}
324 }
325 
/**
 * Determine the register class of a variable.
 *
 * The class is looked up from the combined writemask of the variable.  When
 * the writemask may be changed, classes with several alternative writemasks
 * are eligible; otherwise only classes with exactly one writemask are.
 *
 * @param variable Variable (with its chain of friends) to classify.
 * @param classes  Class table searched with find_class.
 * @return The ID of the matching class.  If no class matches, an error is
 *         reported through rc_error and 0 is returned.
 */
static enum rc_reg_class variable_get_class(
	struct rc_variable * variable,
	const struct rc_class * classes)
{
	unsigned int i;
	unsigned int can_change_writemask= 1;
	unsigned int writemask = rc_variable_writemask_sum(variable);
	struct rc_list * readers = rc_variable_readers_union(variable);
	int class_index;

	if (!variable->C->is_r500) {
		struct rc_class c;
		struct rc_variable * var_ptr;
		/* The assumption here is that if an instruction has type
		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
		 * r300 and r400 can't swizzle the result of a TEX lookup. */
		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
				writemask = RC_MASK_XYZW;
			}
		}

		/* Check if it is possible to do swizzle packing for r300/r400
		 * without creating non-native swizzles. */
		class_index = find_class(classes, writemask, 3);
		if (class_index < 0) {
			goto error;
		}
		c = classes[class_index];
		/* A class with a single writemask offers no alternative to
		 * switch to, so the class found above is final. */
		if (c.WritemaskCount == 1) {
			goto done;
		}
		/* Try every writemask of the class; the first conversion that
		 * would need a non-native swizzle ends the search. */
		for (i = 0; i < c.WritemaskCount; i++) {
			struct rc_variable * var_ptr;
			for (var_ptr = variable; var_ptr;
						var_ptr = var_ptr->Friend) {
				int j;
				unsigned int conversion_swizzle =
						rc_make_conversion_swizzle(
						writemask, c.Writemasks[i]);
				struct variable_get_class_cb_data d;
				d.can_change_writemask = &can_change_writemask;
				d.conversion_swizzle = conversion_swizzle;
				/* If we get this far var_ptr->Inst has to
				 * be a pair instruction.  If variable or any
				 * of its friends are normal instructions,
				 * then the writemask will be set to RC_MASK_XYZW
				 * and the function will return before it gets
				 * here. */
				rc_pair_for_all_reads_arg(var_ptr->Inst,
					variable_get_class_read_cb, &d);

				for (j = 0; j < var_ptr->ReaderCount; j++) {
					unsigned int old_swizzle;
					unsigned int new_swizzle;
					struct rc_reader r = var_ptr->Readers[j];
					if (r.Inst->Type ==
							RC_INSTRUCTION_PAIR ) {
						old_swizzle = r.U.P.Arg->Swizzle;
					} else {
						/* Source operands of TEX
						 * instructions can't be
						 * swizzle on r300/r400 GPUs.
						 */
						can_change_writemask = 0;
						break;
					}
					new_swizzle = rc_adjust_channels(
						old_swizzle, conversion_swizzle);
					if (!r300_swizzle_is_native_basic(
								new_swizzle)) {
						can_change_writemask = 0;
						break;
					}
				}
				if (!can_change_writemask) {
					break;
				}
			}
			if (!can_change_writemask) {
				break;
			}
		}
	}

	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
		/* DDX/DDY seem to always fail when their writemasks are
		 * changed.*/
		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
			can_change_writemask = 0;
		}
	}
	/* Readers that use the value as a presubtract source, or that are
	 * derivative instructions, also pin the writemask. */
	for ( ; readers; readers = readers->Next) {
		struct rc_reader * r = readers->Item;
		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
				can_change_writemask = 0;
				break;
			}
			/* DDX/DDY also fail when their swizzles are changed. */
			if (is_derivative(r->Inst->U.P.RGB.Opcode)
			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
				can_change_writemask = 0;
				break;
			}
		}
	}

	/* A limit of 3 admits classes with alternative writemasks; a limit
	 * of 1 restricts the search to classes with exactly one writemask. */
	class_index = find_class(classes, writemask,
						can_change_writemask ? 3 : 1);
done:
	if (class_index > -1) {
		return classes[class_index].ID;
	} else {
		/* Reached either by falling into this branch or through the
		 * "goto error" above. */
error:
		rc_error(variable->C,
				"Could not find class for index=%u mask=%u\n",
				variable->Dst.Index, writemask);
		return 0;
	}
}
448 
overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)449 static unsigned int overlap_live_intervals_array(
450 	struct live_intervals * a,
451 	struct live_intervals * b)
452 {
453 	unsigned int a_chan, b_chan;
454 	for (a_chan = 0; a_chan < 4; a_chan++) {
455 		for (b_chan = 0; b_chan < 4; b_chan++) {
456 			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
457 					return 1;
458 			}
459 		}
460 	}
461 	return 0;
462 }
463 
reg_get_index(int reg)464 static unsigned int reg_get_index(int reg)
465 {
466 	return reg / RC_MASK_XYZW;
467 }
468 
reg_get_writemask(int reg)469 static unsigned int reg_get_writemask(int reg)
470 {
471 	return (reg % RC_MASK_XYZW) + 1;
472 }
473 
get_reg_id(unsigned int index,unsigned int writemask)474 static int get_reg_id(unsigned int index, unsigned int writemask)
475 {
476 	assert(writemask);
477 	if (writemask == 0) {
478 		return 0;
479 	}
480 	return (index * RC_MASK_XYZW) + (writemask - 1);
481 }
482 
#if VERBOSE
/**
 * Print a register id to stderr as "Temp[index].xyzw", with '_' in place of
 * every channel that is absent from the writemask.
 */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
495 
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)496 static void add_register_conflicts(
497 	struct ra_regs * regs,
498 	unsigned int max_temp_regs)
499 {
500 	unsigned int index, a_mask, b_mask;
501 	for (index = 0; index < max_temp_regs; index++) {
502 		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
503 			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
504 								b_mask++) {
505 				if (a_mask & b_mask) {
506 					ra_add_reg_conflict(regs,
507 						get_reg_id(index, a_mask),
508 						get_reg_id(index, b_mask));
509 				}
510 			}
511 		}
512 	}
513 }
514 
do_advanced_regalloc(struct regalloc_state * s)515 static void do_advanced_regalloc(struct regalloc_state * s)
516 {
517 
518 	unsigned int i, input_node, node_count, node_index;
519 	struct ra_class ** node_classes;
520 	struct rc_instruction * inst;
521 	struct rc_list * var_ptr;
522 	struct rc_list * variables;
523 	struct ra_graph * graph;
524 	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
525 
526 	/* Get list of program variables */
527 	variables = rc_get_variables(s->C);
528 	node_count = rc_list_count(variables);
529 	node_classes = memory_pool_malloc(&s->C->Pool,
530 			node_count * sizeof(struct ra_class *));
531 
532 	for (var_ptr = variables, node_index = 0; var_ptr;
533 					var_ptr = var_ptr->Next, node_index++) {
534 		unsigned int class_index;
535 		/* Compute the live intervals */
536 		rc_variable_compute_live_intervals(var_ptr->Item);
537 
538 		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
539 		node_classes[node_index] = ra_state->classes[class_index];
540 	}
541 
542 
543 	/* Calculate live intervals for input registers */
544 	for (inst = s->C->Program.Instructions.Next;
545 					inst != &s->C->Program.Instructions;
546 					inst = inst->Next) {
547 		rc_opcode op = rc_get_flow_control_inst(inst);
548 		if (op == RC_OPCODE_BGNLOOP) {
549 			struct rc_instruction * endloop =
550 							rc_match_bgnloop(inst);
551 			if (endloop->IP > s->LoopEnd) {
552 				s->LoopEnd = endloop->IP;
553 			}
554 		}
555 		rc_for_all_reads_mask(inst, scan_read_callback, s);
556 	}
557 
558 	/* Compute the writemask for inputs. */
559 	for (i = 0; i < s->NumInputs; i++) {
560 		unsigned int chan, writemask = 0;
561 		for (chan = 0; chan < 4; chan++) {
562 			if (s->Input[i].Live[chan].Used) {
563 				writemask |= (1 << chan);
564 			}
565 		}
566 		s->Input[i].Writemask = writemask;
567 	}
568 
569 	graph = ra_alloc_interference_graph(ra_state->regs,
570 						node_count + s->NumInputs);
571 
572 	for (node_index = 0; node_index < node_count; node_index++) {
573 		ra_set_node_class(graph, node_index, node_classes[node_index]);
574 	}
575 
576 	/* Build the interference graph */
577 	for (var_ptr = variables, node_index = 0; var_ptr;
578 					var_ptr = var_ptr->Next,node_index++) {
579 		struct rc_list * a, * b;
580 		unsigned int b_index;
581 
582 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
583 						b; b = b->Next, b_index++) {
584 			struct rc_variable * var_a = a->Item;
585 			while (var_a) {
586 				struct rc_variable * var_b = b->Item;
587 				while (var_b) {
588 					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
589 						ra_add_node_interference(graph,
590 							node_index, b_index);
591 					}
592 					var_b = var_b->Friend;
593 				}
594 				var_a = var_a->Friend;
595 			}
596 		}
597 	}
598 
599 	/* Add input registers to the interference graph */
600 	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
601 		if (!s->Input[i].Writemask) {
602 			continue;
603 		}
604 		for (var_ptr = variables, node_index = 0;
605 				var_ptr; var_ptr = var_ptr->Next, node_index++) {
606 			struct rc_variable * var = var_ptr->Item;
607 			if (overlap_live_intervals_array(s->Input[i].Live,
608 								var->Live)) {
609 				ra_add_node_interference(graph, node_index,
610 						node_count + input_node);
611 			}
612 		}
613 		/* Manually allocate a register for this input */
614 		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
615 				s->Input[i].Index, s->Input[i].Writemask));
616 		input_node++;
617 	}
618 
619 	if (!ra_allocate(graph)) {
620 		rc_error(s->C, "Ran out of hardware temporaries\n");
621 		return;
622 	}
623 
624 	/* Rewrite the registers */
625 	for (var_ptr = variables, node_index = 0; var_ptr;
626 				var_ptr = var_ptr->Next, node_index++) {
627 		int reg = ra_get_node_reg(graph, node_index);
628 		unsigned int writemask = reg_get_writemask(reg);
629 		unsigned int index = reg_get_index(reg);
630 		struct rc_variable * var = var_ptr->Item;
631 
632 		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
633 			writemask = rc_variable_writemask_sum(var);
634 		}
635 
636 		if (var->Dst.File == RC_FILE_INPUT) {
637 			continue;
638 		}
639 		rc_variable_change_dst(var, index, writemask);
640 	}
641 
642 	ralloc_free(graph);
643 }
644 
rc_init_regalloc_state(struct rc_regalloc_state * s)645 void rc_init_regalloc_state(struct rc_regalloc_state *s)
646 {
647 	unsigned i, j, index;
648 	unsigned **ra_q_values;
649 
650 	/* Pre-computed q values.  This array describes the maximum number of
651 	 * a class's [row] registers that are in conflict with a single
652 	 * register from another class [column].
653 	 *
654 	 * For example:
655 	 * q_values[0][2] is 3, because a register from class 2
656 	 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
657 	 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
658 	 * and T0.z.
659 	 *
660 	 * q_values[2][0] is 1, because a register from class 0
661 	 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
662 	 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
663 	 *
664 	 * The q values for each register class [row] will never be greater
665 	 * than the maximum number of writemask combinations for that class.
666 	 *
667 	 * For example:
668 	 *
669 	 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
670 	 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
671 	 * than 1.
672 	 */
673 	const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
674 	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
675 	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
676 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
677 	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
678 	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
679 	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
680 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
681 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
682 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
683 	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
684 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
685 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
686 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
687 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
688 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
689 	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
690 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
691 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
692 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
693 	};
694 
695 	/* Allocate the main ra data structure */
696 	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
697                                    true);
698 
699 	/* Create the register classes */
700 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
701 		const struct rc_class *class = &rc_class_list[i];
702 		s->classes[class->ID] = ra_alloc_reg_class(s->regs);
703 
704 		/* Assign registers to the classes */
705 		for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
706 			for (j = 0; j < class->WritemaskCount; j++) {
707 				int reg_id = get_reg_id(index,
708 						class->Writemasks[j]);
709 				ra_class_add_reg(s->classes[class->ID], reg_id);
710 			}
711 		}
712 	}
713 
714 	/* Set the q values.  The q_values array is indexed based on
715 	 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
716 	 * different than the ID assigned to that class by ra.
717 	 * This why we need to manually construct this list.
718 	 */
719 	ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
720 
721 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
722 		ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
723 		for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
724 			ra_q_values[i][j] = q_values[i][j];
725 		}
726 	}
727 
728 	/* Add register conflicts */
729 	add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
730 
731 	ra_set_finalize(s->regs, ra_q_values);
732 
733 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
734 		FREE(ra_q_values[i]);
735 	}
736 	FREE(ra_q_values);
737 }
738 
rc_destroy_regalloc_state(struct rc_regalloc_state * s)739 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
740 {
741 	ralloc_free(s->regs);
742 }
743 
744 /**
745  * @param user This parameter should be a pointer to an integer value.  If this
746  * integer value is zero, then a simple register allocator will be used that
747  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
748  * user is non-zero, then the regular register allocator will be used
749  * (\sa do_regalloc).
750   */
rc_pair_regalloc(struct radeon_compiler * cc,void * user)751 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
752 {
753 	struct r300_fragment_program_compiler *c =
754 				(struct r300_fragment_program_compiler*)cc;
755 	struct regalloc_state s;
756 	int * do_full_regalloc = (int*)user;
757 
758 	memset(&s, 0, sizeof(s));
759 	s.C = cc;
760 	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
761 	s.Input = memory_pool_malloc(&cc->Pool,
762 			s.NumInputs * sizeof(struct register_info));
763 	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
764 
765 	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
766 	s.Temporary = memory_pool_malloc(&cc->Pool,
767 			s.NumTemporaries * sizeof(struct register_info));
768 	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
769 
770 	rc_recompute_ips(s.C);
771 
772 	c->AllocateHwInputs(c, &alloc_input_simple, &s);
773 	if (*do_full_regalloc) {
774 		do_advanced_regalloc(&s);
775 	} else {
776 		s.Simple = 1;
777 		do_regalloc_inputs_only(&s);
778 	}
779 
780 	/* Rewrite inputs and if we are doing the simple allocation, rewrite
781 	 * temporaries too. */
782 	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
783 					inst != &s.C->Program.Instructions;
784 					inst = inst->Next) {
785 		rc_remap_registers(inst, &remap_register, &s);
786 	}
787 }
788