1 /*
2  * Copyright (C) 2019 Google.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "util/ralloc.h"
25 
26 #include "ir3.h"
27 
28 static bool
is_fp16_conv(struct ir3_instruction * instr)29 is_fp16_conv(struct ir3_instruction *instr)
30 {
31 	if (instr->opc != OPC_MOV)
32 		return false;
33 
34 	struct ir3_register *dst = instr->regs[0];
35 	struct ir3_register *src = instr->regs[1];
36 
37 	/* disallow conversions that cannot be folded into
38 	 * alu instructions:
39 	 */
40 	if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF))
41 		return false;
42 
43 	if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
44 		return false;
45 	if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
46 		return false;
47 
48 	if (instr->cat1.src_type == TYPE_F32 &&
49 			instr->cat1.dst_type == TYPE_F16)
50 		return true;
51 
52 	if (instr->cat1.src_type == TYPE_F16 &&
53 			instr->cat1.dst_type == TYPE_F32)
54 		return true;
55 
56 	return false;
57 }
58 
59 static bool
all_uses_fp16_conv(struct ir3_instruction * conv_src)60 all_uses_fp16_conv(struct ir3_instruction *conv_src)
61 {
62 	foreach_ssa_use (use, conv_src)
63 		if (!is_fp16_conv(use))
64 			return false;
65 	return true;
66 }
67 
68 /* For an instruction which has a conversion folded in, re-write the
69  * uses of *all* conv's that used that src to be a simple mov that
70  * cp can eliminate.  This avoids invalidating the SSA uses, it just
71  * shifts the use to a simple mov.
72  */
73 static void
rewrite_src_uses(struct ir3_instruction * src)74 rewrite_src_uses(struct ir3_instruction *src)
75 {
76 	foreach_ssa_use (use, src) {
77 		assert(is_fp16_conv(use));
78 
79 		if (is_half(src)) {
80 			use->regs[1]->flags |= IR3_REG_HALF;
81 		} else {
82 			use->regs[1]->flags &= ~IR3_REG_HALF;
83 		}
84 
85 		use->cat1.src_type = use->cat1.dst_type;
86 	}
87 }
88 
89 static bool
try_conversion_folding(struct ir3_instruction * conv)90 try_conversion_folding(struct ir3_instruction *conv)
91 {
92 	struct ir3_instruction *src;
93 
94 	if (!is_fp16_conv(conv))
95 		return false;
96 
97 	/* NOTE: we can have non-ssa srcs after copy propagation: */
98 	src = ssa(conv->regs[1]);
99 	if (!src)
100 		return false;
101 
102 	if (!is_alu(src))
103 		return false;
104 
105 	/* avoid folding f2f32(f2f16) together, in cases where this is legal to
106 	 * do (glsl) nir should have handled that for us already:
107 	 */
108 	if (is_fp16_conv(src))
109 		return false;
110 
111 	switch (src->opc) {
112 	case OPC_SEL_B32:
113 	case OPC_SEL_B16:
114 	case OPC_MAX_F:
115 	case OPC_MIN_F:
116 	case OPC_SIGN_F:
117 	case OPC_ABSNEG_F:
118 		return false;
119 	case OPC_MOV:
120 		/* if src is a "cov" and type doesn't match, then it can't be folded
121 		 * for example cov.u32u16+cov.f16f32 can't be folded to cov.u32f32
122 		 */
123 		if (src->cat1.dst_type != src->cat1.src_type &&
124 			conv->cat1.src_type != src->cat1.dst_type)
125 			return false;
126 		break;
127 	default:
128 		break;
129 	}
130 
131 	if (!all_uses_fp16_conv(src))
132 		return false;
133 
134 	if (src->opc == OPC_MOV) {
135 		if (src->cat1.dst_type == src->cat1.src_type) {
136 			/* If we're folding a conversion into a bitwise move, we need to
137 			 * change the dst type to F32 to get the right behavior, since we
138 			 * could be moving a float with a u32.u32 move.
139 			 */
140 			src->cat1.dst_type = conv->cat1.dst_type;
141 			src->cat1.src_type = conv->cat1.src_type;
142 		} else {
143 			/* Otherwise, for typechanging movs, we can just change the dst
144 			 * type to F16 to collaps the two conversions.  For example
145 			 * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
146 			 */
147 			src->cat1.dst_type = conv->cat1.dst_type;
148 		}
149 	}
150 
151 	ir3_set_dst_type(src, is_half(conv));
152 	rewrite_src_uses(src);
153 
154 	return true;
155 }
156 
157 bool
ir3_cf(struct ir3 * ir)158 ir3_cf(struct ir3 *ir)
159 {
160 	void *mem_ctx = ralloc_context(NULL);
161 	bool progress = false;
162 
163 	ir3_find_ssa_uses(ir, mem_ctx, false);
164 
165 	foreach_block (block, &ir->block_list) {
166 		foreach_instr (instr, &block->instr_list) {
167 			progress |= try_conversion_folding(instr);
168 		}
169 	}
170 
171 	ralloc_free(mem_ctx);
172 
173 	return progress;
174 }
175