/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_ra.h"
25 
/* The spilling pass leaves out a few details required to successfully operate
 * ldp/stp:
 *
 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
 *    that and just spills/restores entire values, including arrays and values
 *    created for texture setup which can be more than 4 components.
 * 2. The spiller doesn't add barrier dependencies needed for post-RA
 *    scheduling.
 *
 * The first one, in particular, is much easier to handle after RA because
 * arrays and normal values can be treated the same way. Therefore this pass
 * runs after RA, and handles both issues. This keeps the complexity out of the
 * spiller.
 */
40 
41 static void
split_spill(struct ir3_instruction * spill)42 split_spill(struct ir3_instruction *spill)
43 {
44    unsigned orig_components = spill->srcs[2]->uim_val;
45 
46    /* We don't handle splitting dependencies. */
47    assert(spill->deps_count == 0);
48 
49    if (orig_components <= 4) {
50       if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
51          spill->srcs[1]->wrmask = MASK(orig_components);
52          spill->srcs[1]->num = spill->srcs[1]->array.base;
53          spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
54       }
55       return;
56    }
57 
58    for (unsigned comp = 0; comp < orig_components; comp += 4) {
59       unsigned components = MIN2(orig_components - comp, 4);
60       struct ir3_instruction *clone = ir3_instr_clone(spill);
61       ir3_instr_move_before(clone, spill);
62 
63       clone->srcs[1]->wrmask = MASK(components);
64       if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
65          clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
66          clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
67       }
68 
69       clone->srcs[2]->uim_val = components;
70       clone->cat6.dst_offset +=
71          comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4);
72    }
73 
74    list_delinit(&spill->node);
75 }
76 
77 static void
split_reload(struct ir3_instruction * reload)78 split_reload(struct ir3_instruction *reload)
79 {
80    unsigned orig_components = reload->srcs[2]->uim_val;
81 
82    assert(reload->deps_count == 0);
83 
84    if (orig_components <= 4) {
85       if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
86          reload->dsts[0]->wrmask = MASK(orig_components);
87          reload->dsts[0]->num = reload->dsts[0]->array.base;
88          reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
89       }
90       return;
91    }
92 
93    for (unsigned comp = 0; comp < orig_components; comp += 4) {
94       unsigned components = MIN2(orig_components - comp, 4);
95       struct ir3_instruction *clone = ir3_instr_clone(reload);
96       ir3_instr_move_before(clone, reload);
97 
98       clone->dsts[0]->wrmask = MASK(components);
99       if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
100          clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
101          clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
102       }
103 
104       clone->srcs[2]->uim_val = components;
105       clone->srcs[1]->uim_val +=
106          comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4);
107    }
108 
109    list_delinit(&reload->node);
110 }
111 
112 static void
add_spill_reload_deps(struct ir3_block * block)113 add_spill_reload_deps(struct ir3_block *block)
114 {
115    struct ir3_instruction *last_spill = NULL;
116 
117    foreach_instr (instr, &block->instr_list) {
118       if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
119           last_spill) {
120          ir3_instr_add_dep(instr, last_spill);
121       }
122 
123       if (instr->opc == OPC_SPILL_MACRO)
124          last_spill = instr;
125    }
126 
127 
128    last_spill = NULL;
129 
130    foreach_instr_rev (instr, &block->instr_list) {
131       if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
132           last_spill) {
133          ir3_instr_add_dep(last_spill, instr);
134       }
135 
136       if (instr->opc == OPC_SPILL_MACRO)
137          last_spill = instr;
138    }
139 }
140 
141 bool
ir3_lower_spill(struct ir3 * ir)142 ir3_lower_spill(struct ir3 *ir)
143 {
144    foreach_block (block, &ir->block_list) {
145       foreach_instr_safe (instr, &block->instr_list) {
146          if (instr->opc == OPC_SPILL_MACRO)
147             split_spill(instr);
148          else if (instr->opc == OPC_RELOAD_MACRO)
149             split_reload(instr);
150       }
151 
152       add_spill_reload_deps(block);
153 
154       foreach_instr (instr, &block->instr_list) {
155          if (instr->opc == OPC_SPILL_MACRO)
156             instr->opc = OPC_STP;
157          else if (instr->opc == OPC_RELOAD_MACRO)
158             instr->opc = OPC_LDP;
159       }
160    }
161 
162    return true;
163 }
164