1 /*
2 * Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /* machreg.c - Machine register definitions for the i386/387 */
19
20 #include "machreg.h"
21 #include "error.h"
22 #include "global.h"
23 #include "symtab.h"
24 #include "regutil.h"
25 #include "machreg.h"
26 #include "ili.h"
27
/* Workers behind the public mr_getreg() and mr_getnext() entry points.
 * NOTE(review): an earlier comment said these are called twice to fetch two
 * IR registers for one KR register; the wrappers in this file call each of
 * them exactly once -- confirm whether that note still applies.
 */
static int _mr_getreg(int rtype);
static int _mr_getnext(int rtype);

/* Scan cursor: set by mr_reset(), advanced by _mr_getnext(). */
static int getnext_reg;

/* Snapshot recorded by _mr_getnext() just before it advances a machine
 * register set, so the change can be backed out (for KR registers, per the
 * original comment).
 */
static bool mr_restore;             /* a snapshot has been recorded */
static char mr_restore_next_global; /* saved next_global field */
static char mr_restore_nused;       /* saved nused field */
39
40 static MACH_REG mach_reg[MR_UNIQ] = {
41 {1, 8, 8 /*TBD*/, MR_L1, MR_U1, MR_U1, MR_U1, 0, 0, 'i'}, /* %r's */
42 {1, 8, 8 /*TBD*/, MR_L2, MR_U2, MR_U2, MR_U2, 0, MR_MAX1, 'f'}, /* %f's */
43 {1, 8, 8 /*TBD*/, MR_L3, MR_U3, MR_U3, MR_U3, 0, (MR_MAX1 + MR_MAX2),
44 'x'} /* %f's xmm */
45 };
46
47 REG reg[RATA_RTYPES_TOTAL] = {
48 {6, 0, 0, 0, &mach_reg[0], RCF_NONE}, /* IR */
49 {3, 0, 0, 0, &mach_reg[1], RCF_NONE}, /* SP */
50 {3, 0, 0, 0, &mach_reg[1], RCF_NONE}, /* DP */
51 {6, 0, 0, 0, &mach_reg[0], RCF_NONE}, /* AR */
52 {3, 0, 0, 0, &mach_reg[0], RCF_NONE}, /* KR */
53 {0, 0, 0, 0, 0, 0}, /* VECT */
54 {0, 0, 0, 0, 0, 0}, /* QP */
55 {3, 0, 0, 0, 0, RCF_NONE}, /* CSP */
56 {3, 0, 0, 0, 0, RCF_NONE}, /* CDP */
57 {0, 0, 0, 0, 0, 0}, /* CQP */
58 {0, 0, 0, 0, 0, 0}, /* X87 */
59 {0, 0, 0, 0, 0, 0}, /* CX87 */
60 /* the following will be mapped over SP and DP above */
61 {3, 0, 0, 0, &mach_reg[2], RCF_NONE}, /* SPXM */
62 {3, 0, 0, 0, &mach_reg[2], RCF_NONE}, /* DPXM */
63 };
64
65 RGSETB rgsetb;
66
67 const int scratch_regs[3] = {IR_RAX, IR_RCX, IR_RDX};
68
69 #if defined(TARGET_LLVM_ARM) || defined(TARGET_LLVM_POWER)
70
71 /* arguments passed in registers */
72 int mr_arg_ir[MR_MAX_IREG_ARGS + 1];
73 /* xmm0 --> xmm7 */
74 int mr_arg_xr[MR_MAX_XREG_ARGS + 1] = {XR_XMM0, XR_XMM1, XR_XMM2, XR_XMM3,
75 XR_XMM4, XR_XMM5, XR_XMM6, XR_XMM7};
76
77 /* return result registers */
78 /* rax, rdx */
79 int mr_res_ir[MR_MAX_IREG_RES + 1] = {IR_RAX, IR_RDX};
80 /* xmm0, xmm1 */
81 int mr_res_xr[MR_MAX_XREG_RES + 1] = {XR_XMM0, XR_XMM1};
82
83 #elif defined(TARGET_WIN_X8664)
84
85 /* arguments passed in registers */
86 /* rcx,rdx,r8,r9 */
87 int mr_arg_ir[MR_MAX_IREG_ARGS] = {IR_RCX, IR_RDX, IR_R8, IR_R9};
88 /* xmm0 --> xmm3 */
89 int mr_arg_xr[MR_MAX_XREG_ARGS] = {XR_XMM0, XR_XMM1, XR_XMM2, XR_XMM3};
90
91 /* return result registers */
92 /* rax */
93 int mr_res_ir[MR_MAX_IREG_RES] = {IR_RAX};
94 /* xmm0 */
95 int mr_res_xr[MR_MAX_XREG_RES] = {XR_XMM0};
96
97 #else
98
99 /* arguments passed in registers */
100 /* rdi,rsi,rdx,rcx,r8,r9 */
101 int mr_arg_ir[MR_MAX_IREG_ARGS] = {IR_RDI, IR_RSI, IR_RDX,
102 IR_RCX, IR_R8, IR_R9};
103 /* xmm0 --> xmm7 */
104 int mr_arg_xr[MR_MAX_XREG_ARGS] = {XR_XMM0, XR_XMM1, XR_XMM2, XR_XMM3,
105 XR_XMM4, XR_XMM5, XR_XMM6, XR_XMM7};
106
107 /* return result registers */
108 /* rax, rdx */
109 int mr_res_ir[MR_MAX_IREG_RES] = {IR_RAX, IR_RDX};
110 /* xmm0, xmm1 */
111 int mr_res_xr[MR_MAX_XREG_RES] = {XR_XMM0, XR_XMM1};
112
113 #endif
114
115 /** \brief Initialize mach_reg structs and reg array. This is done for each
116 * function (subprogram)
117 */
118 void
mr_init()119 mr_init()
120 {
121 int i;
122
123 aux.curr_entry->first_dr = reg[RATA_IR].mach_reg->first_global;
124 aux.curr_entry->first_sp = reg[RATA_SP].mach_reg->first_global;
125 aux.curr_entry->first_dp = reg[RATA_DP].mach_reg->first_global;
126 aux.curr_entry->first_ar = reg[RATA_AR].mach_reg->first_global;
127
128 for (i = 0; i < MR_UNIQ; i++) {
129 mach_reg[i].next_global = mach_reg[i].first_global;
130 mach_reg[i].nused = 0;
131 }
132
133 for (i = 0; i <= RATA_RTYPES_ACTIVE; i++) {
134 reg[i].nused = 0;
135 reg[i].rcand = 0;
136 }
137
138 /* for pic code, we need to reserve %ebx -- treat it like it
139 * has already been assigned. Since it is register #1, this
140 * is not too difficult.
141 */
142 if (XBIT(62, 8)) {
143 mach_reg[0].next_global++;
144 mach_reg[0].nused = 1;
145 reg[RATA_IR].nused = 1;
146 reg[RATA_AR].nused = 1;
147 }
148
149 }
150
151 static int
mr_isxmm(int rtype)152 mr_isxmm(int rtype)
153 {
154 #if DEBUG
155 assert((rtype == RATA_SP || rtype == RATA_DP || rtype == RATA_CSP ||
156 rtype == RATA_CDP),
157 "mr_isxmm bad rtype", rtype, ERR_Severe);
158 #endif
159 return (reg[rtype].mach_reg->Class == 'x');
160 }
161
162 void
mr_reset_numglobals(int reduce_by)163 mr_reset_numglobals(int reduce_by)
164 {
165 mach_reg[0].last_global = mach_reg[0].end_global - reduce_by;
166 }
167
168 void
mr_reset_frglobals()169 mr_reset_frglobals()
170 {
171 /* effectively turn off fp global regs. */
172 mach_reg[1].last_global = mach_reg[1].first_global - 1;
173 mach_reg[2].last_global = mach_reg[2].first_global - 1;
174 }
175
/** \brief Get a global register for a register type (RATA_IR, etc.).
 *
 * Global registers are handed out in increasing order: next_global names the
 * next available one, and the usable range runs from first_global to
 * last_global, inclusive.
 *
 * \param rtype register type
 * \return whatever _mr_getreg() returns for \p rtype (a register number, or
 *         NO_REG)
 */
int
mr_getreg(int rtype)
{
  return _mr_getreg(rtype);
}
191
192 static int
_mr_getreg(int rtype)193 _mr_getreg(int rtype)
194 {
195 register MACH_REG *mr;
196
197 if (reg[rtype].nused >= reg[rtype].max)
198 return NO_REG;
199
200 mr = reg[rtype].mach_reg;
201 if (mr->next_global > mr->last_global)
202 return NO_REG;
203
204 if (BIH_SMOVE(gbl.entbih) && mr->next_global > 1)
205 return NO_REG;
206
207 /* currently, only allow more than one floating point
208 * global register if an xbit is set.
209 */
210 if ((rtype == RATA_SP || rtype == RATA_DP || rtype == RATA_CSP ||
211 rtype == RATA_CDP) &&
212 (!XBIT(4, 0x4) || ratb.mexits))
213 if (mr->next_global > mr->first_global)
214 return NO_REG;
215
216 /* floating point globals need to always start from fp2 (fp1 is
217 * by convention where the return value of fp functions is placed)
218 * and then increment for each inner loop being processed. Thus,
219 * the nused field records the largest number of fp registers
220 * assigned to any loop. This is done differently for the I386
221 * fp as opposed to the I386 integers or any other register set
222 * (due primarily to the fact that the fp registers on x86 are
223 * actually a stack).
224 */
225 if ((rtype != RATA_SP && rtype != RATA_DP && rtype != RATA_CSP &&
226 rtype != RATA_CDP) ||
227 (mr->next_global - mr->first_global + 1 > mr->nused)) {
228 reg[rtype].nused++;
229 mr->nused++;
230 }
231 return (mr->next_global++);
232 }
233
234 /** \brief map a register type and global register number to an index value in
235 * the range 0 .. MR_NUMGLB-1, taking into consideration that certain
236 * register types map to the same machine register set.
237 *
238 * This is used by * the optimizer to index into its register history table.
239 */
240 int
mr_gindex(int rtype,int regno)241 mr_gindex(int rtype, int regno)
242 {
243 MACH_REG *mr = reg[rtype].mach_reg;
244 return ((regno - mr->first_global) + mr->mapbase);
245 }
246
247 /** \brief communicate to the scheduler the first global register not assigned
248 * for each register class
249 *
250 * Note that this will be the physical register
251 * number; it reflects the number of registers assigned from the physical
252 * set mapped from the generic register set. Because two or more generic
253 * register sets can map to a single register set, this information
254 * can only be computed after all of the assignments are done.
255 *
256 */
257 void
mr_end()258 mr_end()
259 {
260 aux.curr_entry->first_dr += reg[RATA_IR].mach_reg->nused;
261 aux.curr_entry->first_ar += reg[RATA_AR].mach_reg->nused;
262 aux.curr_entry->first_sp += reg[RATA_SP].mach_reg->nused;
263 aux.curr_entry->first_dp += reg[RATA_DP].mach_reg->nused;
264
265 }
266
267 void
mr_reset_fpregs()268 static mr_reset_fpregs()
269 {
270 mach_reg[1].next_global = mach_reg[1].first_global;
271 mach_reg[2].next_global = mach_reg[2].first_global;
272 }
273
274 /** \brief Initialize for scanning the entire machine register set used for
275 * rtype.
276 *
277 * This mechanism for retrieving registers is done when we can no longer
278 * retrieve registers from mr_getreg (we're out of rtype registers).
279 * Ensuing calls to mr_getnext will attempt to retrieve a register
280 * from the set. The assumption is that the caller (optimizer)
281 * will first call mr_reset, and then call mr_getnext one or more
282 * times.
283 */
284 void
mr_reset(int rtype)285 mr_reset(int rtype)
286 {
287 getnext_reg = reg[rtype].mach_reg->first_global;
288
289 /* if we are generating pic code, we must exclude %ebx as
290 * a potential register.
291 */
292 if ((rtype == RATA_IR || rtype == RATA_AR || rtype == RATA_KR) && XBIT(62, 8))
293 getnext_reg++;
294
295 }
296
/** \brief Try to retrieve the next available register from the set used for
 * rtype.
 *
 * Because the whole set is scanned, a successful retrieval may have to update
 * the mach_reg info: mr_getreg() only uses the portion of the set described
 * by the reg structure, and the two could otherwise get out of sync when
 * registers of different rtypes share the same register set.
 *
 * \param rtype register type
 * \return whatever _mr_getnext() returns for \p rtype (a register number, or
 *         NO_REG)
 */
int
mr_getnext(int rtype)
{
  return _mr_getnext(rtype);
}
315
316 static
_mr_getnext(int rtype)317 int _mr_getnext(int rtype)
318 {
319 int mreg;
320 MACH_REG *mr;
321
322 mr = reg[rtype].mach_reg;
323 if (getnext_reg > mr->last_global)
324 return NO_REG;
325 if (BIH_SMOVE(gbl.entbih) && mr->next_global > 1)
326 return NO_REG;
327
328 if ((rtype == RATA_SP || rtype == RATA_DP || rtype == RATA_CSP ||
329 rtype == RATA_CDP) &&
330 (!XBIT(4, 0x4) || ratb.mexits))
331 if (getnext_reg > mr->first_global)
332 return NO_REG;
333
334 mreg = getnext_reg;
335 getnext_reg++;
336 if (mreg >= mr->next_global) {
337 mr_restore = true;
338 mr_restore_nused = mr->nused;
339 mr_restore_next_global = mr->next_global;
340 /* same comment as in _mr_getreg */
341 if ((rtype != RATA_SP && rtype != RATA_DP && rtype != RATA_CSP &&
342 rtype != RATA_CDP) ||
343 ((mr->next_global - mr->first_global + 1) > mr->nused))
344 mr->nused++;
345 mr->next_global = getnext_reg;
346 }
347 return mreg;
348 }
349
350 /* RGSET functions */
351 static void
mr_init_rgset()352 mr_init_rgset()
353 {
354 RGSET tmp;
355 int bihx;
356
357 /* just verify that regs all fit in RGSET fields. (+1 below is because
358 * current RGSET macro's assume regs start at 1, position 0 in bitfields
359 * is wasted. TST_ and SET_ macros could be changed along with these
360 * asserts to save the bit.
361 */
362 assert(sizeof(tmp.xr) * 8 >= mach_reg[2].max + 1, "RGSET xr ops invalid", 0,
363 ERR_Severe);
364
365 rgsetb.stg_avail = 1;
366
367 /* make sure BIH_RGSET fields are fresh. */
368 bihx = gbl.entbih;
369 for (;;) {
370 BIH_RGSET(bihx) = 0;
371 if (BIH_LAST(bihx))
372 break;
373 bihx = BIH_NEXT(bihx);
374 }
375 }
376
377 /** \brief allocate and initialize a RGSET entry. */
378 int
mr_get_rgset()379 mr_get_rgset()
380 {
381 int rgset;
382
383 rgset = rgsetb.stg_avail++;
384 if (rgsetb.stg_avail > MAXRAT)
385 error((error_code_t)7, ERR_Fatal, 0, CNULL, CNULL);
386 NEED(rgsetb.stg_avail, rgsetb.stg_base, RGSET, rgsetb.stg_size,
387 rgsetb.stg_size + 100);
388 if (rgsetb.stg_base == NULL)
389 error((error_code_t)7, ERR_Fatal, 0, CNULL, CNULL);
390
391 RGSET_XR(rgset) = 0;
392
393 return rgset;
394 }
395
396 static void
mr_dmp_rgset(int rgseti)397 mr_dmp_rgset(int rgseti)
398 {
399 int i;
400 int cnt = 0;
401
402 fprintf(gbl.dbgfil, "rgset %d:", rgseti);
403 if (rgseti == 0) {
404 fprintf(gbl.dbgfil, " null");
405 assert(RGSET_XR(0) == 0, "mr_dmp_rgset says someone was writing 0", 0, ERR_Severe);
406 }
407 for (i = XR_FIRST; i <= XR_LAST; i++) {
408 if (TST_RGSET_XR(rgseti, i)) {
409 fprintf(gbl.dbgfil, " xmm%d", i);
410 cnt++;
411 }
412 }
413 fprintf(gbl.dbgfil, " total %d\n", cnt);
414 }
415
416 /* called from flow.c to tell globalreg, and scheduler which
417 xmm regs are used by the vectorizer.
418 */
419 static void
mr_bset_xmm_rgset(int ili,int bih)420 mr_bset_xmm_rgset(int ili, int bih)
421 {
422 int j, opn;
423 ILI_OP opc;
424 int noprs;
425
426 if (BIH_RGSET(bih) == 0) {
427 BIH_RGSET(bih) = mr_get_rgset();
428 }
429
430 opc = ILI_OPC(ili);
431 noprs = ilis[opc].oprs;
432 for (j = 1; j <= noprs; j++) {
433 opn = ILI_OPND(ili, j);
434 switch (IL_OPRFLAG(opc, j)) {
435 case ILIO_XMM:
436 assert(opn >= XR_FIRST && opn <= XR_LAST,
437 "mr_bset_xmm_rgset: bad xmm register value", ili, ERR_Warning);
438 SET_RGSET_XR(BIH_RGSET(bih), opn);
439 break;
440 default:
441 break;
442 }
443 }
444 }
445