1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44
45
46 /* TODO 27 Oct 04:
47
48 We can possibly do V-V coalescing even when the src is spilled,
49 providing we can arrange for the dst to have the same spill slot.
50
51 Note that state[].hreg is the same as the available real regs.
52
53 Generally rationalise data structures. */
54
55
/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes.
         Assigned during spill-slot allocation (Stage 3). */
      Short spill_offset;
      /* Size of that spill slot.  Initialised to zero with the other
         fields; NOTE(review): not seen being written elsewhere in
         this chunk -- confirm units (bytes?) against the setter. */
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;
71
72
/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* The real register this live range pertains to. */
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;
84
85
/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions.  The identity of
   the register is not recorded here, because the index of this
   structure in doRegisterAllocation()'s |rreg_state| is the index
   number of the register, and the register itself can be extracted
   from the RRegUniverse supplied to doRegisterAllocation(). */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Is this involved in any HLRs (hard, that is real-register,
         live ranges)?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,    /* available for use */
             Unavail, /* in a real-reg live range */
             Bound    /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
120
121
/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because iff vreg_state[i] == j then
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
*/

#define INVALID_RREG_NO ((Short)(-1))

/* NB: these rely on |n_vregs| / |n_rregs| being visible at the point
   of use; they are locals of the main allocation function. */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143
144
145 /* Search forward from some given point in the incoming instruction
146 sequence. Point is to select a virtual register to spill, by
147 finding the vreg which is mentioned as far ahead as possible, in
148 the hope that this will minimise the number of consequent reloads.
149
150 Only do the search for vregs which are Bound in the running state,
151 and for which the .is_spill_cand field is set. This allows the
152 caller to arbitrarily restrict the set of spill candidates to be
153 considered.
154
155 To do this we don't actually need to see the incoming instruction
156 stream. Rather, what we need us the HRegUsage records for the
157 incoming instruction stream. Hence that is passed in.
158
159 Returns an index into the state array indicating the (v,r) pair to
160 spill, or -1 if none was found. */
161 static
findMostDistantlyMentionedVReg(HRegUsage * reg_usages_in,Int search_from_instr,Int num_instrs,RRegState * rreg_state,HRegClass hreg_class,const RegAllocControl * con)162 Int findMostDistantlyMentionedVReg (
163 HRegUsage* reg_usages_in,
164 Int search_from_instr,
165 Int num_instrs,
166 RRegState* rreg_state,
167 HRegClass hreg_class,
168 const RegAllocControl* con
169 )
170 {
171 Int m;
172 Int furthest_k = -1;
173 Int furthest = -1;
174 vassert(search_from_instr >= 0);
175 for (UInt k = con->univ->allocable_start[hreg_class];
176 k <= con->univ->allocable_end[hreg_class]; k++) {
177 if (!rreg_state[k].is_spill_cand)
178 continue;
179 vassert(rreg_state[k].disp == Bound);
180 for (m = search_from_instr; m < num_instrs; m++) {
181 if (HRegUsage__contains(®_usages_in[m], rreg_state[k].vreg))
182 break;
183 }
184 if (m > furthest) {
185 furthest = m;
186 furthest_k = k;
187 }
188 }
189 return furthest_k;
190 }
191
192
193 /* Check that this vreg has been assigned a sane spill offset. */
194 inline
sanity_check_spill_offset(VRegLR * vreg)195 static void sanity_check_spill_offset ( VRegLR* vreg )
196 {
197 switch (vreg->reg_class) {
198 case HRcVec128: case HRcFlt64:
199 vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
200 default:
201 vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
202 }
203 }
204
205
206 /* Double the size of the real-reg live-range array, if needed. */
207 __attribute__((noinline))
ensureRRLRspace_SLOW(RRegLR ** info,Int * size,Int used)208 static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
209 {
210 Int k;
211 RRegLR* arr2;
212 if (0)
213 vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
214 vassert(used == *size);
215 arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
216 for (k = 0; k < *size; k++)
217 arr2[k] = (*info)[k];
218 *size *= 2;
219 *info = arr2;
220 }
221 inline
ensureRRLRspace(RRegLR ** info,Int * size,Int used)222 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223 {
224 if (LIKELY(used < *size)) return;
225 ensureRRLRspace_SLOW(info, size, used);
226 }
227
228
/* Sort an array of RRegLR entries by either the .live_after or
   .dead_before fields.  This is performance-critical.

   Implementation: shellsort with the 3*h+1 (Knuth) gap sequence.
   The comparison key is chosen once up front and the inner sort is
   duplicated for each case, so the hot loops carry no per-element
   branch on |by_live_after|.  The sort is not stable -- presumably
   acceptable to the callers; TODO confirm. */
static void sortRRLRarray ( RRegLR* arr,
                            Int size, Bool by_live_after )
{
   /* Gap sequence h_{n+1} = 3*h_n + 1; largest entry comfortably
      exceeds the 15000-insn cap enforced by the allocator. */
   Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
                    9841, 29524, 88573, 265720,
                    797161, 2391484 };
   Int lo = 0;
   Int hi = size-1;
   Int i, j, h, bigN, hp;
   RRegLR v;

   vassert(size >= 0);
   if (size == 0)
      return;

   bigN = hi - lo + 1; if (bigN < 2) return;
   /* Select the largest starting gap that is smaller than the
      number of elements. */
   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;

   if (by_live_after) {

      /* Gapped insertion-sort passes, keyed on .live_after. */
      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].live_after > v.live_after) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   } else {

      /* Identical passes, keyed on .dead_before. */
      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].dead_before > v.dead_before) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   }
}
283
284
285 /* Compute the index of the highest and lowest 1 in a ULong,
286 respectively. Results are undefined if the argument is zero.
287 Don't pass it zero :) */
ULong__maxIndex(ULong w64)288 static inline UInt ULong__maxIndex ( ULong w64 ) {
289 return 63 - __builtin_clzll(w64);
290 }
291
ULong__minIndex(ULong w64)292 static inline UInt ULong__minIndex ( ULong w64 ) {
293 return __builtin_ctzll(w64);
294 }
295
296
297 /* A target-independent register allocator. Requires various
298 functions which it uses to deal abstractly with instructions and
299 registers, since it cannot have any target-specific knowledge.
300
301 Returns a new list of instructions, which, as a result of the
302 behaviour of mapRegs, will be in-place modifications of the
303 original instructions.
304
305 Requires that the incoming code has been generated using
306 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
307 that range is a checked run-time error.
308
309 Takes an expandable array of pointers to unallocated insns.
310 Returns an expandable array of pointers to allocated insns.
311 */
doRegisterAllocation_v2(HInstrArray * instrs_in,const RegAllocControl * con)312 HInstrArray* doRegisterAllocation_v2 (
313
314 /* Incoming virtual-registerised code. */
315 HInstrArray* instrs_in,
316
317 /* Register allocator controls to use. */
318 const RegAllocControl* con
319 )
320 {
321 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
322
323 const Bool eq_spill_opt = True;
324
325 /* Info on vregs and rregs. Computed once and remains
326 unchanged. */
327 Int n_vregs;
328 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
329
330 /* We keep two copies of the real-reg live range info, one sorted
331 by .live_after and the other by .dead_before. First the
332 unsorted info is created in the _la variant is copied into the
333 _db variant. Once that's done both of them are sorted.
334 We also need two integer cursors which record the next
335 location in the two arrays to consider. */
336 RRegLR* rreg_lrs_la;
337 RRegLR* rreg_lrs_db;
338 Int rreg_lrs_size;
339 Int rreg_lrs_used;
340 Int rreg_lrs_la_next;
341 Int rreg_lrs_db_next;
342
343 /* Info on register usage in the incoming instruction array.
344 Computed once and remains unchanged, more or less; updated
345 sometimes by the direct-reload optimisation. */
346 HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
347
348 /* Used when constructing vreg_lrs (for allocating stack
349 slots). */
350 Short ss_busy_until_before[N_SPILL64S];
351
352 /* Used when constructing rreg_lrs. */
353 Int* rreg_live_after;
354 Int* rreg_dead_before;
355
356 /* Running state of the core allocation algorithm. */
357 RRegState* rreg_state; /* [0 .. n_rregs-1] */
358 Int n_rregs;
359
360 /* .. and the redundant backward map */
361 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
362 This implies n_rregs must be <= 32768. */
363 Short* vreg_state; /* [0 .. n_vregs-1] */
364
365 /* The vreg -> rreg map constructed and then applied to each
366 instr. */
367 HRegRemap remap;
368
369 /* The output array of instructions. */
370 HInstrArray* instrs_out;
371
372 /* Sanity checks are expensive. They are only done periodically,
373 not at each insn processed. */
374 Bool do_sanity_check;
375
376 vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
377 vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
378 vassert(0 == (N_SPILL64S % 2));
379
380 /* The live range numbers are signed shorts, and so limiting the
381 number of insns to 15000 comfortably guards against them
382 overflowing 32k. */
383 vassert(instrs_in->arr_used <= 15000);
384
385 # define INVALID_INSTRNO (-2)
386
387 # define EMIT_INSTR(_instr) \
388 do { \
389 HInstr* _tmp = (_instr); \
390 if (DEBUG_REGALLOC) { \
391 vex_printf("** "); \
392 con->ppInstr(_tmp, con->mode64); \
393 vex_printf("\n\n"); \
394 } \
395 addHInstr ( instrs_out, _tmp ); \
396 } while (0)
397
398 # define PRINT_STATE \
399 do { \
400 Int z, q; \
401 for (z = 0; z < n_rregs; z++) { \
402 vex_printf(" rreg_state[%2d] = ", z); \
403 con->ppReg(con->univ->regs[z]); \
404 vex_printf(" \t"); \
405 switch (rreg_state[z].disp) { \
406 case Free: vex_printf("Free\n"); break; \
407 case Unavail: vex_printf("Unavail\n"); break; \
408 case Bound: vex_printf("BoundTo "); \
409 con->ppReg(rreg_state[z].vreg); \
410 vex_printf("\n"); break; \
411 } \
412 } \
413 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
414 q = 0; \
415 for (z = 0; z < n_vregs; z++) { \
416 if (vreg_state[z] == INVALID_RREG_NO) \
417 continue; \
418 vex_printf("[%d] -> %d ", z, vreg_state[z]); \
419 q++; \
420 if (q > 0 && (q % 6) == 0) \
421 vex_printf("\n "); \
422 } \
423 vex_printf("\n"); \
424 } while (0)
425
426
427 /* --------- Stage 0: set up output array --------- */
428 /* --------- and allocate/initialise running state. --------- */
429
430 instrs_out = newHInstrArray();
431
432 /* ... and initialise running state. */
433 /* n_rregs is no more than a short name for n_available_real_regs. */
434 n_rregs = con->univ->allocable;
435 n_vregs = instrs_in->n_vregs;
436
437 /* If this is not so, vreg_state entries will overflow. */
438 vassert(n_vregs < 32767);
439
440 /* If this is not so, the universe we have is nonsensical. */
441 vassert(n_rregs > 0);
442
443 rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
444 vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
445
446 for (Int j = 0; j < n_rregs; j++) {
447 rreg_state[j].has_hlrs = False;
448 rreg_state[j].disp = Free;
449 rreg_state[j].vreg = INVALID_HREG;
450 rreg_state[j].is_spill_cand = False;
451 rreg_state[j].eq_spill_slot = False;
452 }
453
454 for (Int j = 0; j < n_vregs; j++)
455 vreg_state[j] = INVALID_RREG_NO;
456
457
458 /* --------- Stage 1: compute vreg live ranges. --------- */
459 /* --------- Stage 2: compute rreg live ranges. --------- */
460
461 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
462
463 /* This is relatively simple, because (1) we only seek the complete
464 end-to-end live range of each vreg, and are not interested in
465 any holes in it, and (2) the vregs are conveniently numbered 0
466 .. n_vregs-1, so we can just dump the results in a
467 pre-allocated array. */
468
469 vreg_lrs = NULL;
470 if (n_vregs > 0)
471 vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
472
473 for (Int j = 0; j < n_vregs; j++) {
474 vreg_lrs[j].live_after = INVALID_INSTRNO;
475 vreg_lrs[j].dead_before = INVALID_INSTRNO;
476 vreg_lrs[j].spill_offset = 0;
477 vreg_lrs[j].spill_size = 0;
478 vreg_lrs[j].reg_class = HRcINVALID;
479 }
480
481 /* An array to hold the reg-usage info for the incoming
482 instructions. */
483 reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
484
485 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
486
487 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
488
489 /* This is more complex than Stage 1, because we need to compute
490 exactly all the live ranges of all the allocatable real regs,
491 and we don't know in advance how many there will be. */
492
493 rreg_lrs_used = 0;
494 rreg_lrs_size = 4;
495 rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
496 rreg_lrs_db = NULL; /* we'll create this later */
497
498 /* We'll need to track live range start/end points seperately for
499 each rreg. Sigh. */
500 vassert(n_rregs > 0);
501 rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
502 rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
503
504 for (Int j = 0; j < n_rregs; j++) {
505 rreg_live_after[j] =
506 rreg_dead_before[j] = INVALID_INSTRNO;
507 }
508
509 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
510
511 /* ------ start of ITERATE OVER INSNS ------ */
512
513 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
514
515 con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64);
516 reg_usage_arr[ii].isVregVregMove
517 = reg_usage_arr[ii].isRegRegMove
518 && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
519 && hregIsVirtual(reg_usage_arr[ii].regMoveDst);
520
521 if (0) {
522 vex_printf("\n%d stage1: ", ii);
523 con->ppInstr(instrs_in->arr[ii], con->mode64);
524 vex_printf("\n");
525 ppHRegUsage(con->univ, ®_usage_arr[ii]);
526 }
527
528 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
529
530 /* for each virtual reg mentioned in the insn ... */
531 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
532
533 HReg vreg = reg_usage_arr[ii].vRegs[j];
534 vassert(hregIsVirtual(vreg));
535
536 Int k = hregIndex(vreg);
537 if (k < 0 || k >= n_vregs) {
538 vex_printf("\n");
539 con->ppInstr(instrs_in->arr[ii], con->mode64);
540 vex_printf("\n");
541 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
542 vpanic("doRegisterAllocation: out-of-range vreg");
543 }
544
545 /* Take the opportunity to note its regclass. We'll need
546 that when allocating spill slots. */
547 if (vreg_lrs[k].reg_class == HRcINVALID) {
548 /* First mention of this vreg. */
549 vreg_lrs[k].reg_class = hregClass(vreg);
550 } else {
551 /* Seen it before, so check for consistency. */
552 vassert(vreg_lrs[k].reg_class == hregClass(vreg));
553 }
554
555 /* Now consider live ranges. */
556 switch (reg_usage_arr[ii].vMode[j]) {
557 case HRmRead:
558 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
559 vex_printf("\n\nOFFENDING VREG = %d\n", k);
560 vpanic("doRegisterAllocation: "
561 "first event for vreg is Read");
562 }
563 vreg_lrs[k].dead_before = toShort(ii + 1);
564 break;
565 case HRmWrite:
566 if (vreg_lrs[k].live_after == INVALID_INSTRNO)
567 vreg_lrs[k].live_after = toShort(ii);
568 vreg_lrs[k].dead_before = toShort(ii + 1);
569 break;
570 case HRmModify:
571 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
572 vex_printf("\n\nOFFENDING VREG = %d\n", k);
573 vpanic("doRegisterAllocation: "
574 "first event for vreg is Modify");
575 }
576 vreg_lrs[k].dead_before = toShort(ii + 1);
577 break;
578 default:
579 vpanic("doRegisterAllocation(1)");
580 } /* switch */
581
582 } /* iterate over virtual registers */
583
584 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
585
586 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
587
588 /* If this doesn't hold, the following iteration over real registers
589 will fail miserably. */
590 vassert(N_RREGUNIVERSE_REGS == 64);
591
592 const ULong rRead = reg_usage_arr[ii].rRead;
593 const ULong rWritten = reg_usage_arr[ii].rWritten;
594 const ULong rMentioned = rRead | rWritten;
595
596 UInt rReg_minIndex;
597 UInt rReg_maxIndex;
598 if (rMentioned == 0) {
599 /* There are no real register uses in this insn. Set
600 rReg_{min,max}Index so that the following loop doesn't iterate
601 at all, so as to avoid wasting time. */
602 rReg_minIndex = 1;
603 rReg_maxIndex = 0;
604 } else {
605 rReg_minIndex = ULong__minIndex(rMentioned);
606 rReg_maxIndex = ULong__maxIndex(rMentioned);
607 /* Don't bother to look at registers which are not available
608 to the allocator. We asserted above that n_rregs > 0, so
609 n_rregs-1 is safe. */
610 if (rReg_maxIndex >= n_rregs)
611 rReg_maxIndex = n_rregs-1;
612 }
613
614 /* for each allocator-available real reg mentioned in the insn ... */
615 /* Note. We are allocating only over the real regs available to
616 the allocator. Others, eg the stack or baseblock pointers,
617 are unavailable to allocation and so we never visit them.
618 Hence the iteration is cut off at n_rregs-1, since n_rregs ==
619 univ->allocable. */
620 for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
621
622 const ULong jMask = 1ULL << j;
623 if (LIKELY((rMentioned & jMask) == 0))
624 continue;
625
626 const Bool isR = (rRead & jMask) != 0;
627 const Bool isW = (rWritten & jMask) != 0;
628
629 /* Dummy initialisations of flush_la and flush_db to avoid
630 possible bogus uninit-var warnings from gcc. */
631 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
632 Bool flush = False;
633
634 if (isW && !isR) {
635 flush_la = rreg_live_after[j];
636 flush_db = rreg_dead_before[j];
637 if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
638 flush = True;
639 rreg_live_after[j] = ii;
640 rreg_dead_before[j] = ii+1;
641 } else if (!isW && isR) {
642 if (rreg_live_after[j] == INVALID_INSTRNO) {
643 vex_printf("\nOFFENDING RREG = ");
644 con->ppReg(con->univ->regs[j]);
645 vex_printf("\n");
646 vex_printf("\nOFFENDING instr = ");
647 con->ppInstr(instrs_in->arr[ii], con->mode64);
648 vex_printf("\n");
649 vpanic("doRegisterAllocation: "
650 "first event for rreg is Read");
651 }
652 rreg_dead_before[j] = ii+1;
653 } else {
654 vassert(isR && isW);
655 if (rreg_live_after[j] == INVALID_INSTRNO) {
656 vex_printf("\nOFFENDING RREG = ");
657 con->ppReg(con->univ->regs[j]);
658 vex_printf("\n");
659 vex_printf("\nOFFENDING instr = ");
660 con->ppInstr(instrs_in->arr[ii], con->mode64);
661 vex_printf("\n");
662 vpanic("doRegisterAllocation: "
663 "first event for rreg is Modify");
664 }
665 rreg_dead_before[j] = ii+1;
666 }
667
668 if (flush) {
669 vassert(flush_la != INVALID_INSTRNO);
670 vassert(flush_db != INVALID_INSTRNO);
671 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
672 if (0)
673 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
674 rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j];
675 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
676 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
677 rreg_lrs_used++;
678 }
679
680 } /* iterate over rregs in the instr */
681
682 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
683
684 } /* iterate over insns */
685
686 /* ------ end of ITERATE OVER INSNS ------ */
687
688 /* ------ start of FINALISE RREG LIVE RANGES ------ */
689
690 /* Now finish up any live ranges left over. */
691 for (Int j = 0; j < n_rregs; j++) {
692
693 if (0) {
694 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
695 rreg_dead_before[j]);
696 }
697 vassert( (rreg_live_after[j] == INVALID_INSTRNO
698 && rreg_dead_before[j] == INVALID_INSTRNO)
699 ||
700 (rreg_live_after[j] != INVALID_INSTRNO
701 && rreg_dead_before[j] != INVALID_INSTRNO)
702 );
703
704 if (rreg_live_after[j] == INVALID_INSTRNO)
705 continue;
706
707 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
708 if (0)
709 vex_printf("FLUSH 2 (%d,%d)\n",
710 rreg_live_after[j], rreg_dead_before[j]);
711 rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j];
712 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
713 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
714 rreg_lrs_used++;
715 }
716
717 /* Compute summary hints for choosing real regs. If a real reg is
718 involved in a hard live range, record that fact in the fixed
719 part of the running rreg_state. Later, when offered a choice between
720 rregs, it's better to choose one which is not marked as having
721 any HLRs, since ones with HLRs may need to be spilled around
722 their HLRs. Correctness of final assignment is unaffected by
723 this mechanism -- it is only an optimisation. */
724
725 for (Int j = 0; j < rreg_lrs_used; j++) {
726 HReg rreg = rreg_lrs_la[j].rreg;
727 vassert(!hregIsVirtual(rreg));
728 /* rreg is involved in a HLR. Record this info in the array, if
729 there is space. */
730 UInt ix = hregIndex(rreg);
731 vassert(ix < n_rregs);
732 rreg_state[ix].has_hlrs = True;
733 }
734 if (0) {
735 for (Int j = 0; j < n_rregs; j++) {
736 if (!rreg_state[j].has_hlrs)
737 continue;
738 con->ppReg(con->univ->regs[j]);
739 vex_printf(" hinted\n");
740 }
741 }
742
743 /* Finally, copy the _la variant into the _db variant and
744 sort both by their respective fields. */
745 rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
746 for (Int j = 0; j < rreg_lrs_used; j++)
747 rreg_lrs_db[j] = rreg_lrs_la[j];
748
749 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
750 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
751
752 /* And set up the cursors. */
753 rreg_lrs_la_next = 0;
754 rreg_lrs_db_next = 0;
755
756 for (Int j = 1; j < rreg_lrs_used; j++) {
757 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
758 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
759 }
760
761 /* ------ end of FINALISE RREG LIVE RANGES ------ */
762
763 if (DEBUG_REGALLOC) {
764 for (Int j = 0; j < n_vregs; j++) {
765 vex_printf("vreg %d: la = %d, db = %d\n",
766 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
767 }
768 }
769
770 if (DEBUG_REGALLOC) {
771 vex_printf("RRegLRs by LA:\n");
772 for (Int j = 0; j < rreg_lrs_used; j++) {
773 vex_printf(" ");
774 con->ppReg(rreg_lrs_la[j].rreg);
775 vex_printf(" la = %d, db = %d\n",
776 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
777 }
778 vex_printf("RRegLRs by DB:\n");
779 for (Int j = 0; j < rreg_lrs_used; j++) {
780 vex_printf(" ");
781 con->ppReg(rreg_lrs_db[j].rreg);
782 vex_printf(" la = %d, db = %d\n",
783 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
784 }
785 }
786
787 /* --------- Stage 3: allocate spill slots. --------- */
788
789 /* Each spill slot is 8 bytes long. For vregs which take more than
790 64 bits to spill (classes Flt64 and Vec128), we have to allocate
791 two consecutive spill slots. For 256 bit registers (class
792 Vec256), we have to allocate four consecutive spill slots.
793
794 For Vec128-class on PowerPC, the spill slot's actual address
795 must be 16-byte aligned. Since the spill slot's address is
796 computed as an offset from the guest state pointer, and since
797 the user of the generated code must set that pointer to a
798 32-aligned value, we have the residual obligation here of
799 choosing a 16-aligned spill slot offset for Vec128-class values.
800 Since each spill slot is 8 bytes long, that means for
801 Vec128-class values we must allocated a spill slot number which
802 is zero mod 2.
803
804 Similarly, for Vec256 class on amd64, find a spill slot number
805 which is zero mod 4. This guarantees it will be 32 byte
806 aligned, which isn't actually necessary on amd64 (we use movUpd
807 etc to spill), but seems like good practice.
808
809 Do a rank-based allocation of vregs to spill slot numbers. We
810 put as few values as possible in spill slots, but nevertheless
811 need to have a spill slot available for all vregs, just in case.
812 */
813 /* Int max_ss_no = -1; */
814
815 vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before));
816
817 for (Int j = 0; j < n_vregs; j++) {
818
819 /* True iff this vreg is unused. In which case we also expect
820 that the reg_class field for it has not been set. */
821 if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
822 vassert(vreg_lrs[j].reg_class == HRcINVALID);
823 continue;
824 }
825
826 /* The spill slots are 64 bits in size. As per the comment on
827 definition of HRegClass in host_generic_regs.h, that means,
828 to spill a vreg of class Flt64 or Vec128, we'll need to find
829 two adjacent spill slots to use. For Vec256, we'll need to
830 find four adjacent slots to use. Note, this logic needs to
831 kept in sync with the size info on the definition of
832 HRegClass. */
833 Int ss_no = -1;
834 switch (vreg_lrs[j].reg_class) {
835
836 case HRcVec128: case HRcFlt64:
837 /* Find two adjacent free slots in which between them
838 provide up to 128 bits in which to spill the vreg.
839 Since we are trying to find an even:odd pair, move
840 along in steps of 2 (slots). */
841 for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
842 if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
843 && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
844 break;
845 if (ss_no >= N_SPILL64S-1) {
846 vpanic("LibVEX_N_SPILL_BYTES is too low. "
847 "Increase and recompile.");
848 }
849 ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
850 ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
851 break;
852
853 default:
854 /* The ordinary case -- just find a single spill slot. */
855 /* Find the lowest-numbered spill slot which is available
856 at the start point of this interval, and assign the
857 interval to it. */
858 for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
859 if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
860 break;
861 if (ss_no == N_SPILL64S) {
862 vpanic("LibVEX_N_SPILL_BYTES is too low. "
863 "Increase and recompile.");
864 }
865 ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
866 break;
867
868 } /* switch (vreg_lrs[j].reg_class) */
869
870 /* This reflects LibVEX's hard-wired knowledge of the baseBlock
871 layout: the guest state, then two equal sized areas following
872 it for two sets of shadow state, and then the spill area. */
873 vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8);
874
875 /* Independent check that we've made a sane choice of slot */
876 sanity_check_spill_offset( &vreg_lrs[j] );
877 /* if (j > max_ss_no) */
878 /* max_ss_no = j; */
879 }
880
881 if (0) {
882 vex_printf("\n\n");
883 for (Int j = 0; j < n_vregs; j++)
884 vex_printf("vreg %d --> spill offset %d\n",
885 j, vreg_lrs[j].spill_offset);
886 }
887
888 /* --------- Stage 4: establish rreg preferences --------- */
889
890 /* It may be advantageous to allocating certain vregs to specific
891 rregs, as a way of avoiding reg-reg moves later. Here we
892 establish which, if any, rreg each vreg would prefer to be in.
893 Note that this constrains the allocator -- ideally we end up
894 with as few as possible vregs expressing a preference.
895
896 This is an optimisation: if the .preferred_rreg field is never
897 set to anything different from INVALID_HREG, the allocator still
898 works. */
899
900 /* 30 Dec 04: removed this mechanism as it does not seem to
901 help. */
902
903 /* --------- Stage 5: process instructions --------- */
904
905 /* This is the main loop of the allocator. First, we need to
906 correctly set up our running state, which tracks the status of
907 each real register. */
908
909 /* ------ BEGIN: Process each insn in turn. ------ */
910
911 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
912
913 if (DEBUG_REGALLOC) {
914 vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
915 vex_printf("---- ");
916 con->ppInstr(instrs_in->arr[ii], con->mode64);
917 vex_printf("\n\nInitial state:\n");
918 PRINT_STATE;
919 vex_printf("\n");
920 }
921
922 /* ------------ Sanity checks ------------ */
923
924 /* Sanity checks are expensive. So they are done only once
925 every 17 instructions, and just before the last
926 instruction. */
927 do_sanity_check
928 = toBool(
929 False /* Set to True for sanity checking of all insns. */
930 || ii == instrs_in->arr_used-1
931 || (ii > 0 && (ii % 17) == 0)
932 );
933
934 if (do_sanity_check) {
935
936 /* Sanity check 1: all rregs with a hard live range crossing
937 this insn must be marked as unavailable in the running
938 state. */
939 for (Int j = 0; j < rreg_lrs_used; j++) {
940 if (rreg_lrs_la[j].live_after < ii
941 && ii < rreg_lrs_la[j].dead_before) {
942 /* ii is the middle of a hard live range for some real
943 reg. Check it's marked as such in the running
944 state. */
945 HReg reg = rreg_lrs_la[j].rreg;
946
947 if (0) {
948 vex_printf("considering la %d .. db %d reg = ",
949 rreg_lrs_la[j].live_after,
950 rreg_lrs_la[j].dead_before);
951 con->ppReg(reg);
952 vex_printf("\n");
953 }
954
955 /* assert that this rreg is marked as unavailable */
956 vassert(!hregIsVirtual(reg));
957 vassert(rreg_state[hregIndex(reg)].disp == Unavail);
958 }
959 }
960
961 /* Sanity check 2: conversely, all rregs marked as
962 unavailable in the running rreg_state must have a
963 corresponding hard live range entry in the rreg_lrs
964 array. */
965 for (Int j = 0; j < n_rregs; j++) {
966 vassert(rreg_state[j].disp == Bound
967 || rreg_state[j].disp == Free
968 || rreg_state[j].disp == Unavail);
969 if (rreg_state[j].disp != Unavail)
970 continue;
971 Int k;
972 for (k = 0; k < rreg_lrs_used; k++) {
973 HReg reg = rreg_lrs_la[k].rreg;
974 vassert(!hregIsVirtual(reg));
975 if (hregIndex(reg) == j
976 && rreg_lrs_la[k].live_after < ii
977 && ii < rreg_lrs_la[k].dead_before)
978 break;
979 }
980 /* If this vassertion fails, we couldn't find a
981 corresponding HLR. */
982 vassert(k < rreg_lrs_used);
983 }
984
985 /* Sanity check 3: all vreg-rreg bindings must bind registers
986 of the same class. */
987 for (Int j = 0; j < n_rregs; j++) {
988 if (rreg_state[j].disp != Bound) {
989 vassert(rreg_state[j].eq_spill_slot == False);
990 continue;
991 }
992 vassert(hregClass(con->univ->regs[j])
993 == hregClass(rreg_state[j].vreg));
994 vassert( hregIsVirtual(rreg_state[j].vreg));
995 }
996
997 /* Sanity check 4: the vreg_state and rreg_state
998 mutually-redundant mappings are consistent. If
999 rreg_state[j].vreg points at some vreg_state entry then
1000 that vreg_state entry should point back at
1001 rreg_state[j]. */
1002 for (Int j = 0; j < n_rregs; j++) {
1003 if (rreg_state[j].disp != Bound)
1004 continue;
1005 Int k = hregIndex(rreg_state[j].vreg);
1006 vassert(IS_VALID_VREGNO(k));
1007 vassert(vreg_state[k] == j);
1008 }
1009 for (Int j = 0; j < n_vregs; j++) {
1010 Int k = vreg_state[j];
1011 if (k == INVALID_RREG_NO)
1012 continue;
1013 vassert(IS_VALID_RREGNO(k));
1014 vassert(rreg_state[k].disp == Bound);
1015 vassert(hregIndex(rreg_state[k].vreg) == j);
1016 }
1017
1018 } /* if (do_sanity_check) */
1019
1020 /* ------------ end of Sanity checks ------------ */
1021
1022 /* Do various optimisations pertaining to register coalescing
1023 and preferencing:
1024 MOV v <-> v coalescing (done here).
1025 MOV v <-> r coalescing (not yet, if ever)
1026 */
1027 /* If doing a reg-reg move between two vregs, and the src's live
1028 range ends here and the dst's live range starts here, bind
1029 the dst to the src's rreg, and that's all. */
1030 if (reg_usage_arr[ii].isVregVregMove) {
1031 HReg vregS = reg_usage_arr[ii].regMoveSrc;
1032 HReg vregD = reg_usage_arr[ii].regMoveDst;
1033 /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
1034 vassert(hregClass(vregS) == hregClass(vregD));
1035 Int k = hregIndex(vregS);
1036 Int m = hregIndex(vregD);
1037 vassert(IS_VALID_VREGNO(k));
1038 vassert(IS_VALID_VREGNO(m));
1039 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1040 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1041 if (DEBUG_REGALLOC) {
1042 vex_printf("COALESCE ");
1043 con->ppReg(vregS);
1044 vex_printf(" -> ");
1045 con->ppReg(vregD);
1046 vex_printf("\n\n");
1047 }
1048 /* Find the state entry for vregS. */
1049 Int n = vreg_state[k]; /* k is the index of vregS */
1050 if (n == INVALID_RREG_NO) {
1051 /* vregS is not currently in a real register. So we can't
1052 do the coalescing. Give up. */
1053 goto cannot_coalesce;
1054 }
1055 vassert(IS_VALID_RREGNO(n));
1056
1057 /* Finally, we can do the coalescing. It's trivial -- merely
1058 claim vregS's register for vregD. */
1059 rreg_state[n].vreg = vregD;
1060 vassert(IS_VALID_VREGNO(hregIndex(vregD)));
1061 vassert(IS_VALID_VREGNO(hregIndex(vregS)));
1062 vreg_state[hregIndex(vregD)] = toShort(n);
1063 vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
1064
1065 /* This rreg has become associated with a different vreg and
1066 hence with a different spill slot. Play safe. */
1067 rreg_state[n].eq_spill_slot = False;
1068
1069 /* Move on to the next insn. We skip the post-insn stuff for
1070 fixed registers, since this move should not interact with
1071 them in any way. */
1072 continue;
1073 }
1074 cannot_coalesce:
1075
1076 /* ------ Free up rregs bound to dead vregs ------ */
1077
1078 /* Look for vregs whose live range has just ended, and
1079 mark the associated rreg as free. */
1080
1081 for (Int j = 0; j < n_rregs; j++) {
1082 if (rreg_state[j].disp != Bound)
1083 continue;
1084 UInt vregno = hregIndex(rreg_state[j].vreg);
1085 vassert(IS_VALID_VREGNO(vregno));
1086 if (vreg_lrs[vregno].dead_before <= ii) {
1087 rreg_state[j].disp = Free;
1088 rreg_state[j].eq_spill_slot = False;
1089 Int m = hregIndex(rreg_state[j].vreg);
1090 vassert(IS_VALID_VREGNO(m));
1091 vreg_state[m] = INVALID_RREG_NO;
1092 if (DEBUG_REGALLOC) {
1093 vex_printf("free up ");
1094 con->ppReg(con->univ->regs[j]);
1095 vex_printf("\n");
1096 }
1097 }
1098 }
1099
1100 /* ------ Pre-instruction actions for fixed rreg uses ------ */
1101
1102 /* Now we have to deal with rregs which are about to be made
1103 live by this instruction -- in other words, are entering into
1104 one of their live ranges. If any such rreg holds a vreg, we
1105 will have to free up the rreg. The simplest solution which
1106 is correct is to spill the rreg.
1107
1108 Note we could do better:
1109 * Could move it into some other free rreg, if one is available
1110
1111 Do this efficiently, by incrementally stepping along an array
1112 of rreg HLRs that are known to be sorted by start point
1113 (their .live_after field).
1114 */
1115 while (True) {
1116 vassert(rreg_lrs_la_next >= 0);
1117 vassert(rreg_lrs_la_next <= rreg_lrs_used);
1118 if (rreg_lrs_la_next == rreg_lrs_used)
1119 break; /* no more real reg live ranges to consider */
1120 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1121 break; /* next live range does not yet start */
1122 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1123 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1124 Find the associated rreg_state entry. */
1125 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1126 Real register live ranges are guaranteed to be well-formed
1127 in that they start with a write to the register -- Stage 2
1128 rejects any code not satisfying this. So the correct
1129 question to ask is whether
1130 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1131 whether the reg becomes live after this insn -- rather
1132 than before it. */
1133 if (DEBUG_REGALLOC) {
1134 vex_printf("need to free up rreg: ");
1135 con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg);
1136 vex_printf("\n\n");
1137 }
1138 Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
1139
1140 /* If this fails, we don't have an entry for this rreg.
1141 Which we should. */
1142 vassert(IS_VALID_RREGNO(k));
1143 Int m = hregIndex(rreg_state[k].vreg);
1144 if (rreg_state[k].disp == Bound) {
1145 /* Yes, there is an associated vreg. Spill it if it's
1146 still live. */
1147 vassert(IS_VALID_VREGNO(m));
1148 vreg_state[m] = INVALID_RREG_NO;
1149 if (vreg_lrs[m].dead_before > ii) {
1150 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1151 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1152 HInstr* spill1 = NULL;
1153 HInstr* spill2 = NULL;
1154 con->genSpill(&spill1, &spill2, con->univ->regs[k],
1155 vreg_lrs[m].spill_offset, con->mode64);
1156 vassert(spill1 || spill2); /* can't both be NULL */
1157 if (spill1)
1158 EMIT_INSTR(spill1);
1159 if (spill2)
1160 EMIT_INSTR(spill2);
1161 }
1162 rreg_state[k].eq_spill_slot = True;
1163 }
1164 }
1165 rreg_state[k].disp = Unavail;
1166 rreg_state[k].vreg = INVALID_HREG;
1167 rreg_state[k].eq_spill_slot = False;
1168
1169 /* check for further rregs entering HLRs at this point */
1170 rreg_lrs_la_next++;
1171 }
1172
1173 if (DEBUG_REGALLOC) {
1174 vex_printf("After pre-insn actions for fixed regs:\n");
1175 PRINT_STATE;
1176 vex_printf("\n");
1177 }
1178
1179 /* ------ Deal with the current instruction. ------ */
1180
1181 /* Finally we can begin the processing of this instruction
1182 itself. The aim is to free up enough rregs for this insn.
1183 This may generate spill stores since we may have to evict
1184 some vregs currently in rregs. Also generates spill loads.
1185 We also build up the final vreg->rreg mapping to be applied
1186 to the insn. */
1187
1188 initHRegRemap(&remap);
1189
1190 /* ------------ BEGIN directReload optimisation ----------- */
1191
1192 /* If the instruction reads exactly one vreg which is currently
1193 in a spill slot, and this is last use of that vreg, see if we
1194 can convert the instruction into one that reads directly from
1195 the spill slot. This is clearly only possible for x86 and
1196 amd64 targets, since ppc and arm are load-store
1197 architectures. If successful, replace instrs_in->arr[ii]
1198 with this new instruction, and recompute its reg usage, so
1199 that the change is invisible to the standard-case handling
1200 that follows. */
1201
1202 if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) {
1203 Bool debug_direct_reload = False;
1204 HReg cand = INVALID_HREG;
1205 Bool nreads = 0;
1206 Short spilloff = 0;
1207
1208 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1209
1210 HReg vreg = reg_usage_arr[ii].vRegs[j];
1211 vassert(hregIsVirtual(vreg));
1212
1213 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1214 nreads++;
1215 Int m = hregIndex(vreg);
1216 vassert(IS_VALID_VREGNO(m));
1217 Int k = vreg_state[m];
1218 if (!IS_VALID_RREGNO(k)) {
1219 /* ok, it is spilled. Now, is this its last use? */
1220 vassert(vreg_lrs[m].dead_before >= ii+1);
1221 if (vreg_lrs[m].dead_before == ii+1
1222 && hregIsInvalid(cand)) {
1223 spilloff = vreg_lrs[m].spill_offset;
1224 cand = vreg;
1225 }
1226 }
1227 }
1228 }
1229
1230 if (nreads == 1 && ! hregIsInvalid(cand)) {
1231 HInstr* reloaded;
1232 if (reg_usage_arr[ii].n_vRegs == 2)
1233 vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1234 reg_usage_arr[ii].vRegs[1]));
1235
1236 reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff);
1237 if (debug_direct_reload && !reloaded) {
1238 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1239 con->ppInstr(instrs_in->arr[ii], con->mode64);
1240 }
1241 if (reloaded) {
1242 /* Update info about the insn, so it looks as if it had
1243 been in this form all along. */
1244 instrs_in->arr[ii] = reloaded;
1245             con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii],
1246 con->mode64);
1247 if (debug_direct_reload && !reloaded) {
1248 vex_printf(" --> ");
1249 con->ppInstr(reloaded, con->mode64);
1250 }
1251 }
1252
1253 if (debug_direct_reload && !reloaded)
1254 vex_printf("\n");
1255 }
1256
1257 }
1258
1259 /* ------------ END directReload optimisation ------------ */
1260
1261 /* for each virtual reg mentioned in the insn ... */
1262 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1263
1264 HReg vreg = reg_usage_arr[ii].vRegs[j];
1265 vassert(hregIsVirtual(vreg));
1266
1267 if (0) {
1268 vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
1269 }
1270
1271 /* Now we're trying to find a rreg for "vreg". First of all,
1272 if it already has an rreg assigned, we don't need to do
1273 anything more. Inspect the current state to find out. */
1274 Int m = hregIndex(vreg);
1275 vassert(IS_VALID_VREGNO(m));
1276 Int n = vreg_state[m];
1277 if (IS_VALID_RREGNO(n)) {
1278 vassert(rreg_state[n].disp == Bound);
1279 addToHRegRemap(&remap, vreg, con->univ->regs[n]);
1280 /* If this rreg is written or modified, mark it as different
1281 from any spill slot value. */
1282 if (reg_usage_arr[ii].vMode[j] != HRmRead)
1283 rreg_state[n].eq_spill_slot = False;
1284 continue;
1285 } else {
1286 vassert(n == INVALID_RREG_NO);
1287 }
1288
1289 /* No luck. The next thing to do is see if there is a
1290 currently free rreg available, of the correct class. If
1291 so, bag it. NOTE, we could improve this by selecting an
1292 rreg for which the next live-range event is as far ahead
1293 as possible. */
1294 Int k_suboptimal = -1;
1295 Int k;
1296 for (k = con->univ->allocable_start[hregClass(vreg)];
1297 k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
1298 if (rreg_state[k].disp != Free)
1299 continue;
1300 if (rreg_state[k].has_hlrs) {
1301 /* Well, at least we can use k_suboptimal if we really
1302 have to. Keep on looking for a better candidate. */
1303 k_suboptimal = k;
1304 } else {
1305 /* Found a preferable reg. Use it. */
1306 k_suboptimal = -1;
1307 break;
1308 }
1309 }
1310 if (k_suboptimal >= 0)
1311 k = k_suboptimal;
1312
1313 if (k <= con->univ->allocable_end[hregClass(vreg)]) {
1314 rreg_state[k].disp = Bound;
1315 rreg_state[k].vreg = vreg;
1316 Int p = hregIndex(vreg);
1317 vassert(IS_VALID_VREGNO(p));
1318 vreg_state[p] = toShort(k);
1319 addToHRegRemap(&remap, vreg, con->univ->regs[k]);
1320 /* Generate a reload if needed. This only creates needed
1321 reloads because the live range builder for vregs will
1322 guarantee that the first event for a vreg is a write.
1323 Hence, if this reference is not a write, it cannot be
1324 the first reference for this vreg, and so a reload is
1325 indeed needed. */
1326 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1327 vassert(vreg_lrs[p].reg_class != HRcINVALID);
1328 HInstr* reload1 = NULL;
1329 HInstr* reload2 = NULL;
1330 con->genReload(&reload1, &reload2, con->univ->regs[k],
1331 vreg_lrs[p].spill_offset, con->mode64);
1332 vassert(reload1 || reload2); /* can't both be NULL */
1333 if (reload1)
1334 EMIT_INSTR(reload1);
1335 if (reload2)
1336 EMIT_INSTR(reload2);
1337 /* This rreg is read or modified by the instruction.
1338 If it's merely read we can claim it now equals the
1339 spill slot, but not so if it is modified. */
1340 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1341 rreg_state[k].eq_spill_slot = True;
1342 } else {
1343 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1344 rreg_state[k].eq_spill_slot = False;
1345 }
1346 } else {
1347 rreg_state[k].eq_spill_slot = False;
1348 }
1349
1350 continue;
1351 }
1352
1353 /* Well, now we have no option but to spill a vreg. It's
1354 important to make a good choice of vreg to spill, and of
1355 course we need to be careful not to spill a vreg which is
1356 needed by this insn. */
1357
1358 /* First, mark in the rreg_state, those rregs which are not spill
1359 candidates, due to holding a vreg mentioned by this
1360 instruction. Or being of the wrong class. */
1361 for (k = con->univ->allocable_start[hregClass(vreg)];
1362 k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
1363 rreg_state[k].is_spill_cand = False;
1364 if (rreg_state[k].disp != Bound)
1365 continue;
1366 rreg_state[k].is_spill_cand = True;
1367 /* Note, the following loop visits only the virtual regs
1368 mentioned by the instruction. */
1369 for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
1370 if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
1371 rreg_state[k].is_spill_cand = False;
1372 break;
1373 }
1374 }
1375 }
1376
1377 /* We can choose to spill any rreg satisfying
1378 rreg_state[r].is_spill_cand (so to speak). Choose r so that
1379 the next use of its associated vreg is as far ahead as
1380 possible, in the hope that this will minimise the number
1381 of consequent reloads required. */
1382 Int spillee
1383 = findMostDistantlyMentionedVReg (
1384 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state,
1385 hregClass(vreg), con);
1386
1387 if (spillee == -1) {
1388 /* Hmmmmm. There don't appear to be any spill candidates.
1389 We're hosed. */
1390 vex_printf("reg_alloc: can't find a register in class: ");
1391 ppHRegClass(hregClass(vreg));
1392 vex_printf("\n");
1393 vpanic("reg_alloc: can't create a free register.");
1394 }
1395
1396 /* Right. So we're going to spill rreg_state[spillee]. */
1397 vassert(IS_VALID_RREGNO(spillee));
1398 vassert(rreg_state[spillee].disp == Bound);
1399 /* check it's the right class */
1400 vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg));
1401 /* check we're not ejecting the vreg for which we are trying
1402 to free up a register. */
1403 vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
1404
1405 m = hregIndex(rreg_state[spillee].vreg);
1406 vassert(IS_VALID_VREGNO(m));
1407
1408 /* So here's the spill store. Assert that we're spilling a
1409 live vreg. */
1410 vassert(vreg_lrs[m].dead_before > ii);
1411 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1412 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1413 HInstr* spill1 = NULL;
1414 HInstr* spill2 = NULL;
1415 con->genSpill(&spill1, &spill2, con->univ->regs[spillee],
1416 vreg_lrs[m].spill_offset, con->mode64);
1417 vassert(spill1 || spill2); /* can't both be NULL */
1418 if (spill1)
1419 EMIT_INSTR(spill1);
1420 if (spill2)
1421 EMIT_INSTR(spill2);
1422 }
1423
1424 /* Update the rreg_state to reflect the new assignment for this
1425 rreg. */
1426 rreg_state[spillee].vreg = vreg;
1427 vreg_state[m] = INVALID_RREG_NO;
1428
1429 rreg_state[spillee].eq_spill_slot = False; /* be safe */
1430
1431 m = hregIndex(vreg);
1432 vassert(IS_VALID_VREGNO(m));
1433 vreg_state[m] = toShort(spillee);
1434
1435 /* Now, if this vreg is being read or modified (as opposed to
1436 written), we have to generate a reload for it. */
1437 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1438 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1439 HInstr* reload1 = NULL;
1440 HInstr* reload2 = NULL;
1441 con->genReload(&reload1, &reload2, con->univ->regs[spillee],
1442 vreg_lrs[m].spill_offset, con->mode64);
1443 vassert(reload1 || reload2); /* can't both be NULL */
1444 if (reload1)
1445 EMIT_INSTR(reload1);
1446 if (reload2)
1447 EMIT_INSTR(reload2);
1448 /* This rreg is read or modified by the instruction.
1449 If it's merely read we can claim it now equals the
1450 spill slot, but not so if it is modified. */
1451 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1452 rreg_state[spillee].eq_spill_slot = True;
1453 } else {
1454 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1455 rreg_state[spillee].eq_spill_slot = False;
1456 }
1457 }
1458
1459 /* So after much twisting and turning, we have vreg mapped to
1460 rreg_state[spillee].rreg. Note that in the map. */
1461 addToHRegRemap(&remap, vreg, con->univ->regs[spillee]);
1462
1463 } /* iterate over virtual registers in this instruction. */
1464
1465 /* We've finished clowning around with registers in this instruction.
1466 Three results:
1467 - the running rreg_state[] has been updated
1468 - a suitable vreg->rreg mapping for this instruction has been
1469 constructed
1470 - spill and reload instructions may have been emitted.
1471
1472 The final step is to apply the mapping to the instruction,
1473 and emit that.
1474 */
1475
1476 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1477 con->mapRegs(&remap, instrs_in->arr[ii], con->mode64);
1478 EMIT_INSTR( instrs_in->arr[ii] );
1479
1480 if (DEBUG_REGALLOC) {
1481 vex_printf("After dealing with current insn:\n");
1482 PRINT_STATE;
1483 vex_printf("\n");
1484 }
1485
1486 /* ------ Post-instruction actions for fixed rreg uses ------ */
1487
1488 /* Now we need to check for rregs exiting fixed live ranges
1489 after this instruction, and if so mark them as free. */
1490 while (True) {
1491 vassert(rreg_lrs_db_next >= 0);
1492 vassert(rreg_lrs_db_next <= rreg_lrs_used);
1493 if (rreg_lrs_db_next == rreg_lrs_used)
1494 break; /* no more real reg live ranges to consider */
1495 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1496            break; /* next live range does not yet end */
1497 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1498         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
1499 range. Mark it as such in the main rreg_state array. */
1500 HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1501 vassert(!hregIsVirtual(reg));
1502 Int k = hregIndex(reg);
1503 vassert(IS_VALID_RREGNO(k));
1504 vassert(rreg_state[k].disp == Unavail);
1505 rreg_state[k].disp = Free;
1506 rreg_state[k].vreg = INVALID_HREG;
1507 rreg_state[k].eq_spill_slot = False;
1508
1509 /* check for further rregs leaving HLRs at this point */
1510 rreg_lrs_db_next++;
1511 }
1512
1513 if (DEBUG_REGALLOC) {
1514 vex_printf("After post-insn actions for fixed regs:\n");
1515 PRINT_STATE;
1516 vex_printf("\n");
1517 }
1518
1519 } /* iterate over insns */
1520
1521 /* ------ END: Process each insn in turn. ------ */
1522
1523 /* free(rreg_state); */
1524 /* free(rreg_lrs); */
1525 /* if (vreg_lrs) free(vreg_lrs); */
1526
1527 /* Paranoia */
1528 vassert(rreg_lrs_la_next == rreg_lrs_used);
1529 vassert(rreg_lrs_db_next == rreg_lrs_used);
1530
1531 return instrs_out;
1532
1533 # undef INVALID_INSTRNO
1534 # undef EMIT_INSTR
1535 # undef PRINT_STATE
1536 }
1537
1538
1539
1540 /*---------------------------------------------------------------*/
1541 /*--- host_reg_alloc2.c ---*/
1542 /*---------------------------------------------------------------*/
1543