1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                 host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2017 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41 
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44 
45 
46 /* TODO 27 Oct 04:
47 
48    We can possibly do V-V coalescing even when the src is spilled,
49    providing we can arrange for the dst to have the same spill slot.
50 
51    Note that state[].hreg is the same as the available real regs.
52 
53    Generally rationalise data structures.  */
54 
55 
56 /* Records information on virtual register live ranges.  Computed once
57    and remains unchanged after that. */
typedef
   struct {
      /* Insn number after which this vreg first becomes live.
         INVALID_INSTRNO if the vreg is never mentioned at all. */
      Short live_after;
      /* Insn number before which this vreg is dead for the last
         time; the live range is therefore [live_after .. dead_before). */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes.
         Presumably a byte offset from the guest state pointer --
         see the alignment checks in sanity_check_spill_offset. */
      Short spill_offset;
      /* Size of the spill area for this vreg; depends on reg_class. */
      Short spill_size;
      /* What kind of register this is.  Set on first mention of the
         vreg, and checked for consistency thereafter. */
      HRegClass reg_class;
   }
   VRegLR;
71 
72 
/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* The real register this live range pertains to. */
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* ... and becomes dead before this insn, so the range is
         [live_after .. dead_before). */
      Short dead_before;
   }
   RRegLR;
84 
85 
86 /* An array of the following structs (rreg_state) comprises the
87    running state of the allocator.  It indicates what the current
88    disposition of each allocatable real register is.  The array gets
89    updated as the allocator processes instructions.  The identity of
90    the register is not recorded here, because the index of this
91    structure in doRegisterAllocation()'s |rreg_state| is the index
92    number of the register, and the register itself can be extracted
93    from the RRegUniverse supplied to doRegisterAllocation(). */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Is this rreg involved in any HLRs (hard live ranges)?  Only
         an optimisation hint: when choosing between rregs, prefer
         ones without HLRs, since rregs with HLRs may need to be
         spilled around their HLRs. */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill: marks this (rreg,vreg) pair as a spill candidate. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
120 
121 
122 /* The allocator also maintains a redundant array of indexes
123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
124    is redundant because iff vreg_state[i] == j then
125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126    point at each other.  The purpose of this is to speed up activities
127    which involve looking for a particular vreg: there is no need to
128    scan the rreg_state looking for it, just index directly into
129    vreg_state.  The FAQ "does this vreg already have an associated
130    rreg" is the main beneficiary.
131 
132    To indicate, in vreg_state[i], that a given vreg is not currently
133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
134 
135    Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
137 */
138 
/* Value in vreg_state[] meaning "this vreg currently has no
   associated rreg". */
#define INVALID_RREG_NO ((Short)(-1))

/* Range checks for vreg/rreg indices.  NB: these capture |n_vregs|
   and |n_rregs| from the scope at the point of use. */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143 
144 
145 /* Search forward from some given point in the incoming instruction
146    sequence.  Point is to select a virtual register to spill, by
147    finding the vreg which is mentioned as far ahead as possible, in
148    the hope that this will minimise the number of consequent reloads.
149 
150    Only do the search for vregs which are Bound in the running state,
151    and for which the .is_spill_cand field is set.  This allows the
152    caller to arbitrarily restrict the set of spill candidates to be
153    considered.
154 
155    To do this we don't actually need to see the incoming instruction
   stream.  Rather, what we need is the HRegUsage records for the
157    incoming instruction stream.  Hence that is passed in.
158 
159    Returns an index into the state array indicating the (v,r) pair to
160    spill, or -1 if none was found.  */
161 static
findMostDistantlyMentionedVReg(HRegUsage * reg_usages_in,Int search_from_instr,Int num_instrs,RRegState * rreg_state,HRegClass hreg_class,const RegAllocControl * con)162 Int findMostDistantlyMentionedVReg (
163    HRegUsage*             reg_usages_in,
164    Int                    search_from_instr,
165    Int                    num_instrs,
166    RRegState*             rreg_state,
167    HRegClass              hreg_class,
168    const RegAllocControl* con
169 )
170 {
171    Int m;
172    Int furthest_k = -1;
173    Int furthest   = -1;
174    vassert(search_from_instr >= 0);
175    for (UInt k = con->univ->allocable_start[hreg_class];
176         k <= con->univ->allocable_end[hreg_class]; k++) {
177       if (!rreg_state[k].is_spill_cand)
178          continue;
179       vassert(rreg_state[k].disp == Bound);
180       for (m = search_from_instr; m < num_instrs; m++) {
181          if (HRegUsage__contains(&reg_usages_in[m], rreg_state[k].vreg))
182             break;
183       }
184       if (m > furthest) {
185          furthest   = m;
186          furthest_k = k;
187       }
188    }
189    return furthest_k;
190 }
191 
192 
193 /* Check that this vreg has been assigned a sane spill offset. */
194 inline
sanity_check_spill_offset(VRegLR * vreg)195 static void sanity_check_spill_offset ( VRegLR* vreg )
196 {
197    switch (vreg->reg_class) {
198       case HRcVec128: case HRcFlt64:
199          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
200       default:
201          vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
202    }
203 }
204 
205 
206 /* Double the size of the real-reg live-range array, if needed. */
207 __attribute__((noinline))
ensureRRLRspace_SLOW(RRegLR ** info,Int * size,Int used)208 static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
209 {
210    Int     k;
211    RRegLR* arr2;
212    if (0)
213       vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
214    vassert(used == *size);
215    arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
216    for (k = 0; k < *size; k++)
217       arr2[k] = (*info)[k];
218    *size *= 2;
219    *info = arr2;
220 }
221 inline
ensureRRLRspace(RRegLR ** info,Int * size,Int used)222 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223 {
224    if (LIKELY(used < *size)) return;
225    ensureRRLRspace_SLOW(info, size, used);
226 }
227 
228 
229 /* Sort an array of RRegLR entries by either the .live_after or
230    .dead_before fields.  This is performance-critical. */
sortRRLRarray(RRegLR * arr,Int size,Bool by_live_after)231 static void sortRRLRarray ( RRegLR* arr,
232                             Int size, Bool by_live_after )
233 {
234    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
235                        9841, 29524, 88573, 265720,
236                        797161, 2391484 };
237    Int    lo = 0;
238    Int    hi = size-1;
239    Int    i, j, h, bigN, hp;
240    RRegLR v;
241 
242    vassert(size >= 0);
243    if (size == 0)
244       return;
245 
246    bigN = hi - lo + 1; if (bigN < 2) return;
247    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
248 
249    if (by_live_after) {
250 
251       for ( ; hp >= 0; hp--) {
252          h = incs[hp];
253          for (i = lo + h; i <= hi; i++) {
254             v = arr[i];
255             j = i;
256             while (arr[j-h].live_after > v.live_after) {
257                arr[j] = arr[j-h];
258                j = j - h;
259                if (j <= (lo + h - 1)) break;
260             }
261             arr[j] = v;
262          }
263       }
264 
265    } else {
266 
267       for ( ; hp >= 0; hp--) {
268          h = incs[hp];
269          for (i = lo + h; i <= hi; i++) {
270             v = arr[i];
271             j = i;
272             while (arr[j-h].dead_before > v.dead_before) {
273                arr[j] = arr[j-h];
274                j = j - h;
275                if (j <= (lo + h - 1)) break;
276             }
277             arr[j] = v;
278          }
279       }
280 
281    }
282 }
283 
284 
285 /* Compute the index of the highest and lowest 1 in a ULong,
286    respectively.  Results are undefined if the argument is zero.
287    Don't pass it zero :) */
ULong__maxIndex(ULong w64)288 static inline UInt ULong__maxIndex ( ULong w64 ) {
289    return 63 - __builtin_clzll(w64);
290 }
291 
ULong__minIndex(ULong w64)292 static inline UInt ULong__minIndex ( ULong w64 ) {
293    return __builtin_ctzll(w64);
294 }
295 
296 
297 /* A target-independent register allocator.  Requires various
298    functions which it uses to deal abstractly with instructions and
299    registers, since it cannot have any target-specific knowledge.
300 
301    Returns a new list of instructions, which, as a result of the
302    behaviour of mapRegs, will be in-place modifications of the
303    original instructions.
304 
305    Requires that the incoming code has been generated using
306    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
307    that range is a checked run-time error.
308 
309    Takes an expandable array of pointers to unallocated insns.
310    Returns an expandable array of pointers to allocated insns.
311 */
doRegisterAllocation_v2(HInstrArray * instrs_in,const RegAllocControl * con)312 HInstrArray* doRegisterAllocation_v2 (
313 
314    /* Incoming virtual-registerised code. */
315    HInstrArray* instrs_in,
316 
317    /* Register allocator controls to use. */
318    const RegAllocControl* con
319 )
320 {
321 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
322 
323    const Bool eq_spill_opt = True;
324 
325    /* Info on vregs and rregs.  Computed once and remains
326       unchanged. */
327    Int     n_vregs;
328    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
329 
330    /* We keep two copies of the real-reg live range info, one sorted
331       by .live_after and the other by .dead_before.  First the
332       unsorted info is created in the _la variant is copied into the
333       _db variant.  Once that's done both of them are sorted.
334       We also need two integer cursors which record the next
335       location in the two arrays to consider. */
336    RRegLR* rreg_lrs_la;
337    RRegLR* rreg_lrs_db;
338    Int     rreg_lrs_size;
339    Int     rreg_lrs_used;
340    Int     rreg_lrs_la_next;
341    Int     rreg_lrs_db_next;
342 
343    /* Info on register usage in the incoming instruction array.
344       Computed once and remains unchanged, more or less; updated
345       sometimes by the direct-reload optimisation. */
346    HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
347 
348    /* Used when constructing vreg_lrs (for allocating stack
349       slots). */
350    Short ss_busy_until_before[N_SPILL64S];
351 
352    /* Used when constructing rreg_lrs. */
353    Int* rreg_live_after;
354    Int* rreg_dead_before;
355 
356    /* Running state of the core allocation algorithm. */
357    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
358    Int        n_rregs;
359 
360    /* .. and the redundant backward map */
361    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
362       This implies n_rregs must be <= 32768. */
363    Short*     vreg_state;  /* [0 .. n_vregs-1] */
364 
365    /* The vreg -> rreg map constructed and then applied to each
366       instr. */
367    HRegRemap remap;
368 
369    /* The output array of instructions. */
370    HInstrArray* instrs_out;
371 
372    /* Sanity checks are expensive.  They are only done periodically,
373       not at each insn processed. */
374    Bool do_sanity_check;
375 
376    vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
377    vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
378    vassert(0 == (N_SPILL64S % 2));
379 
380    /* The live range numbers are signed shorts, and so limiting the
381       number of insns to 15000 comfortably guards against them
382       overflowing 32k. */
383    vassert(instrs_in->arr_used <= 15000);
384 
385 #  define INVALID_INSTRNO (-2)
386 
387 #  define EMIT_INSTR(_instr)                  \
388       do {                                    \
389         HInstr* _tmp = (_instr);              \
390         if (DEBUG_REGALLOC) {                 \
391            vex_printf("**  ");                \
392            con->ppInstr(_tmp, con->mode64);   \
393            vex_printf("\n\n");                \
394         }                                     \
395         addHInstr ( instrs_out, _tmp );       \
396       } while (0)
397 
398 #   define PRINT_STATE						   \
399       do {							   \
400          Int z, q;						   \
401          for (z = 0; z < n_rregs; z++) {			   \
402             vex_printf("  rreg_state[%2d] = ", z);		   \
403             con->ppReg(con->univ->regs[z]);    			   \
404             vex_printf("  \t");					   \
405             switch (rreg_state[z].disp) {			   \
406                case Free:    vex_printf("Free\n"); break;	   \
407                case Unavail: vex_printf("Unavail\n"); break;	   \
408                case Bound:   vex_printf("BoundTo "); 		   \
409                              con->ppReg(rreg_state[z].vreg);	   \
410                              vex_printf("\n"); break;		   \
411             }							   \
412          }							   \
413          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
414          q = 0;                                                    \
415          for (z = 0; z < n_vregs; z++) {                           \
416             if (vreg_state[z] == INVALID_RREG_NO)                  \
417                continue;                                           \
418             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
419             q++;                                                   \
420             if (q > 0 && (q % 6) == 0)                             \
421                vex_printf("\n    ");                               \
422          }                                                         \
423          vex_printf("\n");                                         \
424       } while (0)
425 
426 
427    /* --------- Stage 0: set up output array --------- */
428    /* --------- and allocate/initialise running state. --------- */
429 
430    instrs_out = newHInstrArray();
431 
432    /* ... and initialise running state. */
433    /* n_rregs is no more than a short name for n_available_real_regs. */
434    n_rregs = con->univ->allocable;
435    n_vregs = instrs_in->n_vregs;
436 
437    /* If this is not so, vreg_state entries will overflow. */
438    vassert(n_vregs < 32767);
439 
440    /* If this is not so, the universe we have is nonsensical. */
441    vassert(n_rregs > 0);
442 
443    rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
444    vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
445 
446    for (Int j = 0; j < n_rregs; j++) {
447       rreg_state[j].has_hlrs      = False;
448       rreg_state[j].disp          = Free;
449       rreg_state[j].vreg          = INVALID_HREG;
450       rreg_state[j].is_spill_cand = False;
451       rreg_state[j].eq_spill_slot = False;
452    }
453 
454    for (Int j = 0; j < n_vregs; j++)
455       vreg_state[j] = INVALID_RREG_NO;
456 
457 
458    /* --------- Stage 1: compute vreg live ranges. --------- */
459    /* --------- Stage 2: compute rreg live ranges. --------- */
460 
461    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
462 
463    /* This is relatively simple, because (1) we only seek the complete
464       end-to-end live range of each vreg, and are not interested in
465       any holes in it, and (2) the vregs are conveniently numbered 0
466       .. n_vregs-1, so we can just dump the results in a
467       pre-allocated array. */
468 
469    vreg_lrs = NULL;
470    if (n_vregs > 0)
471       vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
472 
473    for (Int j = 0; j < n_vregs; j++) {
474       vreg_lrs[j].live_after     = INVALID_INSTRNO;
475       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
476       vreg_lrs[j].spill_offset   = 0;
477       vreg_lrs[j].spill_size     = 0;
478       vreg_lrs[j].reg_class      = HRcINVALID;
479    }
480 
481    /* An array to hold the reg-usage info for the incoming
482       instructions. */
483    reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
484 
485    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
486 
487    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
488 
489    /* This is more complex than Stage 1, because we need to compute
490       exactly all the live ranges of all the allocatable real regs,
491       and we don't know in advance how many there will be. */
492 
493    rreg_lrs_used = 0;
494    rreg_lrs_size = 4;
495    rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
496    rreg_lrs_db = NULL; /* we'll create this later */
497 
498    /* We'll need to track live range start/end points seperately for
499       each rreg.  Sigh. */
500    vassert(n_rregs > 0);
501    rreg_live_after  = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
502    rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
503 
504    for (Int j = 0; j < n_rregs; j++) {
505       rreg_live_after[j] =
506       rreg_dead_before[j] = INVALID_INSTRNO;
507    }
508 
509    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
510 
511    /* ------ start of ITERATE OVER INSNS ------ */
512 
513    for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
514 
515       con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii], con->mode64);
516       reg_usage_arr[ii].isVregVregMove
517          = reg_usage_arr[ii].isRegRegMove
518            && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
519            && hregIsVirtual(reg_usage_arr[ii].regMoveDst);
520 
521       if (0) {
522          vex_printf("\n%d  stage1: ", ii);
523          con->ppInstr(instrs_in->arr[ii], con->mode64);
524          vex_printf("\n");
525          ppHRegUsage(con->univ, &reg_usage_arr[ii]);
526       }
527 
528       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
529 
530       /* for each virtual reg mentioned in the insn ... */
531       for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
532 
533          HReg vreg = reg_usage_arr[ii].vRegs[j];
534          vassert(hregIsVirtual(vreg));
535 
536          Int k = hregIndex(vreg);
537          if (k < 0 || k >= n_vregs) {
538             vex_printf("\n");
539             con->ppInstr(instrs_in->arr[ii], con->mode64);
540             vex_printf("\n");
541             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
542             vpanic("doRegisterAllocation: out-of-range vreg");
543          }
544 
545          /* Take the opportunity to note its regclass.  We'll need
546             that when allocating spill slots. */
547          if (vreg_lrs[k].reg_class == HRcINVALID) {
548             /* First mention of this vreg. */
549             vreg_lrs[k].reg_class = hregClass(vreg);
550          } else {
551             /* Seen it before, so check for consistency. */
552             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
553          }
554 
555          /* Now consider live ranges. */
556          switch (reg_usage_arr[ii].vMode[j]) {
557             case HRmRead:
558                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
559                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
560                   vpanic("doRegisterAllocation: "
561                          "first event for vreg is Read");
562                }
563                vreg_lrs[k].dead_before = toShort(ii + 1);
564                break;
565             case HRmWrite:
566                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
567                   vreg_lrs[k].live_after = toShort(ii);
568                vreg_lrs[k].dead_before = toShort(ii + 1);
569                break;
570             case HRmModify:
571                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
572                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
573                   vpanic("doRegisterAllocation: "
574                          "first event for vreg is Modify");
575                }
576                vreg_lrs[k].dead_before = toShort(ii + 1);
577                break;
578             default:
579                vpanic("doRegisterAllocation(1)");
580          } /* switch */
581 
582       } /* iterate over virtual registers */
583 
584       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
585 
586       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
587 
588       /* If this doesn't hold, the following iteration over real registers
589          will fail miserably. */
590       vassert(N_RREGUNIVERSE_REGS == 64);
591 
592       const ULong rRead      = reg_usage_arr[ii].rRead;
593       const ULong rWritten   = reg_usage_arr[ii].rWritten;
594       const ULong rMentioned = rRead | rWritten;
595 
596       UInt rReg_minIndex;
597       UInt rReg_maxIndex;
598       if (rMentioned == 0) {
599          /* There are no real register uses in this insn.  Set
600             rReg_{min,max}Index so that the following loop doesn't iterate
601             at all, so as to avoid wasting time. */
602          rReg_minIndex = 1;
603          rReg_maxIndex = 0;
604       } else {
605          rReg_minIndex = ULong__minIndex(rMentioned);
606          rReg_maxIndex = ULong__maxIndex(rMentioned);
607          /* Don't bother to look at registers which are not available
608             to the allocator.  We asserted above that n_rregs > 0, so
609             n_rregs-1 is safe. */
610          if (rReg_maxIndex >= n_rregs)
611             rReg_maxIndex = n_rregs-1;
612       }
613 
614       /* for each allocator-available real reg mentioned in the insn ... */
615       /* Note.  We are allocating only over the real regs available to
616          the allocator.  Others, eg the stack or baseblock pointers,
617          are unavailable to allocation and so we never visit them.
618          Hence the iteration is cut off at n_rregs-1, since n_rregs ==
619          univ->allocable. */
620       for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
621 
622          const ULong jMask = 1ULL << j;
623          if (LIKELY((rMentioned & jMask) == 0))
624             continue;
625 
626          const Bool isR = (rRead    & jMask) != 0;
627          const Bool isW = (rWritten & jMask) != 0;
628 
629          /* Dummy initialisations of flush_la and flush_db to avoid
630             possible bogus uninit-var warnings from gcc. */
631          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
632          Bool flush = False;
633 
634          if (isW && !isR) {
635             flush_la = rreg_live_after[j];
636             flush_db = rreg_dead_before[j];
637             if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
638                flush = True;
639             rreg_live_after[j]  = ii;
640             rreg_dead_before[j] = ii+1;
641          } else if (!isW && isR) {
642             if (rreg_live_after[j] == INVALID_INSTRNO) {
643                vex_printf("\nOFFENDING RREG = ");
644                con->ppReg(con->univ->regs[j]);
645                vex_printf("\n");
646                vex_printf("\nOFFENDING instr = ");
647                con->ppInstr(instrs_in->arr[ii], con->mode64);
648                vex_printf("\n");
649                vpanic("doRegisterAllocation: "
650                       "first event for rreg is Read");
651             }
652             rreg_dead_before[j] = ii+1;
653          } else {
654             vassert(isR && isW);
655             if (rreg_live_after[j] == INVALID_INSTRNO) {
656                vex_printf("\nOFFENDING RREG = ");
657                con->ppReg(con->univ->regs[j]);
658                vex_printf("\n");
659                vex_printf("\nOFFENDING instr = ");
660                con->ppInstr(instrs_in->arr[ii], con->mode64);
661                vex_printf("\n");
662                vpanic("doRegisterAllocation: "
663                       "first event for rreg is Modify");
664             }
665             rreg_dead_before[j] = ii+1;
666          }
667 
668          if (flush) {
669             vassert(flush_la != INVALID_INSTRNO);
670             vassert(flush_db != INVALID_INSTRNO);
671             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
672             if (0)
673                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
674             rreg_lrs_la[rreg_lrs_used].rreg        = con->univ->regs[j];
675             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
676             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
677             rreg_lrs_used++;
678          }
679 
680       } /* iterate over rregs in the instr */
681 
682       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
683 
684    } /* iterate over insns */
685 
686    /* ------ end of ITERATE OVER INSNS ------ */
687 
688    /* ------ start of FINALISE RREG LIVE RANGES ------ */
689 
690    /* Now finish up any live ranges left over. */
691    for (Int j = 0; j < n_rregs; j++) {
692 
693       if (0) {
694          vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
695                                                 rreg_dead_before[j]);
696       }
697       vassert( (rreg_live_after[j] == INVALID_INSTRNO
698                 && rreg_dead_before[j] == INVALID_INSTRNO)
699               ||
700                (rreg_live_after[j] != INVALID_INSTRNO
701                 && rreg_dead_before[j] != INVALID_INSTRNO)
702             );
703 
704       if (rreg_live_after[j] == INVALID_INSTRNO)
705          continue;
706 
707       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
708       if (0)
709          vex_printf("FLUSH 2 (%d,%d)\n",
710                     rreg_live_after[j], rreg_dead_before[j]);
711       rreg_lrs_la[rreg_lrs_used].rreg        = con->univ->regs[j];
712       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
713       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
714       rreg_lrs_used++;
715    }
716 
717    /* Compute summary hints for choosing real regs.  If a real reg is
718       involved in a hard live range, record that fact in the fixed
719       part of the running rreg_state.  Later, when offered a choice between
720       rregs, it's better to choose one which is not marked as having
721       any HLRs, since ones with HLRs may need to be spilled around
722       their HLRs.  Correctness of final assignment is unaffected by
723       this mechanism -- it is only an optimisation. */
724 
725    for (Int j = 0; j < rreg_lrs_used; j++) {
726       HReg rreg = rreg_lrs_la[j].rreg;
727       vassert(!hregIsVirtual(rreg));
728       /* rreg is involved in a HLR.  Record this info in the array, if
729          there is space. */
730       UInt ix = hregIndex(rreg);
731       vassert(ix < n_rregs);
732       rreg_state[ix].has_hlrs = True;
733    }
734    if (0) {
735       for (Int j = 0; j < n_rregs; j++) {
736          if (!rreg_state[j].has_hlrs)
737             continue;
738          con->ppReg(con->univ->regs[j]);
739          vex_printf(" hinted\n");
740       }
741    }
742 
743    /* Finally, copy the _la variant into the _db variant and
744       sort both by their respective fields. */
745    rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
746    for (Int j = 0; j < rreg_lrs_used; j++)
747       rreg_lrs_db[j] = rreg_lrs_la[j];
748 
749    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
750    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
751 
752    /* And set up the cursors. */
753    rreg_lrs_la_next = 0;
754    rreg_lrs_db_next = 0;
755 
756    for (Int j = 1; j < rreg_lrs_used; j++) {
757       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
758       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
759    }
760 
761    /* ------ end of FINALISE RREG LIVE RANGES ------ */
762 
763    if (DEBUG_REGALLOC) {
764       for (Int j = 0; j < n_vregs; j++) {
765          vex_printf("vreg %d:  la = %d,  db = %d\n",
766                     j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
767       }
768    }
769 
770    if (DEBUG_REGALLOC) {
771       vex_printf("RRegLRs by LA:\n");
772       for (Int j = 0; j < rreg_lrs_used; j++) {
773          vex_printf("  ");
774          con->ppReg(rreg_lrs_la[j].rreg);
775          vex_printf("      la = %d,  db = %d\n",
776                     rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
777       }
778       vex_printf("RRegLRs by DB:\n");
779       for (Int j = 0; j < rreg_lrs_used; j++) {
780          vex_printf("  ");
781          con->ppReg(rreg_lrs_db[j].rreg);
782          vex_printf("      la = %d,  db = %d\n",
783                     rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
784       }
785    }
786 
   /* --------- Stage 3: allocate spill slots. --------- */

   /* Each spill slot is 8 bytes long.  For vregs which take more than
      64 bits to spill (classes Flt64 and Vec128), we have to allocate
      two consecutive spill slots.  For 256 bit registers (class
      Vec256), we have to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address
      must be 16-byte aligned.  Since the spill slot's address is
      computed as an offset from the guest state pointer, and since
      the user of the generated code must set that pointer to a
      32-aligned value, we have the residual obligation here of
      choosing a 16-aligned spill slot offset for Vec128-class values.
      Since each spill slot is 8 bytes long, that means for
      Vec128-class values we must allocate a spill slot number which
      is zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number
      which is zero mod 4.  This guarantees it will be 32 byte
      aligned, which isn't actually necessary on amd64 (we use movUpd
      etc to spill), but seems like good practice.

      Do a rank-based allocation of vregs to spill slot numbers.  We
      put as few values as possible in spill slots, but nevertheless
      need to have a spill slot available for all vregs, just in case.
   */
   /* Int max_ss_no = -1; */

   /* ss_busy_until_before[s] is the insn number before which spill
      slot s next becomes free; any vreg whose live range starts at or
      after that point may reuse slot s. */
   vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before));

   for (Int j = 0; j < n_vregs; j++) {

      /* True iff this vreg is unused.  In which case we also expect
         that the reg_class field for it has not been set.  */
      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
         vassert(vreg_lrs[j].reg_class == HRcINVALID);
         continue;
      }

      /* The spill slots are 64 bits in size.  As per the comment on
         definition of HRegClass in host_generic_regs.h, that means,
         to spill a vreg of class Flt64 or Vec128, we'll need to find
         two adjacent spill slots to use.  For Vec256, we'll need to
         find four adjacent slots to use.  Note, this logic needs to
         be kept in sync with the size info on the definition of
         HRegClass. */
      Int ss_no = -1;
      switch (vreg_lrs[j].reg_class) {

         case HRcVec128: case HRcFlt64:
            /* Find two adjacent free slots in which between them
               provide up to 128 bits in which to spill the vreg.
               Since we are trying to find an even:odd pair, move
               along in steps of 2 (slots). */
            for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
               if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
                   && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
                  break;
            if (ss_no >= N_SPILL64S-1) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            /* Both slots of the pair are occupied until this vreg
               dies. */
            ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
            ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
            break;

         default:
            /* The ordinary case -- just find a single spill slot. */
            /* Find the lowest-numbered spill slot which is available
               at the start point of this interval, and assign the
               interval to it. */
            for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
               if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
                  break;
            if (ss_no == N_SPILL64S) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
            break;

      } /* switch (vreg_lrs[j].reg_class) */

      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
         layout: the guest state, then two equal sized areas following
         it for two sets of shadow state, and then the spill area. */
      vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8);

      /* Independent check that we've made a sane choice of slot */
      sanity_check_spill_offset( &vreg_lrs[j] );
      /* if (j > max_ss_no) */
      /*    max_ss_no = j; */
   }

   /* Debug only: dump the final vreg -> spill offset mapping. */
   if (0) {
      vex_printf("\n\n");
      for (Int j = 0; j < n_vregs; j++)
         vex_printf("vreg %d    --> spill offset %d\n",
                    j, vreg_lrs[j].spill_offset);
   }
887 
   /* --------- Stage 4: establish rreg preferences --------- */

   /* It may be advantageous to allocate certain vregs to specific
      rregs, as a way of avoiding reg-reg moves later.  Here we
      establish which, if any, rreg each vreg would prefer to be in.
      Note that this constrains the allocator -- ideally we end up
      with as few as possible vregs expressing a preference.

      This is an optimisation: if the .preferred_rreg field is never
      set to anything different from INVALID_HREG, the allocator still
      works. */

   /* 30 Dec 04: removed this mechanism as it does not seem to
      help. */

   /* --------- Stage 5: process instructions --------- */

   /* This is the main loop of the allocator.  First, we need to
      correctly set up our running state, which tracks the status of
      each real register. */

   /* ------ BEGIN: Process each insn in turn. ------ */

   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {

      if (DEBUG_REGALLOC) {
         vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
         vex_printf("---- ");
         con->ppInstr(instrs_in->arr[ii], con->mode64);
         vex_printf("\n\nInitial state:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------------ Sanity checks ------------ */

      /* Sanity checks are expensive.  So they are done only once
         every 17 instructions, and just before the last
         instruction. */
      do_sanity_check
         = toBool(
              False /* Set to True for sanity checking of all insns. */
              || ii == instrs_in->arr_used-1
              || (ii > 0 && (ii % 17) == 0)
           );
933 
      if (do_sanity_check) {

         /* Sanity check 1: all rregs with a hard live range crossing
            this insn must be marked as unavailable in the running
            state. */
         for (Int j = 0; j < rreg_lrs_used; j++) {
            if (rreg_lrs_la[j].live_after < ii
                && ii < rreg_lrs_la[j].dead_before) {
               /* ii is the middle of a hard live range for some real
                  reg.  Check it's marked as such in the running
                  state. */
               HReg reg = rreg_lrs_la[j].rreg;

               if (0) {
                  vex_printf("considering la %d .. db %d   reg = ",
                             rreg_lrs_la[j].live_after,
                             rreg_lrs_la[j].dead_before);
                  con->ppReg(reg);
                  vex_printf("\n");
               }

               /* assert that this rreg is marked as unavailable */
               vassert(!hregIsVirtual(reg));
               vassert(rreg_state[hregIndex(reg)].disp == Unavail);
            }
         }

         /* Sanity check 2: conversely, all rregs marked as
            unavailable in the running rreg_state must have a
            corresponding hard live range entry in the rreg_lrs
            array. */
         for (Int j = 0; j < n_rregs; j++) {
            vassert(rreg_state[j].disp == Bound
                    || rreg_state[j].disp == Free
                    || rreg_state[j].disp == Unavail);
            if (rreg_state[j].disp != Unavail)
               continue;
            /* Search for a hard live range of rreg j that covers
               insn ii. */
            Int k;
            for (k = 0; k < rreg_lrs_used; k++) {
               HReg reg = rreg_lrs_la[k].rreg;
               vassert(!hregIsVirtual(reg));
               if (hregIndex(reg) == j
                   && rreg_lrs_la[k].live_after < ii
                   && ii < rreg_lrs_la[k].dead_before)
                  break;
            }
            /* If this vassertion fails, we couldn't find a
               corresponding HLR. */
            vassert(k < rreg_lrs_used);
         }

         /* Sanity check 3: all vreg-rreg bindings must bind registers
            of the same class. */
         for (Int j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound) {
               /* eq_spill_slot is only meaningful for Bound rregs. */
               vassert(rreg_state[j].eq_spill_slot == False);
               continue;
            }
            vassert(hregClass(con->univ->regs[j])
                    == hregClass(rreg_state[j].vreg));
            vassert( hregIsVirtual(rreg_state[j].vreg));
         }

         /* Sanity check 4: the vreg_state and rreg_state
            mutually-redundant mappings are consistent.  If
            rreg_state[j].vreg points at some vreg_state entry then
            that vreg_state entry should point back at
            rreg_state[j]. */
         for (Int j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound)
               continue;
            Int k = hregIndex(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(k));
            vassert(vreg_state[k] == j);
         }
         for (Int j = 0; j < n_vregs; j++) {
            Int k = vreg_state[j];
            if (k == INVALID_RREG_NO)
               continue;
            vassert(IS_VALID_RREGNO(k));
            vassert(rreg_state[k].disp == Bound);
            vassert(hregIndex(rreg_state[k].vreg) == j);
         }

      } /* if (do_sanity_check) */
1021 
      /* Do various optimisations pertaining to register coalescing
         and preferencing:
            MOV  v <-> v   coalescing (done here).
            MOV  v <-> r   coalescing (not yet, if ever)
      */
      /* If doing a reg-reg move between two vregs, and the src's live
         range ends here and the dst's live range starts here, bind
         the dst to the src's rreg, and that's all. */
      if (reg_usage_arr[ii].isVregVregMove) {
         HReg vregS = reg_usage_arr[ii].regMoveSrc;
         HReg vregD = reg_usage_arr[ii].regMoveDst;
         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         Int k = hregIndex(vregS);
         Int m = hregIndex(vregD);
         vassert(IS_VALID_VREGNO(k));
         vassert(IS_VALID_VREGNO(m));
         /* Coalescing is only valid when the src dies exactly at this
            insn and the dst is born exactly here. */
         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
         if (DEBUG_REGALLOC) {
         vex_printf("COALESCE ");
            con->ppReg(vregS);
            vex_printf(" -> ");
            con->ppReg(vregD);
            vex_printf("\n\n");
         }
         /* Find the state entry for vregS. */
         Int n = vreg_state[k]; /* k is the index of vregS */
         if (n == INVALID_RREG_NO) {
            /* vregS is not currently in a real register.  So we can't
               do the coalescing.  Give up. */
            goto cannot_coalesce;
         }
         vassert(IS_VALID_RREGNO(n));

         /* Finally, we can do the coalescing.  It's trivial -- merely
            claim vregS's register for vregD. */
         rreg_state[n].vreg = vregD;
         vassert(IS_VALID_VREGNO(hregIndex(vregD)));
         vassert(IS_VALID_VREGNO(hregIndex(vregS)));
         vreg_state[hregIndex(vregD)] = toShort(n);
         vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;

         /* This rreg has become associated with a different vreg and
            hence with a different spill slot.  Play safe. */
         rreg_state[n].eq_spill_slot = False;

         /* Move on to the next insn.  We skip the post-insn stuff for
            fixed registers, since this move should not interact with
            them in any way. */
         continue;
      }
     cannot_coalesce:
1075 
      /* ------ Free up rregs bound to dead vregs ------ */

      /* Look for vregs whose live range has just ended, and mark the
         associated rreg as free. */

      for (Int j = 0; j < n_rregs; j++) {
         if (rreg_state[j].disp != Bound)
            continue;
         UInt vregno = hregIndex(rreg_state[j].vreg);
         vassert(IS_VALID_VREGNO(vregno));
         if (vreg_lrs[vregno].dead_before <= ii) {
            /* The bound vreg is dead at this insn; release rreg j and
               clear both sides of the vreg<->rreg mapping. */
            rreg_state[j].disp = Free;
            rreg_state[j].eq_spill_slot = False;
            Int m = hregIndex(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (DEBUG_REGALLOC) {
               vex_printf("free up ");
               con->ppReg(con->univ->regs[j]);
               vex_printf("\n");
            }
         }
      }
1099 
      /* ------ Pre-instruction actions for fixed rreg uses ------ */

      /* Now we have to deal with rregs which are about to be made
         live by this instruction -- in other words, are entering into
         one of their live ranges.  If any such rreg holds a vreg, we
         will have to free up the rreg.  The simplest solution which
         is correct is to spill the rreg.

         Note we could do better:
         * Could move it into some other free rreg, if one is available

         Do this efficiently, by incrementally stepping along an array
         of rreg HLRs that are known to be sorted by start point
         (their .live_after field).
      */
      while (True) {
         vassert(rreg_lrs_la_next >= 0);
         vassert(rreg_lrs_la_next <= rreg_lrs_used);
         if (rreg_lrs_la_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
            break; /* next live range does not yet start */
         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
            Find the associated rreg_state entry. */
         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
            Real register live ranges are guaranteed to be well-formed
            in that they start with a write to the register -- Stage 2
            rejects any code not satisfying this.  So the correct
            question to ask is whether
            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
            whether the reg becomes live after this insn -- rather
            than before it. */
         if (DEBUG_REGALLOC) {
            vex_printf("need to free up rreg: ");
            con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg);
            vex_printf("\n\n");
         }
         Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);

         /* If this fails, we don't have an entry for this rreg.
            Which we should. */
         vassert(IS_VALID_RREGNO(k));
         Int m = hregIndex(rreg_state[k].vreg);
         if (rreg_state[k].disp == Bound) {
            /* Yes, there is an associated vreg.  Spill it if it's
               still live. */
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (vreg_lrs[m].dead_before > ii) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               /* Skip the spill store when the register is already
                  known to hold the same value as its spill slot. */
               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
                  HInstr* spill1 = NULL;
                  HInstr* spill2 = NULL;
                  con->genSpill(&spill1, &spill2, con->univ->regs[k],
                                vreg_lrs[m].spill_offset, con->mode64);
                  vassert(spill1 || spill2); /* can't both be NULL */
                  if (spill1)
                     EMIT_INSTR(spill1);
                  if (spill2)
                     EMIT_INSTR(spill2);
               }
               rreg_state[k].eq_spill_slot = True;
            }
         }
         /* Either way, rreg k is now claimed by its hard live range. */
         rreg_state[k].disp = Unavail;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs entering HLRs at this point */
         rreg_lrs_la_next++;
      }

      if (DEBUG_REGALLOC) {
         vex_printf("After pre-insn actions for fixed regs:\n");
         PRINT_STATE;
         vex_printf("\n");
      }
1178 
      /* ------ Deal with the current instruction. ------ */

      /* Finally we can begin the processing of this instruction
         itself.  The aim is to free up enough rregs for this insn.
         This may generate spill stores since we may have to evict
         some vregs currently in rregs.  Also generates spill loads.
         We also build up the final vreg->rreg mapping to be applied
         to the insn. */

      initHRegRemap(&remap);

      /* ------------ BEGIN directReload optimisation ----------- */

      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see if
         we can convert the instruction into one that reads directly
         from the spill slot.  This is clearly only possible for x86
         and amd64 targets, since ppc and arm are load-store
         architectures.  If successful, replace instrs_in->arr[ii]
         with this new instruction, and recompute its reg usage, so
         that the change is invisible to the standard-case handling
         that follows. */
1201 
1202       if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) {
1203          Bool  debug_direct_reload = False;
1204          HReg  cand     = INVALID_HREG;
1205          Bool  nreads   = 0;
1206          Short spilloff = 0;
1207 
1208          for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1209 
1210             HReg vreg = reg_usage_arr[ii].vRegs[j];
1211             vassert(hregIsVirtual(vreg));
1212 
1213             if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1214                nreads++;
1215                Int m = hregIndex(vreg);
1216                vassert(IS_VALID_VREGNO(m));
1217                Int k = vreg_state[m];
1218                if (!IS_VALID_RREGNO(k)) {
1219                   /* ok, it is spilled.  Now, is this its last use? */
1220                   vassert(vreg_lrs[m].dead_before >= ii+1);
1221                   if (vreg_lrs[m].dead_before == ii+1
1222                       && hregIsInvalid(cand)) {
1223                      spilloff = vreg_lrs[m].spill_offset;
1224                      cand = vreg;
1225                   }
1226                }
1227             }
1228          }
1229 
1230          if (nreads == 1 && ! hregIsInvalid(cand)) {
1231             HInstr* reloaded;
1232             if (reg_usage_arr[ii].n_vRegs == 2)
1233                vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1234                                   reg_usage_arr[ii].vRegs[1]));
1235 
1236             reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff);
1237             if (debug_direct_reload && !reloaded) {
1238                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1239                con->ppInstr(instrs_in->arr[ii], con->mode64);
1240             }
1241             if (reloaded) {
1242                /* Update info about the insn, so it looks as if it had
1243                   been in this form all along. */
1244                instrs_in->arr[ii] = reloaded;
1245                con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii],
1246                                 con->mode64);
1247                if (debug_direct_reload && !reloaded) {
1248                   vex_printf("  -->  ");
1249                   con->ppInstr(reloaded, con->mode64);
1250                }
1251             }
1252 
1253             if (debug_direct_reload && !reloaded)
1254                vex_printf("\n");
1255          }
1256 
1257       }
1258 
      /* ------------ END directReload optimisation ------------ */

      /* for each virtual reg mentioned in the insn ... */
      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

         HReg vreg = reg_usage_arr[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         if (0) {
            vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
         }

         /* Now we're trying to find a rreg for "vreg".  First of all,
            if it already has an rreg assigned, we don't need to do
            anything more.  Inspect the current state to find out. */
         Int m = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(m));
         Int n = vreg_state[m];
         if (IS_VALID_RREGNO(n)) {
            vassert(rreg_state[n].disp == Bound);
            addToHRegRemap(&remap, vreg, con->univ->regs[n]);
            /* If this rreg is written or modified, mark it as different
               from any spill slot value. */
            if (reg_usage_arr[ii].vMode[j] != HRmRead)
               rreg_state[n].eq_spill_slot = False;
            continue;
         } else {
            vassert(n == INVALID_RREG_NO);
         }

         /* No luck.  The next thing to do is see if there is a
            currently free rreg available, of the correct class.  If
            so, bag it.  NOTE, we could improve this by selecting an
            rreg for which the next live-range event is as far ahead
            as possible. */
         Int k_suboptimal = -1;
         Int k;
         for (k = con->univ->allocable_start[hregClass(vreg)];
              k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
            if (rreg_state[k].disp != Free)
               continue;
            if (rreg_state[k].has_hlrs) {
               /* Well, at least we can use k_suboptimal if we really
                  have to.  Keep on looking for a better candidate. */
               k_suboptimal = k;
            } else {
               /* Found a preferable reg.  Use it. */
               k_suboptimal = -1;
               break;
            }
         }
         if (k_suboptimal >= 0)
            k = k_suboptimal;

         /* If the scan stopped inside the class's range, we found a
            free rreg; bind vreg to it. */
         if (k <= con->univ->allocable_end[hregClass(vreg)]) {
            rreg_state[k].disp = Bound;
            rreg_state[k].vreg = vreg;
            Int p = hregIndex(vreg);
            vassert(IS_VALID_VREGNO(p));
            vreg_state[p] = toShort(k);
            addToHRegRemap(&remap, vreg, con->univ->regs[k]);
            /* Generate a reload if needed.  This only creates needed
               reloads because the live range builder for vregs will
               guarantee that the first event for a vreg is a write.
               Hence, if this reference is not a write, it cannot be
               the first reference for this vreg, and so a reload is
               indeed needed. */
            if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
               vassert(vreg_lrs[p].reg_class != HRcINVALID);
               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               con->genReload(&reload1, &reload2, con->univ->regs[k],
                              vreg_lrs[p].spill_offset, con->mode64);
               vassert(reload1 || reload2); /* can't both be NULL */
               if (reload1)
                  EMIT_INSTR(reload1);
               if (reload2)
                  EMIT_INSTR(reload2);
               /* This rreg is read or modified by the instruction.
                  If it's merely read we can claim it now equals the
                  spill slot, but not so if it is modified. */
               if (reg_usage_arr[ii].vMode[j] == HRmRead) {
                  rreg_state[k].eq_spill_slot = True;
               } else {
                  vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
                  rreg_state[k].eq_spill_slot = False;
               }
            } else {
               rreg_state[k].eq_spill_slot = False;
            }

            continue;
         }
1352 
         /* Well, now we have no option but to spill a vreg.  It's
            important to make a good choice of vreg to spill, and of
            course we need to be careful not to spill a vreg which is
            needed by this insn. */

         /* First, mark in the rreg_state, those rregs which are not spill
            candidates, due to holding a vreg mentioned by this
            instruction.  Or being of the wrong class. */
         for (k = con->univ->allocable_start[hregClass(vreg)];
              k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
            rreg_state[k].is_spill_cand = False;
            if (rreg_state[k].disp != Bound)
               continue;
            rreg_state[k].is_spill_cand = True;
            /* Note, the following loop visits only the virtual regs
               mentioned by the instruction. */
            for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
               if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
                  rreg_state[k].is_spill_cand = False;
                  break;
               }
            }
         }

         /* We can choose to spill any rreg satisfying
            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
            the next use of its associated vreg is as far ahead as
            possible, in the hope that this will minimise the number
            of consequent reloads required. */
         Int spillee
            = findMostDistantlyMentionedVReg (
                 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state,
                 hregClass(vreg), con);

         if (spillee == -1) {
            /* Hmmmmm.  There don't appear to be any spill candidates.
               We're hosed. */
            vex_printf("reg_alloc: can't find a register in class: ");
            ppHRegClass(hregClass(vreg));
            vex_printf("\n");
            vpanic("reg_alloc: can't create a free register.");
         }

         /* Right.  So we're going to spill rreg_state[spillee]. */
         vassert(IS_VALID_RREGNO(spillee));
         vassert(rreg_state[spillee].disp == Bound);
         /* check it's the right class */
         vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg));
         /* check we're not ejecting the vreg for which we are trying
            to free up a register. */
         vassert(! sameHReg(rreg_state[spillee].vreg, vreg));

         m = hregIndex(rreg_state[spillee].vreg);
         vassert(IS_VALID_VREGNO(m));

         /* So here's the spill store.  Assert that we're spilling a
            live vreg. */
         vassert(vreg_lrs[m].dead_before > ii);
         vassert(vreg_lrs[m].reg_class != HRcINVALID);
         /* Skip the store when the register already equals its spill
            slot (eq_spill_slot optimisation). */
         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
            HInstr* spill1 = NULL;
            HInstr* spill2 = NULL;
            con->genSpill(&spill1, &spill2, con->univ->regs[spillee],
                          vreg_lrs[m].spill_offset, con->mode64);
            vassert(spill1 || spill2); /* can't both be NULL */
            if (spill1)
               EMIT_INSTR(spill1);
            if (spill2)
               EMIT_INSTR(spill2);
         }

         /* Update the rreg_state to reflect the new assignment for this
            rreg. */
         rreg_state[spillee].vreg = vreg;
         vreg_state[m] = INVALID_RREG_NO;

         rreg_state[spillee].eq_spill_slot = False; /* be safe */

         /* Note: m is reused here; it now indexes the incoming vreg,
            not the one just spilled. */
         m = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(m));
         vreg_state[m] = toShort(spillee);

         /* Now, if this vreg is being read or modified (as opposed to
            written), we have to generate a reload for it. */
         if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
            vassert(vreg_lrs[m].reg_class != HRcINVALID);
            HInstr* reload1 = NULL;
            HInstr* reload2 = NULL;
            con->genReload(&reload1, &reload2, con->univ->regs[spillee],
                           vreg_lrs[m].spill_offset, con->mode64);
            vassert(reload1 || reload2); /* can't both be NULL */
            if (reload1)
               EMIT_INSTR(reload1);
            if (reload2)
               EMIT_INSTR(reload2);
            /* This rreg is read or modified by the instruction.
               If it's merely read we can claim it now equals the
               spill slot, but not so if it is modified. */
            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
               rreg_state[spillee].eq_spill_slot = True;
            } else {
               vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
               rreg_state[spillee].eq_spill_slot = False;
            }
         }

         /* So after much twisting and turning, we have vreg mapped to
            rreg_state[spillee].rreg.  Note that in the map. */
         addToHRegRemap(&remap, vreg, con->univ->regs[spillee]);

      } /* iterate over virtual registers in this instruction. */
1464 
1465       /* We've finished clowning around with registers in this instruction.
1466          Three results:
1467          - the running rreg_state[] has been updated
1468          - a suitable vreg->rreg mapping for this instruction has been
1469            constructed
1470          - spill and reload instructions may have been emitted.
1471 
1472         The final step is to apply the mapping to the instruction,
1473         and emit that.
1474       */
1475 
1476       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1477       con->mapRegs(&remap, instrs_in->arr[ii], con->mode64);
1478       EMIT_INSTR( instrs_in->arr[ii] );
1479 
1480       if (DEBUG_REGALLOC) {
1481          vex_printf("After dealing with current insn:\n");
1482          PRINT_STATE;
1483          vex_printf("\n");
1484       }
1485 
1486       /* ------ Post-instruction actions for fixed rreg uses ------ */
1487 
1488       /* Now we need to check for rregs exiting fixed live ranges
1489          after this instruction, and if so mark them as free. */
1490       while (True) {
1491          vassert(rreg_lrs_db_next >= 0);
1492          vassert(rreg_lrs_db_next <= rreg_lrs_used);
1493          if (rreg_lrs_db_next == rreg_lrs_used)
1494             break; /* no more real reg live ranges to consider */
1495          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1496             break; /* next live range does not yet start */
1497          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1498          /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
1499             range.  Mark it as such in the main rreg_state array. */
1500          HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1501          vassert(!hregIsVirtual(reg));
1502          Int k = hregIndex(reg);
1503          vassert(IS_VALID_RREGNO(k));
1504          vassert(rreg_state[k].disp == Unavail);
1505          rreg_state[k].disp = Free;
1506          rreg_state[k].vreg = INVALID_HREG;
1507          rreg_state[k].eq_spill_slot = False;
1508 
1509          /* check for further rregs leaving HLRs at this point */
1510          rreg_lrs_db_next++;
1511       }
1512 
1513       if (DEBUG_REGALLOC) {
1514          vex_printf("After post-insn actions for fixed regs:\n");
1515          PRINT_STATE;
1516          vex_printf("\n");
1517       }
1518 
1519    } /* iterate over insns */
1520 
1521    /* ------ END: Process each insn in turn. ------ */
1522 
1523    /* free(rreg_state); */
1524    /* free(rreg_lrs); */
1525    /* if (vreg_lrs) free(vreg_lrs); */
1526 
1527    /* Paranoia */
1528    vassert(rreg_lrs_la_next == rreg_lrs_used);
1529    vassert(rreg_lrs_db_next == rreg_lrs_used);
1530 
1531    return instrs_out;
1532 
1533 #  undef INVALID_INSTRNO
1534 #  undef EMIT_INSTR
1535 #  undef PRINT_STATE
1536 }
1537 
1538 
1539 
1540 /*---------------------------------------------------------------*/
1541 /*---                                       host_reg_alloc2.c ---*/
1542 /*---------------------------------------------------------------*/
1543