/*---------------------------------------------------------------*/ /*--- begin host_reg_alloc2.c ---*/ /*---------------------------------------------------------------*/ /* This file is part of Valgrind, a dynamic binary instrumentation framework. Copyright (C) 2004-2015 OpenWorks LLP info@open-works.net This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. The GNU General Public License is contained in the file COPYING. Neither the names of the U.S. Department of Energy nor the University of California nor the names of its contributors may be used to endorse or promote products derived from this software without prior written permission. */ #include "libvex_basictypes.h" #include "libvex.h" #include "main_util.h" #include "host_generic_regs.h" /* Set to 1 for lots of debugging output. */ #define DEBUG_REGALLOC 0 /* TODO 27 Oct 04: Better consistency checking from what isMove tells us. We can possibly do V-V coalescing even when the src is spilled, providing we can arrange for the dst to have the same spill slot. Note that state[].hreg is the same as the available real regs. Generally rationalise data structures. */ /* Records information on virtual register live ranges. Computed once and remains unchanged after that. */ typedef struct { /* Becomes live for the first time after this insn ... */ Short live_after; /* Becomes dead for the last time before this insn ... */ Short dead_before; /* The "home" spill slot, if needed. Never changes. */ Short spill_offset; Short spill_size; /* What kind of register this is. */ HRegClass reg_class; } VRegLR; /* Records information on real-register live ranges. Computed once and remains unchanged after that. */ typedef struct { HReg rreg; /* Becomes live after this insn ... */ Short live_after; /* Becomes dead before this insn ... */ Short dead_before; } RRegLR; /* An array of the following structs (rreg_state) comprises the running state of the allocator. It indicates what the current disposition of each allocatable real register is. The array gets updated as the allocator processes instructions. The identity of the register is not recorded here, because the index of this structure in doRegisterAllocation()'s |rreg_state| is the index number of the register, and the register itself can be extracted from the RRegUniverse supplied to doRegisterAllocation(). */ typedef struct { /* ------ FIELDS WHICH DO NOT CHANGE ------ */ /* Is this involved in any HLRs? (only an optimisation hint) */ Bool has_hlrs; /* ------ FIELDS WHICH DO CHANGE ------ */ /* 6 May 07: rearranged fields below so the whole struct fits into 16 bytes on both x86 and amd64. */ /* Used when .disp == Bound and we are looking for vregs to spill. */ Bool is_spill_cand; /* Optimisation: used when .disp == Bound. Indicates when the rreg has the same value as the spill slot for the associated vreg. Is safely left at False, and becomes True after a spill store or reload for this rreg. */ Bool eq_spill_slot; /* What's it's current disposition? */ enum { Free, /* available for use */ Unavail, /* in a real-reg live range */ Bound /* in use (holding value of some vreg) */ } disp; /* If .disp == Bound, what vreg is it bound to? */ HReg vreg; } RRegState; /* The allocator also maintains a redundant array of indexes (vreg_state) from vreg numbers back to entries in rreg_state. It is redundant because iff vreg_state[i] == j then hregNumber(rreg_state[j].vreg) == i -- that is, the two entries point at each other. The purpose of this is to speed up activities which involve looking for a particular vreg: there is no need to scan the rreg_state looking for it, just index directly into vreg_state. The FAQ "does this vreg already have an associated rreg" is the main beneficiary. To indicate, in vreg_state[i], that a given vreg is not currently associated with any rreg, that entry can be set to INVALID_RREG_NO. Because the vreg_state entries are signed Shorts, the max number of vregs that can be handed by regalloc is 32767. */ #define INVALID_RREG_NO ((Short)(-1)) #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) /* Search forward from some given point in the incoming instruction sequence. Point is to select a virtual register to spill, by finding the vreg which is mentioned as far ahead as possible, in the hope that this will minimise the number of consequent reloads. Only do the search for vregs which are Bound in the running state, and for which the .is_spill_cand field is set. This allows the caller to arbitrarily restrict the set of spill candidates to be considered. To do this we don't actually need to see the incoming instruction stream. Rather, what we need us the HRegUsage records for the incoming instruction stream. Hence that is passed in. Returns an index into the state array indicating the (v,r) pair to spill, or -1 if none was found. */ static Int findMostDistantlyMentionedVReg ( HRegUsage* reg_usages_in, Int search_from_instr, Int num_instrs, RRegState* state, Int n_state ) { Int k, m; Int furthest_k = -1; Int furthest = -1; vassert(search_from_instr >= 0); for (k = 0; k < n_state; k++) { if (!state[k].is_spill_cand) continue; vassert(state[k].disp == Bound); for (m = search_from_instr; m < num_instrs; m++) { if (HRegUsage__contains(®_usages_in[m], state[k].vreg)) break; } if (m > furthest) { furthest = m; furthest_k = k; } } return furthest_k; } /* Check that this vreg has been assigned a sane spill offset. */ inline static void sanity_check_spill_offset ( VRegLR* vreg ) { switch (vreg->reg_class) { case HRcVec128: case HRcFlt64: vassert(0 == ((UShort)vreg->spill_offset % 16)); break; default: vassert(0 == ((UShort)vreg->spill_offset % 8)); break; } } /* Double the size of the real-reg live-range array, if needed. */ __attribute__((noinline)) static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used ) { Int k; RRegLR* arr2; if (0) vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); vassert(used == *size); arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR)); for (k = 0; k < *size; k++) arr2[k] = (*info)[k]; *size *= 2; *info = arr2; } inline static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) { if (LIKELY(used < *size)) return; ensureRRLRspace_SLOW(info, size, used); } /* Sort an array of RRegLR entries by either the .live_after or .dead_before fields. This is performance-critical. */ static void sortRRLRarray ( RRegLR* arr, Int size, Bool by_live_after ) { Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 }; Int lo = 0; Int hi = size-1; Int i, j, h, bigN, hp; RRegLR v; vassert(size >= 0); if (size == 0) return; bigN = hi - lo + 1; if (bigN < 2) return; hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; if (by_live_after) { for ( ; hp >= 0; hp--) { h = incs[hp]; for (i = lo + h; i <= hi; i++) { v = arr[i]; j = i; while (arr[j-h].live_after > v.live_after) { arr[j] = arr[j-h]; j = j - h; if (j <= (lo + h - 1)) break; } arr[j] = v; } } } else { for ( ; hp >= 0; hp--) { h = incs[hp]; for (i = lo + h; i <= hi; i++) { v = arr[i]; j = i; while (arr[j-h].dead_before > v.dead_before) { arr[j] = arr[j-h]; j = j - h; if (j <= (lo + h - 1)) break; } arr[j] = v; } } } } /* Compute the index of the highest and lowest 1 in a ULong, respectively. Results are undefined if the argument is zero. Don't pass it zero :) */ static inline UInt ULong__maxIndex ( ULong w64 ) { return 63 - __builtin_clzll(w64); } static inline UInt ULong__minIndex ( ULong w64 ) { return __builtin_ctzll(w64); } /* Vectorised memset, copied from Valgrind's m_libcbase.c. */ static void* local_memset ( void *destV, Int c, SizeT sz ) { # define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) UInt c4; UChar* d = destV; UChar uc = c; while ((!IS_4_ALIGNED(d)) && sz >= 1) { d[0] = uc; d++; sz--; } if (sz == 0) return destV; c4 = uc; c4 |= (c4 << 8); c4 |= (c4 << 16); while (sz >= 16) { ((UInt*)d)[0] = c4; ((UInt*)d)[1] = c4; ((UInt*)d)[2] = c4; ((UInt*)d)[3] = c4; d += 16; sz -= 16; } while (sz >= 4) { ((UInt*)d)[0] = c4; d += 4; sz -= 4; } while (sz >= 1) { d[0] = c; d++; sz--; } return destV; # undef IS_4_ALIGNED } /* A target-independent register allocator. Requires various functions which it uses to deal abstractly with instructions and registers, since it cannot have any target-specific knowledge. Returns a new list of instructions, which, as a result of the behaviour of mapRegs, will be in-place modifications of the original instructions. Requires that the incoming code has been generated using vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked run-time error. Takes an expandable array of pointers to unallocated insns. Returns an expandable array of pointers to allocated insns. */ HInstrArray* doRegisterAllocation ( /* Incoming virtual-registerised code. */ HInstrArray* instrs_in, /* The real-register universe to use. This contains facts about real registers, one of which is the set of registers available for allocation. */ const RRegUniverse* univ, /* Return True iff the given insn is a reg-reg move, in which case also return the src and dst regs. */ Bool (*isMove) ( const HInstr*, HReg*, HReg* ), /* Get info about register usage in this insn. */ void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), /* Apply a reg-reg mapping to an insn. */ void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), /* Return one, or, if we're unlucky, two insn(s) to spill/restore a real reg to a spill slot byte offset. The two leading HInstr** args are out parameters, through which the generated insns are returned. Also (optionally) a 'directReload' function, which attempts to replace a given instruction by one which reads directly from a specified spill slot. May be NULL, in which case the optimisation is not attempted. */ void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), HInstr* (*directReload) ( HInstr*, HReg, Short ), Int guest_sizeB, /* For debug printing only. */ void (*ppInstr) ( const HInstr*, Bool ), void (*ppReg) ( HReg ), /* 32/64bit mode */ Bool mode64 ) { # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) const Bool eq_spill_opt = True; /* Info on vregs and rregs. Computed once and remains unchanged. */ Int n_vregs; VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ /* We keep two copies of the real-reg live range info, one sorted by .live_after and the other by .dead_before. First the unsorted info is created in the _la variant is copied into the _db variant. Once that's done both of them are sorted. We also need two integer cursors which record the next location in the two arrays to consider. */ RRegLR* rreg_lrs_la; RRegLR* rreg_lrs_db; Int rreg_lrs_size; Int rreg_lrs_used; Int rreg_lrs_la_next; Int rreg_lrs_db_next; /* Info on register usage in the incoming instruction array. Computed once and remains unchanged, more or less; updated sometimes by the direct-reload optimisation. */ HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */ /* Used when constructing vreg_lrs (for allocating stack slots). */ Short ss_busy_until_before[N_SPILL64S]; /* Used when constructing rreg_lrs. */ Int* rreg_live_after; Int* rreg_dead_before; /* Running state of the core allocation algorithm. */ RRegState* rreg_state; /* [0 .. n_rregs-1] */ Int n_rregs; /* .. and the redundant backward map */ /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. This inplies n_rregs must be <= 32768. */ Short* vreg_state; /* [0 .. n_vregs-1] */ /* The vreg -> rreg map constructed and then applied to each instr. */ HRegRemap remap; /* The output array of instructions. */ HInstrArray* instrs_out; /* Sanity checks are expensive. They are only done periodically, not at each insn processed. */ Bool do_sanity_check; vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (N_SPILL64S % 2)); /* The live range numbers are signed shorts, and so limiting the number of insns to 15000 comfortably guards against them overflowing 32k. */ vassert(instrs_in->arr_used <= 15000); # define INVALID_INSTRNO (-2) # define EMIT_INSTR(_instr) \ do { \ HInstr* _tmp = (_instr); \ if (DEBUG_REGALLOC) { \ vex_printf("** "); \ (*ppInstr)(_tmp, mode64); \ vex_printf("\n\n"); \ } \ addHInstr ( instrs_out, _tmp ); \ } while (0) # define PRINT_STATE \ do { \ Int z, q; \ for (z = 0; z < n_rregs; z++) { \ vex_printf(" rreg_state[%2d] = ", z); \ (*ppReg)(univ->regs[z]); \ vex_printf(" \t"); \ switch (rreg_state[z].disp) { \ case Free: vex_printf("Free\n"); break; \ case Unavail: vex_printf("Unavail\n"); break; \ case Bound: vex_printf("BoundTo "); \ (*ppReg)(rreg_state[z].vreg); \ vex_printf("\n"); break; \ } \ } \ vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ q = 0; \ for (z = 0; z < n_vregs; z++) { \ if (vreg_state[z] == INVALID_RREG_NO) \ continue; \ vex_printf("[%d] -> %d ", z, vreg_state[z]); \ q++; \ if (q > 0 && (q % 6) == 0) \ vex_printf("\n "); \ } \ vex_printf("\n"); \ } while (0) /* --------- Stage 0: set up output array --------- */ /* --------- and allocate/initialise running state. --------- */ instrs_out = newHInstrArray(); /* ... and initialise running state. */ /* n_rregs is no more than a short name for n_available_real_regs. */ n_rregs = univ->allocable; n_vregs = instrs_in->n_vregs; /* If this is not so, vreg_state entries will overflow. */ vassert(n_vregs < 32767); /* If this is not so, the universe we have is nonsensical. */ vassert(n_rregs > 0); rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short)); for (Int j = 0; j < n_rregs; j++) { rreg_state[j].has_hlrs = False; rreg_state[j].disp = Free; rreg_state[j].vreg = INVALID_HREG; rreg_state[j].is_spill_cand = False; rreg_state[j].eq_spill_slot = False; } for (Int j = 0; j < n_vregs; j++) vreg_state[j] = INVALID_RREG_NO; /* --------- Stage 1: compute vreg live ranges. --------- */ /* --------- Stage 2: compute rreg live ranges. --------- */ /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ /* This is relatively simple, because (1) we only seek the complete end-to-end live range of each vreg, and are not interested in any holes in it, and (2) the vregs are conveniently numbered 0 .. n_vregs-1, so we can just dump the results in a pre-allocated array. */ vreg_lrs = NULL; if (n_vregs > 0) vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs); for (Int j = 0; j < n_vregs; j++) { vreg_lrs[j].live_after = INVALID_INSTRNO; vreg_lrs[j].dead_before = INVALID_INSTRNO; vreg_lrs[j].spill_offset = 0; vreg_lrs[j].spill_size = 0; vreg_lrs[j].reg_class = HRcINVALID; } /* An array to hold the reg-usage info for the incoming instructions. */ reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1); /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ /* This is more complex than Stage 1, because we need to compute exactly all the live ranges of all the allocatable real regs, and we don't know in advance how many there will be. */ rreg_lrs_used = 0; rreg_lrs_size = 4; rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR)); rreg_lrs_db = NULL; /* we'll create this later */ /* We'll need to track live range start/end points seperately for each rreg. Sigh. */ vassert(n_rregs > 0); rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); for (Int j = 0; j < n_rregs; j++) { rreg_live_after[j] = rreg_dead_before[j] = INVALID_INSTRNO; } /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ /* ------ start of ITERATE OVER INSNS ------ */ for (Int ii = 0; ii < instrs_in->arr_used; ii++) { (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); if (0) { vex_printf("\n%d stage1: ", ii); (*ppInstr)(instrs_in->arr[ii], mode64); vex_printf("\n"); ppHRegUsage(univ, ®_usage_arr[ii]); } /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ /* for each virtual reg mentioned in the insn ... */ for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { HReg vreg = reg_usage_arr[ii].vRegs[j]; vassert(hregIsVirtual(vreg)); Int k = hregIndex(vreg); if (k < 0 || k >= n_vregs) { vex_printf("\n"); (*ppInstr)(instrs_in->arr[ii], mode64); vex_printf("\n"); vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); vpanic("doRegisterAllocation: out-of-range vreg"); } /* Take the opportunity to note its regclass. We'll need that when allocating spill slots. */ if (vreg_lrs[k].reg_class == HRcINVALID) { /* First mention of this vreg. */ vreg_lrs[k].reg_class = hregClass(vreg); } else { /* Seen it before, so check for consistency. */ vassert(vreg_lrs[k].reg_class == hregClass(vreg)); } /* Now consider live ranges. */ switch (reg_usage_arr[ii].vMode[j]) { case HRmRead: if (vreg_lrs[k].live_after == INVALID_INSTRNO) { vex_printf("\n\nOFFENDING VREG = %d\n", k); vpanic("doRegisterAllocation: " "first event for vreg is Read"); } vreg_lrs[k].dead_before = toShort(ii + 1); break; case HRmWrite: if (vreg_lrs[k].live_after == INVALID_INSTRNO) vreg_lrs[k].live_after = toShort(ii); vreg_lrs[k].dead_before = toShort(ii + 1); break; case HRmModify: if (vreg_lrs[k].live_after == INVALID_INSTRNO) { vex_printf("\n\nOFFENDING VREG = %d\n", k); vpanic("doRegisterAllocation: " "first event for vreg is Modify"); } vreg_lrs[k].dead_before = toShort(ii + 1); break; default: vpanic("doRegisterAllocation(1)"); } /* switch */ } /* iterate over virtual registers */ /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ /* If this doesn't hold, the following iteration over real registers will fail miserably. */ vassert(N_RREGUNIVERSE_REGS == 64); const ULong rRead = reg_usage_arr[ii].rRead; const ULong rWritten = reg_usage_arr[ii].rWritten; const ULong rMentioned = rRead | rWritten; UInt rReg_minIndex; UInt rReg_maxIndex; if (rMentioned == 0) { /* There are no real register uses in this insn. Set rReg_{min,max}Index so that the following loop doesn't iterate at all, so as to avoid wasting time. */ rReg_minIndex = 1; rReg_maxIndex = 0; } else { rReg_minIndex = ULong__minIndex(rMentioned); rReg_maxIndex = ULong__maxIndex(rMentioned); /* Don't bother to look at registers which are not available to the allocator. We asserted above that n_rregs > 0, so n_rregs-1 is safe. */ if (rReg_maxIndex >= n_rregs) rReg_maxIndex = n_rregs-1; } /* for each allocator-available real reg mentioned in the insn ... */ /* Note. We are allocating only over the real regs available to the allocator. Others, eg the stack or baseblock pointers, are unavailable to allocation and so we never visit them. Hence the iteration is cut off at n_rregs-1, since n_rregs == univ->allocable. */ for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) { const ULong jMask = 1ULL << j; if (LIKELY((rMentioned & jMask) == 0)) continue; const Bool isR = (rRead & jMask) != 0; const Bool isW = (rWritten & jMask) != 0; /* Dummy initialisations of flush_la and flush_db to avoid possible bogus uninit-var warnings from gcc. */ Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; Bool flush = False; if (isW && !isR) { flush_la = rreg_live_after[j]; flush_db = rreg_dead_before[j]; if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO) flush = True; rreg_live_after[j] = ii; rreg_dead_before[j] = ii+1; } else if (!isW && isR) { if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); (*ppReg)(univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); (*ppInstr)(instrs_in->arr[ii], mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Read"); } rreg_dead_before[j] = ii+1; } else { vassert(isR && isW); if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); (*ppReg)(univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); (*ppInstr)(instrs_in->arr[ii], mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Modify"); } rreg_dead_before[j] = ii+1; } if (flush) { vassert(flush_la != INVALID_INSTRNO); vassert(flush_db != INVALID_INSTRNO); ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); if (0) vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); rreg_lrs_used++; } } /* iterate over rregs in the instr */ /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ } /* iterate over insns */ /* ------ end of ITERATE OVER INSNS ------ */ /* ------ start of FINALISE RREG LIVE RANGES ------ */ /* Now finish up any live ranges left over. */ for (Int j = 0; j < n_rregs; j++) { if (0) { vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], rreg_dead_before[j]); } vassert( (rreg_live_after[j] == INVALID_INSTRNO && rreg_dead_before[j] == INVALID_INSTRNO) || (rreg_live_after[j] != INVALID_INSTRNO && rreg_dead_before[j] != INVALID_INSTRNO) ); if (rreg_live_after[j] == INVALID_INSTRNO) continue; ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); if (0) vex_printf("FLUSH 2 (%d,%d)\n", rreg_live_after[j], rreg_dead_before[j]); rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); rreg_lrs_used++; } /* Compute summary hints for choosing real regs. If a real reg is involved in a hard live range, record that fact in the fixed part of the running rreg_state. Later, when offered a choice between rregs, it's better to choose one which is not marked as having any HLRs, since ones with HLRs may need to be spilled around their HLRs. Correctness of final assignment is unaffected by this mechanism -- it is only an optimisation. */ for (Int j = 0; j < rreg_lrs_used; j++) { HReg rreg = rreg_lrs_la[j].rreg; vassert(!hregIsVirtual(rreg)); /* rreg is involved in a HLR. Record this info in the array, if there is space. */ UInt ix = hregIndex(rreg); vassert(ix < n_rregs); rreg_state[ix].has_hlrs = True; } if (0) { for (Int j = 0; j < n_rregs; j++) { if (!rreg_state[j].has_hlrs) continue; ppReg(univ->regs[j]); vex_printf(" hinted\n"); } } /* Finally, copy the _la variant into the _db variant and sort both by their respective fields. */ rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR)); for (Int j = 0; j < rreg_lrs_used; j++) rreg_lrs_db[j] = rreg_lrs_la[j]; sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); /* And set up the cursors. */ rreg_lrs_la_next = 0; rreg_lrs_db_next = 0; for (Int j = 1; j < rreg_lrs_used; j++) { vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); } /* ------ end of FINALISE RREG LIVE RANGES ------ */ if (DEBUG_REGALLOC) { for (Int j = 0; j < n_vregs; j++) { vex_printf("vreg %d: la = %d, db = %d\n", j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); } } if (DEBUG_REGALLOC) { vex_printf("RRegLRs by LA:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); (*ppReg)(rreg_lrs_la[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); } vex_printf("RRegLRs by DB:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); (*ppReg)(rreg_lrs_db[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); } } /* --------- Stage 3: allocate spill slots. --------- */ /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits to spill (classes Flt64 and Vec128), we have to allocate two consecutive spill slots. For 256 bit registers (class Vec256), we have to allocate four consecutive spill slots. For Vec128-class on PowerPC, the spill slot's actual address must be 16-byte aligned. Since the spill slot's address is computed as an offset from the guest state pointer, and since the user of the generated code must set that pointer to a 32-aligned value, we have the residual obligation here of choosing a 16-aligned spill slot offset for Vec128-class values. Since each spill slot is 8 bytes long, that means for Vec128-class values we must allocated a spill slot number which is zero mod 2. Similarly, for Vec256 class on amd64, find a spill slot number which is zero mod 4. This guarantees it will be 32 byte aligned, which isn't actually necessary on amd64 (we use movUpd etc to spill), but seems like good practice. Do a rank-based allocation of vregs to spill slot numbers. We put as few values as possible in spill slots, but nevertheless need to have a spill slot available for all vregs, just in case. */ /* Int max_ss_no = -1; */ local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); for (Int j = 0; j < n_vregs; j++) { /* True iff this vreg is unused. In which case we also expect that the reg_class field for it has not been set. */ if (vreg_lrs[j].live_after == INVALID_INSTRNO) { vassert(vreg_lrs[j].reg_class == HRcINVALID); continue; } /* The spill slots are 64 bits in size. As per the comment on definition of HRegClass in host_generic_regs.h, that means, to spill a vreg of class Flt64 or Vec128, we'll need to find two adjacent spill slots to use. For Vec256, we'll need to find four adjacent slots to use. Note, this logic needs to kept in sync with the size info on the definition of HRegClass. */ Int ss_no = -1; switch (vreg_lrs[j].reg_class) { case HRcVec128: case HRcFlt64: /* Find two adjacent free slots in which between them provide up to 128 bits in which to spill the vreg. Since we are trying to find an even:odd pair, move along in steps of 2 (slots). */ for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2) if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after) break; if (ss_no >= N_SPILL64S-1) { vpanic("LibVEX_N_SPILL_BYTES is too low. " "Increase and recompile."); } ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before; ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before; break; default: /* The ordinary case -- just find a single spill slot. */ /* Find the lowest-numbered spill slot which is available at the start point of this interval, and assign the interval to it. */ for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after) break; if (ss_no == N_SPILL64S) { vpanic("LibVEX_N_SPILL_BYTES is too low. " "Increase and recompile."); } ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before; break; } /* switch (vreg_lrs[j].reg_class) */ /* This reflects LibVEX's hard-wired knowledge of the baseBlock layout: the guest state, then two equal sized areas following it for two sets of shadow state, and then the spill area. */ vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); /* Independent check that we've made a sane choice of slot */ sanity_check_spill_offset( &vreg_lrs[j] ); /* if (j > max_ss_no) */ /* max_ss_no = j; */ } if (0) { vex_printf("\n\n"); for (Int j = 0; j < n_vregs; j++) vex_printf("vreg %d --> spill offset %d\n", j, vreg_lrs[j].spill_offset); } /* --------- Stage 4: establish rreg preferences --------- */ /* It may be advantageous to allocating certain vregs to specific rregs, as a way of avoiding reg-reg moves later. Here we establish which, if any, rreg each vreg would prefer to be in. Note that this constrains the allocator -- ideally we end up with as few as possible vregs expressing a preference. This is an optimisation: if the .preferred_rreg field is never set to anything different from INVALID_HREG, the allocator still works. */ /* 30 Dec 04: removed this mechanism as it does not seem to help. */ /* --------- Stage 5: process instructions --------- */ /* This is the main loop of the allocator. First, we need to correctly set up our running state, which tracks the status of each real register. */ /* ------ BEGIN: Process each insn in turn. ------ */ for (Int ii = 0; ii < instrs_in->arr_used; ii++) { if (DEBUG_REGALLOC) { vex_printf("\n====----====---- Insn %d ----====----====\n", ii); vex_printf("---- "); (*ppInstr)(instrs_in->arr[ii], mode64); vex_printf("\n\nInitial state:\n"); PRINT_STATE; vex_printf("\n"); } /* ------------ Sanity checks ------------ */ /* Sanity checks are expensive. So they are done only once every 13 instructions, and just before the last instruction. */ do_sanity_check = toBool( False /* Set to True for sanity checking of all insns. */ || ii == instrs_in->arr_used-1 || (ii > 0 && (ii % 13) == 0) ); if (do_sanity_check) { /* Sanity check 1: all rregs with a hard live range crossing this insn must be marked as unavailable in the running state. */ for (Int j = 0; j < rreg_lrs_used; j++) { if (rreg_lrs_la[j].live_after < ii && ii < rreg_lrs_la[j].dead_before) { /* ii is the middle of a hard live range for some real reg. Check it's marked as such in the running state. */ HReg reg = rreg_lrs_la[j].rreg; if (0) { vex_printf("considering la %d .. db %d reg = ", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before); (*ppReg)(reg); vex_printf("\n"); } /* assert that this rreg is marked as unavailable */ vassert(!hregIsVirtual(reg)); vassert(rreg_state[hregIndex(reg)].disp == Unavail); } } /* Sanity check 2: conversely, all rregs marked as unavailable in the running rreg_state must have a corresponding hard live range entry in the rreg_lrs array. */ for (Int j = 0; j < n_rregs; j++) { vassert(rreg_state[j].disp == Bound || rreg_state[j].disp == Free || rreg_state[j].disp == Unavail); if (rreg_state[j].disp != Unavail) continue; Int k; for (k = 0; k < rreg_lrs_used; k++) { HReg reg = rreg_lrs_la[k].rreg; vassert(!hregIsVirtual(reg)); if (hregIndex(reg) == j && rreg_lrs_la[k].live_after < ii && ii < rreg_lrs_la[k].dead_before) break; } /* If this vassertion fails, we couldn't find a corresponding HLR. */ vassert(k < rreg_lrs_used); } /* Sanity check 3: all vreg-rreg bindings must bind registers of the same class. */ for (Int j = 0; j < n_rregs; j++) { if (rreg_state[j].disp != Bound) { vassert(rreg_state[j].eq_spill_slot == False); continue; } vassert(hregClass(univ->regs[j]) == hregClass(rreg_state[j].vreg)); vassert( hregIsVirtual(rreg_state[j].vreg)); } /* Sanity check 4: the vreg_state and rreg_state mutually-redundant mappings are consistent. If rreg_state[j].vreg points at some vreg_state entry then that vreg_state entry should point back at rreg_state[j]. */ for (Int j = 0; j < n_rregs; j++) { if (rreg_state[j].disp != Bound) continue; Int k = hregIndex(rreg_state[j].vreg); vassert(IS_VALID_VREGNO(k)); vassert(vreg_state[k] == j); } for (Int j = 0; j < n_vregs; j++) { Int k = vreg_state[j]; if (k == INVALID_RREG_NO) continue; vassert(IS_VALID_RREGNO(k)); vassert(rreg_state[k].disp == Bound); vassert(hregIndex(rreg_state[k].vreg) == j); } } /* if (do_sanity_check) */ /* ------------ end of Sanity checks ------------ */ /* Do various optimisations pertaining to register coalescing and preferencing: MOV v <-> v coalescing (done here). MOV v <-> r coalescing (not yet, if ever) */ /* If doing a reg-reg move between two vregs, and the src's live range ends here and the dst's live range starts here, bind the dst to the src's rreg, and that's all. */ HReg vregS = INVALID_HREG; HReg vregD = INVALID_HREG; if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { if (!hregIsVirtual(vregS)) goto cannot_coalesce; if (!hregIsVirtual(vregD)) goto cannot_coalesce; /* Check that *isMove is not telling us a bunch of lies ... */ vassert(hregClass(vregS) == hregClass(vregD)); Int k = hregIndex(vregS); Int m = hregIndex(vregD); vassert(IS_VALID_VREGNO(k)); vassert(IS_VALID_VREGNO(m)); if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; if (DEBUG_REGALLOC) { vex_printf("COALESCE "); (*ppReg)(vregS); vex_printf(" -> "); (*ppReg)(vregD); vex_printf("\n\n"); } /* Find the state entry for vregS. */ Int n = vreg_state[k]; /* k is the index of vregS */ if (n == INVALID_RREG_NO) { /* vregS is not currently in a real register. So we can't do the coalescing. Give up. */ goto cannot_coalesce; } vassert(IS_VALID_RREGNO(n)); /* Finally, we can do the coalescing. It's trivial -- merely claim vregS's register for vregD. */ rreg_state[n].vreg = vregD; vassert(IS_VALID_VREGNO(hregIndex(vregD))); vassert(IS_VALID_VREGNO(hregIndex(vregS))); vreg_state[hregIndex(vregD)] = toShort(n); vreg_state[hregIndex(vregS)] = INVALID_RREG_NO; /* This rreg has become associated with a different vreg and hence with a different spill slot. Play safe. */ rreg_state[n].eq_spill_slot = False; /* Move on to the next insn. We skip the post-insn stuff for fixed registers, since this move should not interact with them in any way. */ continue; } cannot_coalesce: /* ------ Free up rregs bound to dead vregs ------ */ /* Look for vregs whose live range has just ended, and mark the associated rreg as free. */ for (Int j = 0; j < n_rregs; j++) { if (rreg_state[j].disp != Bound) continue; UInt vregno = hregIndex(rreg_state[j].vreg); vassert(IS_VALID_VREGNO(vregno)); if (vreg_lrs[vregno].dead_before <= ii) { rreg_state[j].disp = Free; rreg_state[j].eq_spill_slot = False; Int m = hregIndex(rreg_state[j].vreg); vassert(IS_VALID_VREGNO(m)); vreg_state[m] = INVALID_RREG_NO; if (DEBUG_REGALLOC) { vex_printf("free up "); (*ppReg)(univ->regs[j]); vex_printf("\n"); } } } /* ------ Pre-instruction actions for fixed rreg uses ------ */ /* Now we have to deal with rregs which are about to be made live by this instruction -- in other words, are entering into one of their live ranges. If any such rreg holds a vreg, we will have to free up the rreg. The simplest solution which is correct is to spill the rreg. Note we could do better: * Could move it into some other free rreg, if one is available Do this efficiently, by incrementally stepping along an array of rreg HLRs that are known to be sorted by start point (their .live_after field). */ while (True) { vassert(rreg_lrs_la_next >= 0); vassert(rreg_lrs_la_next <= rreg_lrs_used); if (rreg_lrs_la_next == rreg_lrs_used) break; /* no more real reg live ranges to consider */ if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) break; /* next live range does not yet start */ vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. Find the associated rreg_state entry. */ /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. Real register live ranges are guaranteed to be well-formed in that they start with a write to the register -- Stage 2 rejects any code not satisfying this. So the correct question to ask is whether rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, whether the reg becomes live after this insn -- rather than before it. */ if (DEBUG_REGALLOC) { vex_printf("need to free up rreg: "); (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); vex_printf("\n\n"); } Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); /* If this fails, we don't have an entry for this rreg. Which we should. */ vassert(IS_VALID_RREGNO(k)); Int m = hregIndex(rreg_state[k].vreg); if (rreg_state[k].disp == Bound) { /* Yes, there is an associated vreg. Spill it if it's still live. */ vassert(IS_VALID_VREGNO(m)); vreg_state[m] = INVALID_RREG_NO; if (vreg_lrs[m].dead_before > ii) { vassert(vreg_lrs[m].reg_class != HRcINVALID); if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; (*genSpill)( &spill1, &spill2, univ->regs[k], vreg_lrs[m].spill_offset, mode64 ); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); if (spill2) EMIT_INSTR(spill2); } rreg_state[k].eq_spill_slot = True; } } rreg_state[k].disp = Unavail; rreg_state[k].vreg = INVALID_HREG; rreg_state[k].eq_spill_slot = False; /* check for further rregs entering HLRs at this point */ rreg_lrs_la_next++; } if (DEBUG_REGALLOC) { vex_printf("After pre-insn actions for fixed regs:\n"); PRINT_STATE; vex_printf("\n"); } /* ------ Deal with the current instruction. ------ */ /* Finally we can begin the processing of this instruction itself. The aim is to free up enough rregs for this insn. This may generate spill stores since we may have to evict some vregs currently in rregs. Also generates spill loads. We also build up the final vreg->rreg mapping to be applied to the insn. */ initHRegRemap(&remap); /* ------------ BEGIN directReload optimisation ----------- */ /* If the instruction reads exactly one vreg which is currently in a spill slot, and this is last use of that vreg, see if we can convert the instruction into one that reads directly from the spill slot. This is clearly only possible for x86 and amd64 targets, since ppc and arm are load-store architectures. If successful, replace instrs_in->arr[ii] with this new instruction, and recompute its reg usage, so that the change is invisible to the standard-case handling that follows. */ if (directReload && reg_usage_arr[ii].n_vRegs <= 2) { Bool debug_direct_reload = False; HReg cand = INVALID_HREG; Bool nreads = 0; Short spilloff = 0; for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { HReg vreg = reg_usage_arr[ii].vRegs[j]; vassert(hregIsVirtual(vreg)); if (reg_usage_arr[ii].vMode[j] == HRmRead) { nreads++; Int m = hregIndex(vreg); vassert(IS_VALID_VREGNO(m)); Int k = vreg_state[m]; if (!IS_VALID_RREGNO(k)) { /* ok, it is spilled. Now, is this its last use? */ vassert(vreg_lrs[m].dead_before >= ii+1); if (vreg_lrs[m].dead_before == ii+1 && hregIsInvalid(cand)) { spilloff = vreg_lrs[m].spill_offset; cand = vreg; } } } } if (nreads == 1 && ! hregIsInvalid(cand)) { HInstr* reloaded; if (reg_usage_arr[ii].n_vRegs == 2) vassert(! sameHReg(reg_usage_arr[ii].vRegs[0], reg_usage_arr[ii].vRegs[1])); reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); if (debug_direct_reload && !reloaded) { vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); ppInstr(instrs_in->arr[ii], mode64); } if (reloaded) { /* Update info about the insn, so it looks as if it had been in this form all along. */ instrs_in->arr[ii] = reloaded; (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); if (debug_direct_reload && !reloaded) { vex_printf(" --> "); ppInstr(reloaded, mode64); } } if (debug_direct_reload && !reloaded) vex_printf("\n"); } } /* ------------ END directReload optimisation ------------ */ /* for each virtual reg mentioned in the insn ... */ for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { HReg vreg = reg_usage_arr[ii].vRegs[j]; vassert(hregIsVirtual(vreg)); if (0) { vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); } /* Now we're trying to find a rreg for "vreg". First of all, if it already has an rreg assigned, we don't need to do anything more. Inspect the current state to find out. */ Int m = hregIndex(vreg); vassert(IS_VALID_VREGNO(m)); Int n = vreg_state[m]; if (IS_VALID_RREGNO(n)) { vassert(rreg_state[n].disp == Bound); addToHRegRemap(&remap, vreg, univ->regs[n]); /* If this rreg is written or modified, mark it as different from any spill slot value. */ if (reg_usage_arr[ii].vMode[j] != HRmRead) rreg_state[n].eq_spill_slot = False; continue; } else { vassert(n == INVALID_RREG_NO); } /* No luck. The next thing to do is see if there is a currently free rreg available, of the correct class. If so, bag it. NOTE, we could improve this by selecting an rreg for which the next live-range event is as far ahead as possible. */ Int k_suboptimal = -1; Int k; for (k = 0; k < n_rregs; k++) { if (rreg_state[k].disp != Free || hregClass(univ->regs[k]) != hregClass(vreg)) continue; if (rreg_state[k].has_hlrs) { /* Well, at least we can use k_suboptimal if we really have to. Keep on looking for a better candidate. */ k_suboptimal = k; } else { /* Found a preferable reg. Use it. */ k_suboptimal = -1; break; } } if (k_suboptimal >= 0) k = k_suboptimal; if (k < n_rregs) { rreg_state[k].disp = Bound; rreg_state[k].vreg = vreg; Int p = hregIndex(vreg); vassert(IS_VALID_VREGNO(p)); vreg_state[p] = toShort(k); addToHRegRemap(&remap, vreg, univ->regs[k]); /* Generate a reload if needed. This only creates needed reloads because the live range builder for vregs will guarantee that the first event for a vreg is a write. Hence, if this reference is not a write, it cannot be the first reference for this vreg, and so a reload is indeed needed. */ if (reg_usage_arr[ii].vMode[j] != HRmWrite) { vassert(vreg_lrs[p].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; (*genReload)( &reload1, &reload2, univ->regs[k], vreg_lrs[p].spill_offset, mode64 ); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); if (reload2) EMIT_INSTR(reload2); /* This rreg is read or modified by the instruction. If it's merely read we can claim it now equals the spill slot, but not so if it is modified. */ if (reg_usage_arr[ii].vMode[j] == HRmRead) { rreg_state[k].eq_spill_slot = True; } else { vassert(reg_usage_arr[ii].vMode[j] == HRmModify); rreg_state[k].eq_spill_slot = False; } } else { rreg_state[k].eq_spill_slot = False; } continue; } /* Well, now we have no option but to spill a vreg. It's important to make a good choice of vreg to spill, and of course we need to be careful not to spill a vreg which is needed by this insn. */ /* First, mark in the rreg_state, those rregs which are not spill candidates, due to holding a vreg mentioned by this instruction. Or being of the wrong class. */ for (k = 0; k < n_rregs; k++) { rreg_state[k].is_spill_cand = False; if (rreg_state[k].disp != Bound) continue; if (hregClass(univ->regs[k]) != hregClass(vreg)) continue; rreg_state[k].is_spill_cand = True; /* Note, the following loop visits only the virtual regs mentioned by the instruction. */ for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) { if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) { rreg_state[k].is_spill_cand = False; break; } } } /* We can choose to spill any rreg satisfying rreg_state[r].is_spill_cand (so to speak). Choose r so that the next use of its associated vreg is as far ahead as possible, in the hope that this will minimise the number of consequent reloads required. */ Int spillee = findMostDistantlyMentionedVReg ( reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs ); if (spillee == -1) { /* Hmmmmm. There don't appear to be any spill candidates. We're hosed. */ vex_printf("reg_alloc: can't find a register in class: "); ppHRegClass(hregClass(vreg)); vex_printf("\n"); vpanic("reg_alloc: can't create a free register."); } /* Right. So we're going to spill rreg_state[spillee]. */ vassert(IS_VALID_RREGNO(spillee)); vassert(rreg_state[spillee].disp == Bound); /* check it's the right class */ vassert(hregClass(univ->regs[spillee]) == hregClass(vreg)); /* check we're not ejecting the vreg for which we are trying to free up a register. */ vassert(! sameHReg(rreg_state[spillee].vreg, vreg)); m = hregIndex(rreg_state[spillee].vreg); vassert(IS_VALID_VREGNO(m)); /* So here's the spill store. Assert that we're spilling a live vreg. */ vassert(vreg_lrs[m].dead_before > ii); vassert(vreg_lrs[m].reg_class != HRcINVALID); if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; (*genSpill)( &spill1, &spill2, univ->regs[spillee], vreg_lrs[m].spill_offset, mode64 ); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); if (spill2) EMIT_INSTR(spill2); } /* Update the rreg_state to reflect the new assignment for this rreg. */ rreg_state[spillee].vreg = vreg; vreg_state[m] = INVALID_RREG_NO; rreg_state[spillee].eq_spill_slot = False; /* be safe */ m = hregIndex(vreg); vassert(IS_VALID_VREGNO(m)); vreg_state[m] = toShort(spillee); /* Now, if this vreg is being read or modified (as opposed to written), we have to generate a reload for it. */ if (reg_usage_arr[ii].vMode[j] != HRmWrite) { vassert(vreg_lrs[m].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; (*genReload)( &reload1, &reload2, univ->regs[spillee], vreg_lrs[m].spill_offset, mode64 ); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); if (reload2) EMIT_INSTR(reload2); /* This rreg is read or modified by the instruction. If it's merely read we can claim it now equals the spill slot, but not so if it is modified. */ if (reg_usage_arr[ii].vMode[j] == HRmRead) { rreg_state[spillee].eq_spill_slot = True; } else { vassert(reg_usage_arr[ii].vMode[j] == HRmModify); rreg_state[spillee].eq_spill_slot = False; } } /* So after much twisting and turning, we have vreg mapped to rreg_state[spillee].rreg. Note that in the map. */ addToHRegRemap(&remap, vreg, univ->regs[spillee]); } /* iterate over virtual registers in this instruction. */ /* We've finished clowning around with registers in this instruction. Three results: - the running rreg_state[] has been updated - a suitable vreg->rreg mapping for this instruction has been constructed - spill and reload instructions may have been emitted. The final step is to apply the mapping to the instruction, and emit that. */ /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); EMIT_INSTR( instrs_in->arr[ii] ); if (DEBUG_REGALLOC) { vex_printf("After dealing with current insn:\n"); PRINT_STATE; vex_printf("\n"); } /* ------ Post-instruction actions for fixed rreg uses ------ */ /* Now we need to check for rregs exiting fixed live ranges after this instruction, and if so mark them as free. */ while (True) { vassert(rreg_lrs_db_next >= 0); vassert(rreg_lrs_db_next <= rreg_lrs_used); if (rreg_lrs_db_next == rreg_lrs_used) break; /* no more real reg live ranges to consider */ if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) break; /* next live range does not yet start */ vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live range. Mark it as such in the main rreg_state array. */ HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg; vassert(!hregIsVirtual(reg)); Int k = hregIndex(reg); vassert(IS_VALID_RREGNO(k)); vassert(rreg_state[k].disp == Unavail); rreg_state[k].disp = Free; rreg_state[k].vreg = INVALID_HREG; rreg_state[k].eq_spill_slot = False; /* check for further rregs leaving HLRs at this point */ rreg_lrs_db_next++; } if (DEBUG_REGALLOC) { vex_printf("After post-insn actions for fixed regs:\n"); PRINT_STATE; vex_printf("\n"); } } /* iterate over insns */ /* ------ END: Process each insn in turn. ------ */ /* free(rreg_state); */ /* free(rreg_lrs); */ /* if (vreg_lrs) free(vreg_lrs); */ /* Paranoia */ vassert(rreg_lrs_la_next == rreg_lrs_used); vassert(rreg_lrs_db_next == rreg_lrs_used); return instrs_out; # undef INVALID_INSTRNO # undef EMIT_INSTR # undef PRINT_STATE } /*---------------------------------------------------------------*/ /*--- host_reg_alloc2.c ---*/ /*---------------------------------------------------------------*/