//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains a pass that fixes AArch64 branches which have ended up out // of range for their immediate operands. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "aarch64-branch-fixup" #include "AArch64.h" #include "AArch64InstrInfo.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); /// Return the worst case padding that could result from unknown offset bits. /// This does not include alignment padding caused by known offset bits. /// /// @param LogAlign log2(alignment) /// @param KnownBits Number of known low offset bits. static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { if (KnownBits < LogAlign) return (1u << LogAlign) - (1u << KnownBits); return 0; } namespace { /// Due to limited PC-relative displacements, conditional branches to distant /// blocks may need converting into an unconditional equivalent. For example: /// tbz w1, #0, far_away /// becomes /// tbnz w1, #0, skip /// b far_away /// skip: class AArch64BranchFixup : public MachineFunctionPass { /// Information about the offset and size of a single basic block. struct BasicBlockInfo { /// Distance from the beginning of the function to the beginning of this /// basic block. /// /// Offsets are computed assuming worst case padding before an aligned /// block. This means that subtracting basic block offsets always gives a /// conservative estimate of the real distance which may be smaller. /// /// Because worst case padding is used, the computed offset of an aligned /// block may not actually be aligned. unsigned Offset; /// Size of the basic block in bytes. If the block contains inline /// assembly, this is a worst case estimate. /// /// The size does not include any alignment padding whether from the /// beginning of the block, or from an aligned jump table at the end. unsigned Size; /// The number of low bits in Offset that are known to be exact. The /// remaining bits of Offset are an upper bound. uint8_t KnownBits; /// When non-zero, the block contains instructions (inline asm) of unknown /// size. The real size may be smaller than Size bytes by a multiple of 1 /// << Unalign. uint8_t Unalign; BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} /// Compute the number of known offset bits internally to this block. /// This number should be used to predict worst case padding when /// splitting the block. unsigned internalKnownBits() const { unsigned Bits = Unalign ? Unalign : KnownBits; // If the block size isn't a multiple of the known bits, assume the // worst case padding. if (Size & ((1u << Bits) - 1)) Bits = countTrailingZeros(Size); return Bits; } /// Compute the offset immediately following this block. If LogAlign is /// specified, return the offset the successor block will get if it has /// this alignment. unsigned postOffset(unsigned LogAlign = 0) const { unsigned PO = Offset + Size; if (!LogAlign) return PO; // Add alignment padding from the terminator. return PO + UnknownPadding(LogAlign, internalKnownBits()); } /// Compute the number of known low bits of postOffset. If this block /// contains inline asm, the number of known bits drops to the /// instruction alignment. An aligned terminator may increase the number /// of know bits. /// If LogAlign is given, also consider the alignment of the next block. unsigned postKnownBits(unsigned LogAlign = 0) const { return std::max(LogAlign, internalKnownBits()); } }; std::vector<BasicBlockInfo> BBInfo; /// One per immediate branch, keeping the machine instruction pointer, /// conditional or unconditional, the max displacement, and (if IsCond is /// true) the corresponding inverted branch opcode. struct ImmBranch { MachineInstr *MI; unsigned OffsetBits : 31; bool IsCond : 1; ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} }; /// Keep track of all the immediate branch instructions. /// std::vector<ImmBranch> ImmBranches; MachineFunction *MF; const AArch64InstrInfo *TII; public: static char ID; AArch64BranchFixup() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "AArch64 branch fixup pass"; } private: void initializeFunctionInfo(); MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); void adjustBBOffsetsAfter(MachineBasicBlock *BB); bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned OffsetBits); bool fixupImmediateBr(ImmBranch &Br); bool fixupConditionalBr(ImmBranch &Br); void computeBlockSize(MachineBasicBlock *MBB); unsigned getOffsetOf(MachineInstr *MI) const; void dumpBBs(); void verify(); }; char AArch64BranchFixup::ID = 0; } /// check BBOffsets void AArch64BranchFixup::verify() { #ifndef NDEBUG for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; unsigned MBBId = MBB->getNumber(); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } #endif } /// print block size and offset information - debugging void AArch64BranchFixup::dumpBBs() { DEBUG({ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; dbgs() << format("%08x BB#%u\t", BBI.Offset, J) << " kb=" << unsigned(BBI.KnownBits) << " ua=" << unsigned(BBI.Unalign) << format(" size=%#x\n", BBInfo[J].Size); } }); } /// Returns an instance of the branch fixup pass. FunctionPass *llvm::createAArch64BranchFixupPass() { return new AArch64BranchFixup(); } bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) { MF = &mf; DEBUG(dbgs() << "***** AArch64BranchFixup ******"); TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); // This pass invalidates liveness information when it splits basic blocks. MF->getRegInfo().invalidateLiveness(); // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); // Do the initial scan of the function, building up information about the // sizes of each block and location of each immediate branch. initializeFunctionInfo(); // Iteratively fix up branches until there is no change. unsigned NoBRIters = 0; bool MadeChange = false; while (true) { DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) BRChange |= fixupImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); if (!BRChange) break; MadeChange = true; } // After a while, this might be made debug-only, but it is not expensive. verify(); DEBUG(dbgs() << '\n'; dumpBBs()); BBInfo.clear(); ImmBranches.clear(); return MadeChange; } /// Return true if the specified basic block can fallthrough into the block /// immediately after it. static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; // Can't fall off end of function. if (llvm::next(MBBI) == MBB->getParent()->end()) return false; MachineBasicBlock *NextBB = llvm::next(MBBI); for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) if (*I == NextBB) return true; return false; } /// Do the initial scan of the function, building up information about the sizes /// of each block, and each immediate branch. void AArch64BranchFixup::initializeFunctionInfo() { BBInfo.clear(); BBInfo.resize(MF->getNumBlockIDs()); // First thing, compute the size of all basic blocks, and see if the function // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) computeBlockSize(I); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. adjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { if (I->isDebugValue()) continue; int Opc = I->getOpcode(); if (I->isBranch()) { bool IsCond = false; // The offsets encoded in instructions here scale by the instruction // size (4 bytes), effectively increasing their range by 2 bits. unsigned Bits = 0; switch (Opc) { default: continue; // Ignore other JT branches case AArch64::TBZxii: case AArch64::TBZwii: case AArch64::TBNZxii: case AArch64::TBNZwii: IsCond = true; Bits = 14 + 2; break; case AArch64::Bcc: case AArch64::CBZx: case AArch64::CBZw: case AArch64::CBNZx: case AArch64::CBNZw: IsCond = true; Bits = 19 + 2; break; case AArch64::Bimm: Bits = 26 + 2; break; } // Record this immediate branch. ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); } } } } /// Compute the size and some alignment information for MBB. This function /// updates BBInfo directly. void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; BBI.Size = 0; BBI.Unalign = 0; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { BBI.Size += TII->getInstSizeInBytes(*I); // For inline asm, GetInstSizeInBytes returns a conservative estimate. // The actual size may be smaller, but still a multiple of the instr size. if (I->isInlineAsm()) BBI.Unalign = 2; } } /// Return the current offset of the specified machine instruction from the /// start of the function. This offset changes as stuff is moved around inside /// the function. unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's // before this instruction's block, and the offset from the start of the block // it is in. unsigned Offset = BBInfo[MBB->getNumber()].Offset; // Sum instructions before MI in MBB. for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { assert(I != MBB->end() && "Didn't find MI in its own basic block?"); Offset += TII->getInstSizeInBytes(*I); } return Offset; } /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. MachineBasicBlock * AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); MachineFunction::iterator MBBI = OrigBB; ++MBBI; MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); // Add an unconditional branch from OrigBB to NewBB. // Note the new unconditional branch is not being recorded. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. NewBB->transferSuccessors(OrigBB); // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); // Update internal data structures to account for the newly inserted MBB. MF->RenumberBlocks(NewBB); // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) computeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. computeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. adjustBBOffsetsAfter(OrigBB); return NewBB; } void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { unsigned BBNum = BB->getNumber(); for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. // Include the alignment of the current block. unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); // This is where block i begins. Stop if the offset is already correct, // and we have updated 2 blocks. This is the maximum number of blocks // changed before calling this function. if (i > BBNum + 2 && BBInfo[i].Offset == Offset && BBInfo[i].KnownBits == KnownBits) break; BBInfo[i].Offset = Offset; BBInfo[i].KnownBits = KnownBits; } } /// Returns true if the distance between specific MI and specific BB can fit in /// MI's displacement field. bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, MachineBasicBlock *DestBB, unsigned OffsetBits) { int64_t BrOffset = getOffsetOf(MI); int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " bits available=" << OffsetBits << " from " << getOffsetOf(MI) << " to " << DestOffset << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); return isIntN(OffsetBits, DestOffset - BrOffset); } /// Fix up an immediate branch whose destination is too far away to fit in its /// displacement field. bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isMBB()) { DestBB = MI->getOperand(i).getMBB(); break; } } assert(DestBB && "Branch with no destination BB?"); // Check to see if the DestBB is already in-range. if (isBBInRange(MI, DestBB, Br.OffsetBits)) return false; assert(Br.IsCond && "Only conditional branches should need fixup"); return fixupConditionalBr(Br); } /// Fix up a conditional branch whose destination is too far away to fit in its /// displacement field. It is converted to an inverse conditional branch + an /// unconditional branch to the destination. bool AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); unsigned CondBrMBBOperand = 0; // The general idea is to add an unconditional branch to the destination and // invert the conditional branch to jump over it. Complications occur around // fallthrough and unreachable ends to the block. // b.lt L1 // => // b.ge L2 // b L1 // L2: // First we invert the conditional branch, by creating a replacement if // necessary. This if statement contains all the special handling of different // branch types. if (MI->getOpcode() == AArch64::Bcc) { // The basic block is operand number 1 for Bcc CondBrMBBOperand = 1; A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); CC = A64InvertCondCode(CC); MI->getOperand(0).setImm(CC); } else { MachineInstrBuilder InvertedMI; int InvertedOpcode; switch (MI->getOpcode()) { default: llvm_unreachable("Unknown branch type"); case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; } InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { InvertedMI.addOperand(MI->getOperand(i)); if (MI->getOperand(i).isMBB()) CondBrMBBOperand = i; } MI->eraseFromParent(); MI = Br.MI = InvertedMI; } // If the branch is at the end of its MBB and that has a fall-through block, // direct the updated conditional branch to the fall-through // block. Otherwise, split the MBB before the next instruction. MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); ++NumCBrFixed; if (BMI != MI) { if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && BMI->getOpcode() == AArch64::Bimm) { // Last MI in the BB is an unconditional branch. We can swap destinations: // b.eq L1 (temporarily b.ne L1 after first change) // b L2 // => // b.ne L2 // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); if (isBBInRange(MI, NewDest, Br.OffsetBits)) { DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); BMI->getOperand(0).setMBB(DestBB); MI->getOperand(CondBrMBBOperand).setMBB(NewDest); return true; } } } if (NeedSplit) { MachineBasicBlock::iterator MBBI = MI; ++MBBI; splitBlockBeforeInstr(MBBI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->getInstSizeInBytes(MBB->back()); BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } // After splitting and removing the unconditional branch from the original BB, // the structure is now: // oldbb: // [things] // b.invertedCC L1 // splitbb/fallthroughbb: // [old b L2/real continuation] // // We now have to change the conditional branch to point to splitbb and add an // unconditional branch after it to L1, giving the final structure: // oldbb: // [things] // b.invertedCC splitbb // b L1 // splitbb/fallthroughbb: // [old b L2/real continuation] MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); DEBUG(dbgs() << " Insert B to BB#" << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() << " also invert condition and change dest. to BB#" << NextBB->getNumber() << "\n"); // Insert a new unconditional branch and fixup the destination of the // conditional one. Also update the ImmBranch as well as adding a new entry // for the new branch. BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); MI->getOperand(CondBrMBBOperand).setMBB(NextBB); BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); // 26 bits written down in Bimm, specifying a multiple of 4. unsigned OffsetBits = 26 + 2; ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); adjustBBOffsetsAfter(MBB); return true; }