//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///    BRANCH %cond BB1, BB2
///  BB1:
///    %vreg2 <vgpr> = VECTOR_INST
///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
///  BB2:
///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
/// /// In order to avoid this problem, this pass searches for PHI instructions /// which define a <vsrc> register and constrains its definition class to /// <vgpr> if the user of the PHI's definition register is a vector instruction. /// If the PHI's definition class is constrained to <vgpr> then the coalescer /// will be unable to perform the COPY removal from the above example which /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "SIInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; namespace { class SIFixSGPRCopies : public MachineFunctionPass { private: static char ID; const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI, const MachineRegisterInfo &MRI, unsigned Reg) const; public: SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } virtual bool runOnMachineFunction(MachineFunction &MF); const char *getPassName() const { return "SI Fix SGPR copies"; } }; } // End anonymous namespace char SIFixSGPRCopies::ID = 0; FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { return new SIFixSGPRCopies(tm); } /// This functions walks the use/def chains starting with the definition of /// \p Reg until it finds an Instruction that isn't a COPY returns /// the register class of that instruction. const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( const TargetRegisterInfo *TRI, const MachineRegisterInfo &MRI, unsigned Reg) const { // The Reg parameter to the function must always be defined by either a PHI // or a COPY, therefore it cannot be a physical register. 
assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Reg cannot be a physical register"); const TargetRegisterClass *RC = MRI.getRegClass(Reg); for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), E = MRI.use_end(); I != E; ++I) { switch (I->getOpcode()) { case AMDGPU::COPY: RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI, I->getOperand(0).getReg())); break; } } return RC; } bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; if (MI.getOpcode() != AMDGPU::PHI) { continue; } unsigned Reg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg); if (RC == &AMDGPU::VSrc_32RegClass) { MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); } } } return false; }