//== llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file describes the interface of the MachineFunctionPass
/// responsible for assigning the generic virtual registers to register bank.

/// By default, the reg bank selector relies on local decisions to
/// assign the register bank. In other words, it looks at one instruction
/// at a time to decide where the operand of that instruction should live.
///
/// At higher optimization level, we could imagine that the reg bank selector
/// would use more global analysis and do crazier thing like duplicating
/// instructions and so on. This is future work.
///
/// For now, the pass uses a greedy algorithm to decide where the operand
/// of an instruction should live. It asks the target which banks may be
/// used for each operand of the instruction and what is the cost. Then,
/// it chooses the solution which minimizes the cost of the instruction plus
/// the cost of any move that may be needed to bring the values into the right
/// register bank.
/// In other words, the cost for an instruction on a register bank RegBank
/// is: Cost of I on RegBank plus the sum of the cost for bringing the
/// input operands from their current register bank to RegBank.
/// Thus, the following formula:
/// cost(I, RegBank) = cost(I.Opcode, RegBank) +
///    sum(for each arg in I.arguments: costCrossCopy(arg.RegBank, RegBank))
///
/// E.g., Let say we are assigning the register bank for the instruction
/// defining v2.
/// v0(A_REGBANK) = ...
/// v1(A_REGBANK) = ...
/// v2 = G_ADD i32 v0, v1 <-- MI
///
/// The target may say it can generate G_ADD i32 on register bank A and B
/// with a cost of respectively 5 and 1.
/// Then, let's say the cost of a cross register bank copy from A to B is 1.
/// The reg bank selector would compare the following two costs:
/// cost(MI, A_REGBANK) = cost(G_ADD, A_REGBANK) + cost(v0.RegBank, A_REGBANK) +
///    cost(v1.RegBank, A_REGBANK)
///                     = 5 + cost(A_REGBANK, A_REGBANK) + cost(A_REGBANK,
///                                                             A_REGBANK)
///                     = 5 + 0 + 0 = 5
/// cost(MI, B_REGBANK) = cost(G_ADD, B_REGBANK) + cost(v0.RegBank, B_REGBANK) +
///    cost(v1.RegBank, B_REGBANK)
///                     = 1 + cost(A_REGBANK, B_REGBANK) + cost(A_REGBANK,
///                                                             B_REGBANK)
///                     = 1 + 1 + 1 = 3
/// Therefore, in this specific example, the reg bank selector would choose
/// bank B for MI.
/// v0(A_REGBANK) = ...
/// v1(A_REGBANK) = ...
/// tmp0(B_REGBANK) = COPY v0
/// tmp1(B_REGBANK) = COPY v1
/// v2(B_REGBANK) = G_ADD i32 tmp0, tmp1
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H
#define LLVM_CODEGEN_GLOBALISEL_REGBANKSELECT_H

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"

namespace llvm {
// Forward declarations.
class BlockFrequency;
class MachineBranchProbabilityInfo;
class MachineBlockFrequencyInfo;
class MachineRegisterInfo;
class TargetPassConfig;
class TargetRegisterInfo;
class raw_ostream;

/// This pass implements the reg bank selector pass used in the GlobalISel
/// pipeline. At the end of this pass, all register operands have been assigned
/// to a register bank.
class RegBankSelect : public MachineFunctionPass {
public:
  static char ID;

  /// List of the optimization modes supported by the RegBankSelect pass.
  /// The mode is selected when the pass is constructed.
  enum Mode {
    /// Assign the register banks as fast as possible.
    /// This is the default mode of the RegBankSelect constructor.
    Fast,
    /// Greedily minimize the cost of assigning register banks.
    /// This should produce code of greater quality, but will
    /// require more compile time.
    Greedy
  };

  /// Abstract description of a place in the CFG where code can be inserted.
  /// An insertion point is only recorded at first; it gets materialized
  /// on demand.
  /// This makes it possible to reason about the frequency of the insertion
  /// point without having to logically materialize it (e.g., on an edge)
  /// before something actually needs to be inserted there.
  class InsertPoint {
  protected:
    /// Whether this insertion point has already been materialized.
    bool WasMaterialized = false;

    /// Materialize the insertion point.
    ///
    /// When isSplit() is true, this is where the block or the edge
    /// actually gets split.
    ///
    /// \post getPointImpl() returns a valid iterator.
    /// \post getInsertMBBImpl() returns a valid basic block.
    /// \post isSplit() == false ; no more splitting should be required.
    virtual void materialize() = 0;

    /// Return the materialized insertion basic block, i.e., the block
    /// the code will be inserted into.
    ///
    /// \pre ::materialize has been called.
    virtual MachineBasicBlock &getInsertMBBImpl() = 0;

    /// Return the materialized insertion point, i.e., the position the
    /// code will be inserted before.
    ///
    /// \pre ::materialize has been called.
    virtual MachineBasicBlock::iterator getPointImpl() = 0;

  private:
    /// Run materialize() the first time this is called; later calls do
    /// nothing. Shared by getPoint() and getInsertMBB().
    void materializeOnce() {
      if (WasMaterialized)
        return;
      WasMaterialized = true;
      assert(canMaterialize() && "Impossible to materialize this point");
      materialize();
    }

  public:
    virtual ~InsertPoint() = default;

    /// The first call to this method (or to getInsertMBB()) triggers the
    /// splitting if need be; subsequent calls just return the iterator
    /// to that point. I.e., no more splitting will occur.
    ///
    /// \return The iterator that should be used with
    /// MachineBasicBlock::insert. I.e., additional code happens
    /// before that point.
    MachineBasicBlock::iterator getPoint() {
      materializeOnce();
      // Materializing the point must have taken care of the splitting.
      assert(!isSplit() && "Wrong pre-condition");
      return getPointImpl();
    }

    /// The first call to this method (or to getPoint()) triggers the
    /// splitting if need be; subsequent calls just return the basic
    /// block that contains the insertion point.
    /// I.e., no more splitting will occur.
    ///
    /// \return The basic block that should be used with
    /// MachineBasicBlock::insert and ::getPoint. The new code should
    /// happen before that point.
    MachineBasicBlock &getInsertMBB() {
      materializeOnce();
      // Materializing the point must have taken care of the splitting.
      assert(!isSplit() && "Wrong pre-condition");
      return getInsertMBBImpl();
    }

    /// Insert \p MI just before ::getPoint(), in ::getInsertMBB().
    MachineBasicBlock::iterator insert(MachineInstr &MI) {
      MachineBasicBlock &InsertMBB = getInsertMBB();
      return InsertMBB.insert(getPoint(), &MI);
    }

    /// Does this point involve splitting an edge or block?
    /// As soon as ::getPoint is called and thus, the point
    /// materialized, the point will not require splitting anymore,
    /// i.e., this will return false.
    /// The default implementation never requires a split.
    virtual bool isSplit() const { return false; }

    /// Frequency of the insertion point.
    /// \p P is used to access the various analyses that will help to
    /// get that information, like MachineBlockFrequencyInfo.  If \p P
    /// does not contain enough information to return the actual
    /// frequency, this returns 1.
    /// The default implementation always returns 1.
    virtual uint64_t frequency(const Pass &P) const { return 1; }

    /// Check whether this insertion point can be materialized.
    /// As soon as ::getPoint is called and thus, the point materialized,
    /// calling this method does not make sense.
    /// The default implementation reports that it cannot.
    virtual bool canMaterialize() const { return false; }
  };

  /// Insertion point located right before or right after an instruction.
  class InstrInsertPoint : public InsertPoint {
  private:
    /// Instruction the insertion point is relative to.
    MachineInstr &Instr;
    /// True when the insertion point is before Instr, false when it is
    /// after Instr.
    bool Before;

    void materialize() override;

    MachineBasicBlock::iterator getPointImpl() override {
      // Inserting "before Instr" means inserting at Instr itself.
      if (Before)
        return Instr;
      // Otherwise insert at the node following Instr, or at the end of the
      // parent block when Instr has no next node.
      if (MachineInstr *Next = Instr.getNextNode())
        return *Next;
      return Instr.getParent()->end();
    }

    MachineBasicBlock &getInsertMBBImpl() override {
      MachineBasicBlock *Parent = Instr.getParent();
      return *Parent;
    }

  public:
    /// Create an insertion point before (\p Before=true) or after
    /// (\p Before=false) \p Instr.
    InstrInsertPoint(MachineInstr &Instr, bool Before = true);
    bool isSplit() const override;
    uint64_t frequency(const Pass &P) const override;

    // In the worst case the basic block has to be sliced, which is still
    // doable, hence this kind of point can always be materialized.
    bool canMaterialize() const override { return true; }
  };

  /// Insertion point located at the beginning or at the end of a basic
  /// block.
  class MBBInsertPoint : public InsertPoint {
  private:
    /// Basic block the code gets inserted into.
    MachineBasicBlock &MBB;
    /// True to insert at the beginning of MBB, false to insert at its end.
    bool Beginning;

    /// Nothing to do: this kind of point needs no materialization work.
    void materialize() override {}

    MachineBasicBlock::iterator getPointImpl() override {
      if (Beginning)
        return MBB.begin();
      return MBB.end();
    }

    MachineBasicBlock &getInsertMBBImpl() override { return MBB; }

  public:
    /// Create an insertion point at the beginning (\p Beginning=true) or
    /// at the end (\p Beginning=false) of \p MBB.
    MBBInsertPoint(MachineBasicBlock &MBB, bool Beginning = true)
        : MBB(MBB), Beginning(Beginning) {
      if (Beginning) {
        // Inserting before phis is not supported here: the insertion
        // points on the incoming edges should be used instead.
        assert(MBB.getFirstNonPHI() == MBB.begin() &&
               "Invalid beginning point");
      } else {
        // Inserting after the terminators is not supported here: the
        // insertion points on the outgoing edges should be used instead.
        assert(MBB.getFirstTerminator() == MBB.end() &&
               "Invalid end point");
      }
    }
    bool isSplit() const override { return false; }
    uint64_t frequency(const Pass &P) const override;
    bool canMaterialize() const override { return true; }
  };

  /// Insertion point located on a CFG edge.
  class EdgeInsertPoint : public InsertPoint {
  private:
    /// Source of the edge.
    MachineBasicBlock &Src;
    /// Destination of the edge.
    /// Once the materialization is done, this holds the basic block
    /// that resulted from the splitting.
    MachineBasicBlock *DstOrSplit;
    /// P is used to update the analysis passes as applicable.
    Pass &P;

    void materialize() override;

    MachineBasicBlock::iterator getPointImpl() override {
      // By now DstOrSplit is expected to be the split block: its only
      // predecessor is Src and it has a single successor, the original Dst.
      assert(DstOrSplit && DstOrSplit->isPredecessor(&Src) &&
             DstOrSplit->pred_size() == 1 && DstOrSplit->succ_size() == 1 &&
             "Did not split?!");
      MachineBasicBlock &SplitMBB = *DstOrSplit;
      return SplitMBB.begin();
    }

    MachineBasicBlock &getInsertMBBImpl() override { return *DstOrSplit; }

  public:
    /// Create an insertion point on the edge going from \p Src to \p Dst.
    /// \p P is kept to update the analysis passes as applicable.
    EdgeInsertPoint(MachineBasicBlock &Src, MachineBasicBlock &Dst, Pass &P)
        : Src(Src), DstOrSplit(&Dst), P(P) {}

    /// The edge requires splitting when its source has more than one
    /// successor and its destination has more than one predecessor.
    bool isSplit() const override {
      if (Src.succ_size() <= 1)
        return false;
      return DstOrSplit->pred_size() > 1;
    }
    uint64_t frequency(const Pass &P) const override;
    bool canMaterialize() const override;
  };

  /// Class used to represent the placement of a repairing point for
  /// a given operand.
  class RepairingPlacement {
  public:
    /// Define the kind of action this repairing needs.
    enum RepairingKind {
      /// Nothing to repair, just drop this action.
      None,
      /// Repairing code needs to happen before InsertPoints.
      Insert,
      /// (Re)assign the register bank of the operand.
      Reassign,
      /// Mark this repairing placement as impossible.
      Impossible
    };

    /// \name Convenient types for a list of insertion points.
    /// @{
    using InsertionPoints = SmallVector<std::unique_ptr<InsertPoint>, 2>;
    using insertpt_iterator = InsertionPoints::iterator;
    using const_insertpt_iterator = InsertionPoints::const_iterator;
    /// @}

  private:
    /// Kind of repairing.
    RepairingKind Kind;
    /// Index of the operand that will be repaired.
    unsigned OpIdx;
    /// Can all the insert points be materialized?
    bool CanMaterialize;
    /// Does any of the insert points need splitting?
    bool HasSplit;
    /// Insertion points for the repair code.
    /// The repairing code needs to happen just before these points.
    InsertionPoints InsertPoints;
    /// Some insertion points may need to update the liveness and such.
    Pass &P;

  public:
    /// Create a repairing placement for the \p OpIdx-th operand of
    /// \p MI. \p TRI is used to make some checks on the register aliases
    /// if the machine operand is a physical register. \p P is used
    /// to update liveness information and such when materializing the
    /// points. \p Kind is the kind of repairing to perform; it defaults
    /// to RepairingKind::Insert.
    RepairingPlacement(MachineInstr &MI, unsigned OpIdx,
                       const TargetRegisterInfo &TRI, Pass &P,
                       RepairingKind Kind = RepairingKind::Insert);

    /// \name Getters.
    /// @{
    RepairingKind getKind() const { return Kind; }
    unsigned getOpIdx() const { return OpIdx; }
    bool canMaterialize() const { return CanMaterialize; }
    // Const like the other getters, so that it can also be queried on a
    // const placement.
    bool hasSplit() const { return HasSplit; }
    /// @}

    /// \name Overloaded methods to add an insertion point.
    /// @{
    /// Add a MBBInsertPoint to the list of InsertPoints.
    void addInsertPoint(MachineBasicBlock &MBB, bool Beginning);
    /// Add an InstrInsertPoint to the list of InsertPoints.
    void addInsertPoint(MachineInstr &MI, bool Before);
    /// Add an EdgeInsertPoint (\p Src, \p Dst) to the list of InsertPoints.
    void addInsertPoint(MachineBasicBlock &Src, MachineBasicBlock &Dst);
    /// Add an InsertPoint to the list of insert points.
    /// This method takes the ownership of &\p Point.
    void addInsertPoint(InsertPoint &Point);
    /// @}

    /// \name Accessors related to the insertion points.
    /// @{
    insertpt_iterator begin() { return InsertPoints.begin(); }
    insertpt_iterator end() { return InsertPoints.end(); }

    const_insertpt_iterator begin() const { return InsertPoints.begin(); }
    const_insertpt_iterator end() const { return InsertPoints.end(); }

    unsigned getNumInsertPoints() const { return InsertPoints.size(); }
    /// @}

    /// Change the type of this repairing placement to \p NewKind.
    /// It is not possible to switch a repairing placement to the
    /// RepairingKind::Insert. There is no fundamental problem with
    /// that, but no uses as well, so do not support it for now.
    ///
    /// Switching drops all the recorded insertion points, clears the
    /// "has split" flag, and marks the placement as materializable unless
    /// \p NewKind is RepairingKind::Impossible.
    ///
    /// \pre NewKind != getKind()
    /// \pre NewKind != RepairingKind::Insert
    /// \post getKind() == NewKind
    void switchTo(RepairingKind NewKind) {
      // Check both preconditions up front, before any state is modified.
      assert(NewKind != Kind && "Already of the right Kind");
      assert(NewKind != RepairingKind::Insert &&
             "We would need more MI to switch to Insert");
      Kind = NewKind;
      // Insert is the only kind documented as using the insertion points,
      // and it cannot be the target of a switch: drop them.
      InsertPoints.clear();
      CanMaterialize = NewKind != RepairingKind::Impossible;
      HasSplit = false;
    }
  };

private:
  /// Helper class modeling the cost of mapping an instruction.
  /// Mapping an instruction may introduce some repairing code.
  /// Most of the time that code is local to the instruction, so the
  /// basic block frequency can be left out of the cost.
  /// Some alternatives, however, produce non-local cost (e.g., when
  /// repairing a phi); the local cost then needs to be scaled to the
  /// non-local cost. This class takes care of that.
  /// \note: The cost could simply always be scaled. The problem is that
  /// there would be higher chances of saturating the cost and of ending
  /// up with the same cost for actually different alternatives.
  /// Another option would be to use APInt everywhere.
  class MappingCost {
  private:
    /// Cost of the local instructions.
    /// This cost does not include any basic block frequency.
    uint64_t LocalCost;
    /// Cost of the non-local instructions.
    /// This cost should include the frequency of the related blocks.
    uint64_t NonLocalCost;
    /// Frequency of the block where the local instructions live.
    uint64_t LocalFreq;

    /// Build a cost directly from its three components.
    MappingCost(uint64_t LocalCost, uint64_t NonLocalCost, uint64_t LocalFreq)
        : LocalCost(LocalCost), NonLocalCost(NonLocalCost),
          LocalFreq(LocalFreq) {}

    /// Check if this cost is saturated.
    bool isSaturated() const;

  public:
    /// Create a MappingCost assuming that most of the instructions
    /// will occur in a basic block with \p LocalFreq frequency.
    MappingCost(const BlockFrequency &LocalFreq);

    /// Add \p Cost to the local cost.
    /// \return true if this cost is saturated, false otherwise.
    bool addLocalCost(uint64_t Cost);

    /// Add \p Cost to the non-local cost.
    /// Non-local cost should reflect the frequency of their placement.
    /// \return true if this cost is saturated, false otherwise.
    bool addNonLocalCost(uint64_t Cost);

    /// Saturate the cost to the maximal representable value.
    void saturate();

    /// Return an instance of MappingCost that represents an
    /// impossible mapping.
    static MappingCost ImpossibleCost();

    /// Check if this is less than \p Cost.
    bool operator<(const MappingCost &Cost) const;
    /// Check if this is equal to \p Cost.
    bool operator==(const MappingCost &Cost) const;
    /// Check if this is not equal to \p Cost.
    /// Defined as the negation of operator==.
    bool operator!=(const MappingCost &Cost) const {
      return !operator==(Cost);
    }
    /// Check if this is greater than \p Cost, i.e., this differs from
    /// \p Cost and \p Cost is less than this.
    bool operator>(const MappingCost &Cost) const {
      if (*this == Cost)
        return false;
      return Cost < *this;
    }

    /// Print this on dbgs() stream.
    void dump() const;

    /// Print this on \p OS.
    void print(raw_ostream &OS) const;

    /// Overload the stream operator for easy debug printing.
    /// Forwards to print() and returns \p OS.
    friend raw_ostream &operator<<(raw_ostream &OS, const MappingCost &Cost) {
      Cost.print(OS);
      return OS;
    }
  };

  /// Interface to the target lowering info related
  /// to register banks.
  const RegisterBankInfo *RBI;

  /// MRI contains all the register class/bank information that this
  /// pass uses and updates.
  MachineRegisterInfo *MRI;

  /// Information on the register classes for the current function.
  const TargetRegisterInfo *TRI;

  /// Analysis used to get the frequency of the blocks.
  /// This is required for non-fast mode (i.e., Mode::Greedy).
  MachineBlockFrequencyInfo *MBFI;

  /// Analysis used to get the frequency of the edges.
  /// This is required for non-fast mode (i.e., Mode::Greedy).
  MachineBranchProbabilityInfo *MBPI;

  /// Current optimization remark emitter. Used to report failures.
  std::unique_ptr<MachineOptimizationRemarkEmitter> MORE;

  /// Helper class used for every code morphing.
  MachineIRBuilder MIRBuilder;

  /// Optimization mode of the pass (see Mode).
  Mode OptMode;

  /// Current target configuration. Controls how the pass handles errors.
  const TargetPassConfig *TPC;

  /// Assign the register bank of each operand of \p MI.
  /// \return True on success, false otherwise.
  bool assignInstr(MachineInstr &MI);

  /// Initialize the field members using \p MF.
  void init(MachineFunction &MF);

  /// Check if \p Reg is already assigned what is described by \p ValMapping.
  /// \p OnlyAssign is an output of this check: \p OnlyAssign == true means
  /// that \p Reg just needs to be assigned a register bank.  I.e., no
  /// repairing is necessary to have the assignment match.
  bool assignmentMatch(unsigned Reg,
                       const RegisterBankInfo::ValueMapping &ValMapping,
                       bool &OnlyAssign) const;

  /// Insert repairing code for the register of \p MO as specified by
  /// \p ValMapping. In the description below, Reg stands for that register.
  /// The repairing placement is specified by \p RepairPt.
  /// \p NewVRegs contains all the registers required to remap Reg.
  /// In other words, the number of registers in NewVRegs must be equal
  /// to ValMapping.BreakDown.size().
  ///
  /// The transformation could be sketched as:
  /// \code
  /// ... = op Reg
  /// \endcode
  /// Becomes
  /// \code
  /// <NewRegs> = COPY or extract Reg
  /// ... = op Reg
  /// \endcode
  ///
  /// and
  /// \code
  /// Reg = op ...
  /// \endcode
  /// Becomes
  /// \code
  /// Reg = op ...
  /// Reg = COPY or build_sequence <NewRegs>
  /// \endcode
  ///
  /// \pre NewVRegs.size() == ValMapping.BreakDown.size()
  ///
  /// \note The caller is supposed to do the rewriting of op if need be.
  /// I.e., Reg = op ... => <NewRegs> = NewOp ...
  ///
  /// \return True if the repairing worked, false otherwise.
  bool repairReg(MachineOperand &MO,
                 const RegisterBankInfo::ValueMapping &ValMapping,
                 RegBankSelect::RepairingPlacement &RepairPt,
                 const iterator_range<SmallVectorImpl<unsigned>::const_iterator>
                     &NewVRegs);

  /// Return the cost of the instruction needed to map \p MO to \p ValMapping.
  /// The cost is free of basic block frequencies.
  /// \pre MO.isReg()
  /// \pre MO is assigned to a register bank.
  /// \pre ValMapping is a valid mapping for MO.
  uint64_t
  getRepairCost(const MachineOperand &MO,
                const RegisterBankInfo::ValueMapping &ValMapping) const;

  /// Find the best mapping for \p MI from \p PossibleMappings.
  /// NOTE(review): \p RepairPts presumably receives the repairing
  /// placements that go with the returned mapping -- confirm against the
  /// implementation.
  /// \return a reference on the best mapping in \p PossibleMappings.
  const RegisterBankInfo::InstructionMapping &
  findBestMapping(MachineInstr &MI,
                  RegisterBankInfo::InstructionMappings &PossibleMappings,
                  SmallVectorImpl<RepairingPlacement> &RepairPts);

  /// Compute the cost of mapping \p MI with \p InstrMapping and
  /// compute the repairing placement for such mapping in \p
  /// RepairPts.
  /// \p BestCost is used to specify when the cost becomes too high
  /// and thus it is not worth computing the RepairPts.  Moreover if
  /// \p BestCost == nullptr (the default), the mapping cost is actually
  /// not computed.
  MappingCost
  computeMapping(MachineInstr &MI,
                 const RegisterBankInfo::InstructionMapping &InstrMapping,
                 SmallVectorImpl<RepairingPlacement> &RepairPts,
                 const MappingCost *BestCost = nullptr);

  /// When \p RepairPt involves splitting to repair \p MO for the
  /// given \p ValMapping, try to change the way we repair such that
  /// the splitting is not required anymore.
  /// Any such change is made to \p RepairPt itself; the pass object is
  /// not modified (the method is const).
  ///
  /// \pre \p RepairPt.hasSplit()
  /// \pre \p MO == MO.getParent()->getOperand(\p RepairPt.getOpIdx())
  /// \pre \p ValMapping is the mapping of \p MO for MO.getParent()
  ///      that implied \p RepairPt.
  void tryAvoidingSplit(RegBankSelect::RepairingPlacement &RepairPt,
                        const MachineOperand &MO,
                        const RegisterBankInfo::ValueMapping &ValMapping) const;

  /// Apply \p InstrMapping to \p MI. \p RepairPts represents the different
  /// mapping actions that need to happen for the mapping to be
  /// applied.
  /// \return True if the mapping was applied successfully, false otherwise.
  bool applyMapping(MachineInstr &MI,
                    const RegisterBankInfo::InstructionMapping &InstrMapping,
                    SmallVectorImpl<RepairingPlacement> &RepairPts);

public:
  /// Create a RegBankSelect pass with the specified \p RunningMode.
  /// When no mode is given, the pass is created with Mode::Fast.
  RegBankSelect(Mode RunningMode = Fast);

  /// Name of this pass: "RegBankSelect".
  StringRef getPassName() const override { return "RegBankSelect"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Properties required from the input function: IsSSA and Legalized.
  MachineFunctionProperties getRequiredProperties() const override {
    MachineFunctionProperties Required;
    Required.set(MachineFunctionProperties::Property::IsSSA);
    Required.set(MachineFunctionProperties::Property::Legalized);
    return Required;
  }

  /// Property set by this pass: RegBankSelected.
  MachineFunctionProperties getSetProperties() const override {
    MachineFunctionProperties Established;
    Established.set(MachineFunctionProperties::Property::RegBankSelected);
    return Established;
  }

  /// Walk through \p MF and assign a register bank to every virtual register
  /// that is still mapped to nothing.
  /// The target needs to provide a RegisterBankInfo and in particular
  /// override RegisterBankInfo::getInstrMapping.
  ///
  /// Simplified algorithm:
  /// \code
  ///   RBI = MF.subtarget.getRegBankInfo()
  ///   MIRBuilder.setMF(MF)
  ///   for each bb in MF
  ///     for each inst in bb
  ///       MIRBuilder.setInstr(inst)
  ///       MappingCosts = RBI.getMapping(inst);
  ///       Idx = findIdxOfMinCost(MappingCosts)
  ///       CurRegBank = MappingCosts[Idx].RegBank
  ///       MRI.setRegBank(inst.getOperand(0).getReg(), CurRegBank)
  ///       for each argument in inst
  ///         if (CurRegBank != argument.RegBank)
  ///           ArgReg = argument.getReg()
  ///           Tmp = MRI.createNewVirtual(MRI.getSize(ArgReg), CurRegBank)
  ///           MIRBuilder.buildInstr(COPY, Tmp, ArgReg)
  ///           inst.getOperand(argument.getOperandNo()).setReg(Tmp)
  /// \endcode
  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // End namespace llvm.

#endif