/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "parallel_move_resolver.h"

#include "base/stl_util.h"
#include "nodes.h"

namespace art {

void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
  // Perform a linear sweep of the moves to add them to the initial list of
  // moves to perform, ignoring any move that is redundant (the source is
  // the same as the destination, the destination is ignored and
  // unallocated, or the move was already eliminated).
  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
    MoveOperands* move = parallel_move->MoveOperandsAt(i);
    if (!move->IsRedundant()) {
      moves_.push_back(move);
    }
  }
}

void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK(moves_.empty());
  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  // Move stack/stack slot to take advantage of a free register on constrained machines.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Ignore constants and moves already eliminated.
    if (move.IsEliminated() || move.GetSource().IsConstant()) {
      continue;
    }

    if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
        (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
      PerformMove(i);
    }
  }

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last.  They don't block other moves
    // and skipping such moves with register destinations keeps those
    // registers free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      DCHECK(move->GetSource().IsConstant());
      EmitMove(i);
      // Eliminate the move, in case following moves need a scratch register.
      move->Eliminate();
    }
  }

  moves_.clear();
}

Location LowOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.low());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.low());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetStackIndex());
  } else {
    return Location::NoLocation();
  }
}

Location HighOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.high());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.high());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetHighStackIndex(4));
  } else {
    return Location::NoLocation();
  }
}

// Update the source of `move`, knowing that `updated_location` has been swapped
// with `new_source`. Note that `updated_location` can be a pair, therefore if
// `move` is non-pair, we need to extract which register to use.
static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
  Location source = move->GetSource();
  if (LowOf(updated_location).Equals(source)) {
    move->SetSource(LowOf(new_source));
  } else if (HighOf(updated_location).Equals(source)) {
    move->SetSource(HighOf(new_source));
  } else {
    DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
    move->SetSource(new_source);
  }
}

MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph.  We first recursively perform any move blocking this one.  We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph.  We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  if (move->IsRedundant()) {
    // Because we swap register pairs first, following, un-pending
    // moves may become redundant.
    move->Eliminate();
    return nullptr;
  }

  // Clear this move's destination to indicate a pending move.  The actual
  // destination is saved in a stack-allocated local.  Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies.  Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  MoveOperands* required_swap = nullptr;
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // calling `PerformMove` cannot create a blocking move via a swap
      // (this loop does not miss any).
      // For example, assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B.  Then A and B must be involved in the same cycle (or they would
      // not be swapped).  Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle.  In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      required_swap = PerformMove(i);

      if (required_swap == move) {
        // If this move is required to swap, we do so without looking
        // at the next moves. Swapping is not blocked by anything, it just
        // updates other moves's source.
        break;
      } else if (required_swap == moves_[i]) {
        // If `other_move` was swapped, we iterate again to find a new
        // potential cycle.
        required_swap = nullptr;
        i = -1;
      } else if (required_swap != nullptr) {
        // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
        moves_[index]->ClearPending(destination);
        return required_swap;
      }
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle.  If so remove it.
  if (move->GetSource().Equals(destination)) {
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }

  // The move may be blocked on a (at most one) pending move, in which case
  // we have a cycle.  Search for such a blocking move and perform a swap to
  // resolve it.
  bool do_swap = false;
  if (required_swap != nullptr) {
    DCHECK_EQ(required_swap, move);
    do_swap = true;
  } else {
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(destination)) {
        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
          // We swap 64bits moves before swapping 32bits moves. Go back from the
          // cycle by returning the move that must be swapped.
          return other_move;
        }
        do_swap = true;
        break;
      }
    }
  }

  if (do_swap) {
    EmitSwap(index);
    // Any unperformed (including pending) move with a source of either
    // this move's source or destination needs to have their source
    // changed to reflect the state of affairs after the swap.
    Location source = move->GetSource();
    Location swap_destination = move->GetDestination();
    move->Eliminate();
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(source)) {
        UpdateSourceOf(other_move, source, swap_destination);
      } else if (other_move->Blocks(swap_destination)) {
        UpdateSourceOf(other_move, swap_destination, source);
      }
    }
    // If the swap was required because of a 64bits move in the middle of a cycle,
    // we return the swapped move, so that the caller knows it needs to re-iterate
    // its dependency loop.
    return required_swap;
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }
}

bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return false;
    }
  }

  for (MoveOperands* move : moves_) {
    if (move->GetDestination().Equals(loc)) {
      return true;
    }
  }

  return false;
}

int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
                                                          int register_count,
                                                          int if_scratch,
                                                          bool* spilled) {
  DCHECK_NE(blocked, if_scratch);
  int scratch = -1;
  for (int reg = 0; reg < register_count; ++reg) {
    if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
      scratch = reg;
      break;
    }
  }

  if (scratch == -1) {
    *spilled = true;
    scratch = if_scratch;
  } else {
    *spilled = false;
  }

  return scratch;
}


ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
    ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
    : resolver_(resolver),
      reg_(kNoRegister),
      spilled_(false) {
  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);

  if (spilled_) {
    resolver->SpillScratch(reg_);
  }
}


ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
  if (spilled_) {
    resolver_->RestoreScratch(reg_);
  }
}

void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
  DCHECK(moves_.empty());
  DCHECK(scratches_.empty());

  // Backend dependent initialization.
  PrepareForEmitNativeCode();

  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last. They don't block other moves and
    // skipping such moves with register destinations keeps those registers
    // free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources and register destinations with UpdateMoveSource()
  // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit
  // from changing the constant sources to stack locations.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    Location destination = move->GetDestination();
    if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
      Location source = move->GetSource();
      EmitMove(i);
      move->Eliminate();
      // This may introduce additional instruction dependency, but reduce number
      // of moves and possible literal loads. For example,
      // Original moves:
      //   1234.5678 -> D0
      //   1234.5678 -> D1
      // Updated moves:
      //   1234.5678 -> D0
      //   D0 -> D1
      UpdateMoveSource(source, destination);
    }
  }

  // Perform the rest of the moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      EmitMove(i);
      move->Eliminate();
    }
  }

  // All pending moves that we have added for resolve cycles should be performed.
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);

  // Backend dependent cleanup.
  FinishEmitNativeCode();

  moves_.clear();
  scratches_.clear();
}

Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
  for (Location loc : scratches_) {
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  for (MoveOperands* move : moves_) {
    Location loc = move->GetDestination();
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  return Location::NoLocation();
}

void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
  if (kIsDebugBuild) {
    for (Location scratch : scratches_) {
      CHECK(!loc.Equals(scratch));
    }
  }
  scratches_.push_back(loc);
}

void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
  DCHECK(!IsBlockedByMoves(loc));
  for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
    if (loc.Equals(*it)) {
      scratches_.erase(it);
      break;
    }
  }
}

void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We mark
  // a move as "pending" on entry to PerformMove in order to detect cycles
  // in the move graph. We use scratch location to resolve cycles, also
  // additional pending moves might be added. After move has been performed,
  // we will update source operand in the move graph to reduce dependencies in
  // the graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  DCHECK(!move->IsEliminated());
  if (move->IsRedundant()) {
    // Previous operations on the list of moves have caused this particular move
    // to become a no-op, so we can safely eliminate it. Consider for example
    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
    // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
    // used as the scratch location, the move (1 -> 2) will occur while resolving
    // the cycle. When that move is emitted, the code will update moves with a '1'
    // as their source to use '2' instead (see `UpdateMoveSource()`. In our example
    // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
    // eliminated here.
    move->Eliminate();
    return;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      PerformMove(i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // No one else should write to the move destination when the it is pending.
  DCHECK(!move->IsRedundant());

  Location source = move->GetSource();
  // The move may be blocked on several pending moves, in case we have a cycle.
  if (IsBlockedByMoves(destination)) {
    // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
    // sequence:
    // (C -> scratch)     # Emit right now.
    // (A -> B) (B -> C)  # Unblocked.
    // (scratch -> A)     # Add to pending_moves_, blocked by (A -> B).
    Location::Kind kind = source.GetKind();
    DCHECK_NE(kind, Location::kConstant);
    Location scratch = AllocateScratchLocationFor(kind);
    // We only care about the move size.
    DataType::Type type = move->Is64BitMove() ? DataType::Type::kInt64 : DataType::Type::kInt32;
    // Perform (C -> scratch)
    move->SetDestination(scratch);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, scratch);
    // Add (scratch -> A).
    AddPendingMove(scratch, destination, type);
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, destination);
  }

  // Moves in the pending list should not block any other moves. But performing
  // unblocked moves in the pending list can free scratch registers, so we do this
  // as early as possible.
  MoveOperands* pending_move;
  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
    Location pending_source = pending_move->GetSource();
    Location pending_destination = pending_move->GetDestination();
    // We do not depend on the pending move index. So just delete the move instead
    // of eliminating it to make the pending list cleaner.
    DeletePendingMove(pending_move);
    move->SetSource(pending_source);
    move->SetDestination(pending_destination);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(pending_source, pending_destination);
    // Free any unblocked locations in the scratch location list.
    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
    // FIXME: If FreeScratchLocation() removes the location from scratches_,
    // we skip the next location. This happens for arm64.
    for (size_t i = 0; i < scratches_.size(); ++i) {
      Location scratch = scratches_[i];
      // Only scratch overlapping with performed move source can be unblocked.
      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
        FreeScratchLocation(pending_source);
      }
    }
  }
}

void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
  // This function is used to reduce the dependencies in the graph after
  // (from -> to) has been performed. Since we ensure there is no move with the same
  // destination, (to -> X) cannot be blocked while (from -> X) might still be
  // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
  // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
  // a dependency between the two. If we update the source location from 1 to 2, we
  // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
  //
  // This is not something we must do, but we can use fewer scratch locations with
  // this trick. For example, we can avoid using additional scratch locations for
  // moves (0 -> 1), (1 -> 2), (1 -> 0).
  for (MoveOperands* move : moves_) {
    if (move->GetSource().Equals(from)) {
      move->SetSource(to);
    }
  }
}

void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
                                                Location destination,
                                                DataType::Type type) {
  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
}

void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
  RemoveElement(pending_moves_, move);
}

MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    Location destination = move->GetDestination();
    // Only moves with destination overlapping with input loc can be unblocked.
    if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
      return move;
    }
  }
  return nullptr;
}

bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  return false;
}

// So far it is only used for debugging purposes to make sure all pending moves
// have been performed.
size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
  return pending_moves_.size();
}

}  // namespace art