// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  DCHECK(last_deopt_ == nullptr);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as successor of every instruction.
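    // This makes the terminator depend on every node created so far in the
    // block, so it is always emitted last.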
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization point when they depend on it.
    if ((last_deopt_ != nullptr) && DependsOnDeoptimization(instr)) {
      last_deopt_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to instructions that
      // have side effects, but independent loads can be reordered with
      // respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall()) {
      // Ensure that deopts are not reordered with respect to side-effect
      // instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
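  // Each iteration of the loop below corresponds to one cycle: the queue
  // picks a candidate (the critical-path queue only returns nodes whose
  // operands are ready at the current cycle, and may return nullptr), the
  // candidate's instruction is emitted, and the earliest start cycle of each
  // successor is raised to account for the candidate's latency.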
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}

int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
    case kArchDebugBreak:
    case kArchComment:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // ArchStackPointer instruction loads the current stack pointer value and
      // must not be reordered with instructions that have side effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallJSFunctionFromJSFunction:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}

bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}

void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8