// Copyright (c) 2018 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include <algorithm> #include <functional> #include <memory> #include <unordered_map> #include <unordered_set> #include <vector> #include "source/opt/ir_builder.h" #include "source/opt/ir_context.h" #include "source/opt/loop_descriptor.h" #include "source/opt/loop_peeling.h" #include "source/opt/loop_utils.h" #include "source/opt/scalar_analysis.h" #include "source/opt/scalar_analysis_nodes.h" namespace spvtools { namespace opt { size_t LoopPeelingPass::code_grow_threshold_ = 1000; void LoopPeeling::DuplicateAndConnectLoop( LoopUtils::LoopCloningResult* clone_results) { CFG& cfg = *context_->cfg(); analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); assert(CanPeelLoop() && "Cannot peel loop!"); std::vector<BasicBlock*> ordered_loop_blocks; // TODO(1841): Handle failure to create pre-header. BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock(); loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks); cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks); // Add the basic block to the function. Function::iterator it = loop_utils_.GetFunction()->FindBlock(pre_header->id()); assert(it != loop_utils_.GetFunction()->end() && "Pre-header not found in the function."); loop_utils_.GetFunction()->AddBasicBlocks( clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it); // Make the |loop_|'s preheader the |cloned_loop_| one. BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock(); pre_header->ForEachSuccessorLabel( [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); }); // Update cfg. cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id()); cloned_loop_->SetPreHeaderBlock(pre_header); loop_->SetPreHeaderBlock(nullptr); // When cloning the loop, we didn't cloned the merge block, so currently // |cloned_loop_| shares the same block as |loop_|. // We mutate all branches from |cloned_loop_| block to |loop_|'s merge into a // branch to |loop_|'s header (so header will also be the merge of // |cloned_loop_|). uint32_t cloned_loop_exit = 0; for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) { if (loop_->IsInsideLoop(pred_id)) continue; BasicBlock* bb = cfg.block(pred_id); assert(cloned_loop_exit == 0 && "The loop has multiple exits."); cloned_loop_exit = bb->id(); bb->ForEachSuccessorLabel([this](uint32_t* succ) { if (*succ == loop_->GetMergeBlock()->id()) *succ = loop_->GetHeaderBlock()->id(); }); } // Update cfg. cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id()); cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id()); // Patch the phi of the original loop header: // - Set the loop entry branch to come from the cloned loop exit block; // - Set the initial value of the phi using the corresponding cloned loop // exit values. // // We patch the iterating value initializers of the original loop using the // corresponding cloned loop exit values. Connects the cloned loop iterating // values to the original loop. This make sure that the initial value of the // second loop starts with the last value of the first loop. // // For example, loops like: // // int z = 0; // for (int i = 0; i++ < M; i += cst1) { // if (cond) // z += cst2; // } // // Will become: // // int z = 0; // int i = 0; // for (; i++ < M; i += cst1) { // if (cond) // z += cst2; // } // for (; i++ < M; i += cst1) { // if (cond) // z += cst2; // } loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr, clone_results, this](Instruction* phi) { for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) { phi->SetInOperand(i, {clone_results->value_map_.at( exit_value_.at(phi->result_id())->result_id())}); phi->SetInOperand(i + 1, {cloned_loop_exit}); def_use_mgr->AnalyzeInstUse(phi); return; } } }); // Force the creation of a new preheader for the original loop and set it as // the merge block for the cloned loop. // TODO(1841): Handle failure to create pre-header. cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock()); } void LoopPeeling::InsertCanonicalInductionVariable( LoopUtils::LoopCloningResult* clone_results) { if (original_loop_canonical_induction_variable_) { canonical_induction_variable_ = context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at( original_loop_canonical_induction_variable_->result_id())); return; } BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail(); if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) { --insert_point; } InstructionBuilder builder( context_, &*insert_point, IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); Instruction* uint_1_cst = builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned()); // Create the increment. // Note that we do "1 + 1" here, one of the operand should the phi // value but we don't have it yet. The operand will be set latter. Instruction* iv_inc = builder.AddIAdd( uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id()); builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin()); canonical_induction_variable_ = builder.AddPhi( uint_1_cst->type_id(), {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(), GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(), GetClonedLoop()->GetLatchBlock()->id()}); // Connect everything. iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()}); // Update def/use manager. context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc); // If do-while form, use the incremented value. if (do_while_form_) { canonical_induction_variable_ = iv_inc; } } void LoopPeeling::GetIteratorUpdateOperations( const Loop* loop, Instruction* iterator, std::unordered_set<Instruction*>* operations) { analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); operations->insert(iterator); iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) { Instruction* insn = def_use_mgr->GetDef(*id); if (insn->opcode() == SpvOpLabel) { return; } if (operations->count(insn)) { return; } if (!loop->IsInsideLoop(insn)) { return; } GetIteratorUpdateOperations(loop, insn, operations); }); } // Gather the set of blocks for all the path from |entry| to |root|. static void GetBlocksInPath(uint32_t block, uint32_t entry, std::unordered_set<uint32_t>* blocks_in_path, const CFG& cfg) { for (uint32_t pid : cfg.preds(block)) { if (blocks_in_path->insert(pid).second) { if (pid != entry) { GetBlocksInPath(pid, entry, blocks_in_path, cfg); } } } } bool LoopPeeling::IsConditionCheckSideEffectFree() const { CFG& cfg = *context_->cfg(); // The "do-while" form does not cause issues, the algorithm takes into account // the first iteration. if (!do_while_form_) { uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0]; std::unordered_set<uint32_t> blocks_in_path; blocks_in_path.insert(condition_block_id); GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(), &blocks_in_path, cfg); for (uint32_t bb_id : blocks_in_path) { BasicBlock* bb = cfg.block(bb_id); if (!bb->WhileEachInst([this](Instruction* insn) { if (insn->IsBranch()) return true; switch (insn->opcode()) { case SpvOpLabel: case SpvOpSelectionMerge: case SpvOpLoopMerge: return true; default: break; } return context_->IsCombinatorInstruction(insn); })) { return false; } } } return true; } void LoopPeeling::GetIteratingExitValues() { CFG& cfg = *context_->cfg(); loop_->GetHeaderBlock()->ForEachPhiInst( [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; }); if (!loop_->GetMergeBlock()) { return; } if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { return; } analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0]; auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id()); do_while_form_ = std::find(header_pred.begin(), header_pred.end(), condition_block_id) != header_pred.end(); if (do_while_form_) { loop_->GetHeaderBlock()->ForEachPhiInst( [condition_block_id, def_use_mgr, this](Instruction* phi) { std::unordered_set<Instruction*> operations; for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) { exit_value_[phi->result_id()] = def_use_mgr->GetDef(phi->GetSingleWordInOperand(i)); } } }); } else { DominatorTree* dom_tree = &context_->GetDominatorAnalysis(loop_utils_.GetFunction()) ->GetDomTree(); BasicBlock* condition_block = cfg.block(condition_block_id); loop_->GetHeaderBlock()->ForEachPhiInst( [dom_tree, condition_block, this](Instruction* phi) { std::unordered_set<Instruction*> operations; // Not the back-edge value, check if the phi instruction is the only // possible candidate. GetIteratorUpdateOperations(loop_, phi, &operations); for (Instruction* insn : operations) { if (insn == phi) { continue; } if (dom_tree->Dominates(context_->get_instr_block(insn), condition_block)) { return; } } exit_value_[phi->result_id()] = phi; }); } } void LoopPeeling::FixExitCondition( const std::function<uint32_t(Instruction*)>& condition_builder) { CFG& cfg = *context_->cfg(); uint32_t condition_block_id = 0; for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) { if (GetClonedLoop()->IsInsideLoop(id)) { condition_block_id = id; break; } } assert(condition_block_id != 0 && "2nd loop in improperly connected"); BasicBlock* condition_block = cfg.block(condition_block_id); Instruction* exit_condition = condition_block->terminator(); assert(exit_condition->opcode() == SpvOpBranchConditional); BasicBlock::iterator insert_point = condition_block->tail(); if (condition_block->GetMergeInst()) { --insert_point; } exit_condition->SetInOperand(0, {condition_builder(&*insert_point)}); uint32_t to_continue_block_idx = GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1)) ? 1 : 2; exit_condition->SetInOperand( 1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)}); exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()}); // Update def/use manager. context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition); } BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) { analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); CFG& cfg = *context_->cfg(); assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor"); // TODO(1841): Handle id overflow. std::unique_ptr<BasicBlock> new_bb = MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction( context_, SpvOpLabel, 0, context_->TakeNextId(), {}))); new_bb->SetParent(loop_utils_.GetFunction()); // Update the loop descriptor. Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb]; if (in_loop) { in_loop->AddBasicBlock(new_bb.get()); loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop); } context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get()); def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst()); BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]); bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) { if (*id == bb->id()) { *id = new_bb->id(); } }); cfg.RemoveEdge(bb_pred->id(), bb->id()); cfg.AddEdge(bb_pred->id(), new_bb->id()); def_use_mgr->AnalyzeInstUse(&*bb_pred->tail()); // Update the incoming branch. bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) { phi->SetInOperand(1, {new_bb->id()}); def_use_mgr->AnalyzeInstUse(phi); }); InstructionBuilder( context_, new_bb.get(), IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) .AddBranch(bb->id()); cfg.RegisterBlock(new_bb.get()); // Add the basic block to the function. Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id()); assert(it != loop_utils_.GetFunction()->end() && "Basic block not found in the function."); BasicBlock* ret = new_bb.get(); loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it); return ret; } BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition, BasicBlock* if_merge) { // TODO(1841): Handle failure to create pre-header. BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock(); // Will no longer be a pre-header because of the if. loop->SetPreHeaderBlock(nullptr); // Kill the branch to the header. context_->KillInst(&*if_block->tail()); InstructionBuilder builder( context_, if_block, IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); builder.AddConditionalBranch(condition->result_id(), loop->GetHeaderBlock()->id(), if_merge->id(), if_merge->id()); return if_block; } void LoopPeeling::PeelBefore(uint32_t peel_factor) { assert(CanPeelLoop() && "Cannot peel loop"); LoopUtils::LoopCloningResult clone_results; // Clone the loop and insert the cloned one before the loop. DuplicateAndConnectLoop(&clone_results); // Add a canonical induction variable "canonical_induction_variable_". InsertCanonicalInductionVariable(&clone_results); InstructionBuilder builder( context_, &*cloned_loop_->GetPreHeaderBlock()->tail(), IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); Instruction* factor = builder.GetIntConstant(peel_factor, int_type_->IsSigned()); Instruction* has_remaining_iteration = builder.AddLessThan( factor->result_id(), loop_iteration_count_->result_id()); Instruction* max_iteration = builder.AddSelect( factor->type_id(), has_remaining_iteration->result_id(), factor->result_id(), loop_iteration_count_->result_id()); // Change the exit condition of the cloned loop to be (exit when become // false): // "canonical_induction_variable_" < min("factor", "loop_iteration_count_") FixExitCondition([max_iteration, this](Instruction* insert_before_point) { return InstructionBuilder(context_, insert_before_point, IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) .AddLessThan(canonical_induction_variable_->result_id(), max_iteration->result_id()) ->result_id(); }); // "Protect" the second loop: the second loop can only be executed if // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_). BasicBlock* if_merge_block = loop_->GetMergeBlock(); loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock())); // Prevent the second loop from being executed if we already executed all the // required iterations. BasicBlock* if_block = ProtectLoop(loop_, has_remaining_iteration, if_merge_block); // Patch the phi of the merge block. if_merge_block->ForEachPhiInst( [&clone_results, if_block, this](Instruction* phi) { // if_merge_block had previously only 1 predecessor. uint32_t incoming_value = phi->GetSingleWordInOperand(0); auto def_in_loop = clone_results.value_map_.find(incoming_value); if (def_in_loop != clone_results.value_map_.end()) incoming_value = def_in_loop->second; phi->AddOperand( {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}}); phi->AddOperand( {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}}); context_->get_def_use_mgr()->AnalyzeInstUse(phi); }); context_->InvalidateAnalysesExceptFor( IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping | IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG); } void LoopPeeling::PeelAfter(uint32_t peel_factor) { assert(CanPeelLoop() && "Cannot peel loop"); LoopUtils::LoopCloningResult clone_results; // Clone the loop and insert the cloned one before the loop. DuplicateAndConnectLoop(&clone_results); // Add a canonical induction variable "canonical_induction_variable_". InsertCanonicalInductionVariable(&clone_results); InstructionBuilder builder( context_, &*cloned_loop_->GetPreHeaderBlock()->tail(), IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); Instruction* factor = builder.GetIntConstant(peel_factor, int_type_->IsSigned()); Instruction* has_remaining_iteration = builder.AddLessThan( factor->result_id(), loop_iteration_count_->result_id()); // Change the exit condition of the cloned loop to be (exit when become // false): // "canonical_induction_variable_" + "factor" < "loop_iteration_count_" FixExitCondition([factor, this](Instruction* insert_before_point) { InstructionBuilder cond_builder( context_, insert_before_point, IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); // Build the following check: canonical_induction_variable_ + factor < // iteration_count return cond_builder .AddLessThan(cond_builder .AddIAdd(canonical_induction_variable_->type_id(), canonical_induction_variable_->result_id(), factor->result_id()) ->result_id(), loop_iteration_count_->result_id()) ->result_id(); }); // "Protect" the first loop: the first loop can only be executed if // factor < loop_iteration_count_. // The original loop's pre-header was the cloned loop merge block. GetClonedLoop()->SetMergeBlock( CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock())); // Use the second loop preheader as if merge block. // Prevent the first loop if only the peeled loop needs it. BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration, GetOriginalLoop()->GetPreHeaderBlock()); // Patch the phi of the header block. // We added an if to enclose the first loop and because the phi node are // connected to the exit value of the first loop, the definition no longer // dominate the preheader. // We had to the preheader (our if merge block) the required phi instruction // and patch the header phi. GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst( [&clone_results, if_block, this](Instruction* phi) { analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); auto find_value_idx = [](Instruction* phi_inst, Loop* loop) { uint32_t preheader_value_idx = !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2; return preheader_value_idx; }; Instruction* cloned_phi = def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id())); uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand( find_value_idx(cloned_phi, GetClonedLoop())); Instruction* new_phi = InstructionBuilder(context_, &*GetOriginalLoop()->GetPreHeaderBlock()->tail(), IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) .AddPhi(phi->type_id(), {phi->GetSingleWordInOperand( find_value_idx(phi, GetOriginalLoop())), GetClonedLoop()->GetMergeBlock()->id(), cloned_preheader_value, if_block->id()}); phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()), {new_phi->result_id()}); def_use_mgr->AnalyzeInstUse(phi); }); context_->InvalidateAnalysesExceptFor( IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping | IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG); } Pass::Status LoopPeelingPass::Process() { bool modified = false; Module* module = context()->module(); // Process each function in the module for (Function& f : *module) { modified |= ProcessFunction(&f); } return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; } bool LoopPeelingPass::ProcessFunction(Function* f) { bool modified = false; LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f); std::vector<Loop*> to_process_loop; to_process_loop.reserve(loop_descriptor.NumLoops()); for (Loop& l : loop_descriptor) { to_process_loop.push_back(&l); } ScalarEvolutionAnalysis scev_analysis(context()); for (Loop* loop : to_process_loop) { CodeMetrics loop_size; loop_size.Analyze(*loop); auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* { if (!loop_to_peel->IsLCSSA()) { LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA(); } bool peeled_loop; Loop* still_peelable_loop; std::tie(peeled_loop, still_peelable_loop) = ProcessLoop(loop_to_peel, &loop_size); if (peeled_loop) { modified = true; } return still_peelable_loop; }; Loop* still_peelable_loop = try_peel(loop); // The pass is working out the maximum factor by which a loop can be peeled. // If the loop can potentially be peeled again, then there is only one // possible direction, so only one call is still needed. if (still_peelable_loop) { try_peel(loop); } } return modified; } std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop, CodeMetrics* loop_size) { ScalarEvolutionAnalysis* scev_analysis = context()->GetScalarEvolutionAnalysis(); // Default values for bailing out. std::pair<bool, Loop*> bail_out{false, nullptr}; BasicBlock* exit_block = loop->FindConditionBlock(); if (!exit_block) { return bail_out; } Instruction* exiting_iv = loop->FindConditionVariable(exit_block); if (!exiting_iv) { return bail_out; } size_t iterations = 0; if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(), &iterations)) { return bail_out; } if (!iterations) { return bail_out; } Instruction* canonical_induction_variable = nullptr; loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable, scev_analysis, this](Instruction* insn) { if (const SERecurrentNode* iv = scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) { const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode(); const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode(); if (offset && coeff && offset->FoldToSingleValue() == 0 && coeff->FoldToSingleValue() == 1) { if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) { canonical_induction_variable = insn; return false; } } } return true; }); bool is_signed = canonical_induction_variable ? context() ->get_type_mgr() ->GetType(canonical_induction_variable->type_id()) ->AsInteger() ->IsSigned() : false; LoopPeeling peeler( loop, InstructionBuilder( context(), loop->GetHeaderBlock(), IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping) .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations), is_signed), canonical_induction_variable); if (!peeler.CanPeelLoop()) { return bail_out; } // For each basic block in the loop, check if it can be peeled. If it // can, get the direction (before/after) and by which factor. LoopPeelingInfo peel_info(loop, iterations, scev_analysis); uint32_t peel_before_factor = 0; uint32_t peel_after_factor = 0; for (uint32_t block : loop->GetBlocks()) { if (block == exit_block->id()) { continue; } BasicBlock* bb = cfg()->block(block); PeelDirection direction; uint32_t factor; std::tie(direction, factor) = peel_info.GetPeelingInfo(bb); if (direction == PeelDirection::kNone) { continue; } if (direction == PeelDirection::kBefore) { peel_before_factor = std::max(peel_before_factor, factor); } else { assert(direction == PeelDirection::kAfter); peel_after_factor = std::max(peel_after_factor, factor); } } PeelDirection direction = PeelDirection::kNone; uint32_t factor = 0; // Find which direction we should peel. if (peel_before_factor) { factor = peel_before_factor; direction = PeelDirection::kBefore; } if (peel_after_factor) { if (peel_before_factor < peel_after_factor) { // Favor a peel after here and give the peel before another shot later. factor = peel_after_factor; direction = PeelDirection::kAfter; } } // Do the peel if we can. if (direction == PeelDirection::kNone) return bail_out; // This does not take into account branch elimination opportunities and // the unrolling. It assumes the peeled loop will be unrolled as well. if (factor * loop_size->roi_size_ > code_grow_threshold_) { return bail_out; } loop_size->roi_size_ *= factor; // Find if a loop should be peeled again. Loop* extra_opportunity = nullptr; if (direction == PeelDirection::kBefore) { peeler.PeelBefore(factor); if (stats_) { stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor); } if (peel_after_factor) { // We could have peeled after, give it another try. extra_opportunity = peeler.GetOriginalLoop(); } } else { peeler.PeelAfter(factor); if (stats_) { stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor); } if (peel_before_factor) { // We could have peeled before, give it another try. extra_opportunity = peeler.GetClonedLoop(); } } return {true, extra_opportunity}; } uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand( Instruction* condition) const { for (uint32_t i = 0; i < condition->NumInOperands(); i++) { BasicBlock* bb = context_->get_instr_block(condition->GetSingleWordInOperand(i)); if (bb && loop_->IsInsideLoop(bb)) { return condition->GetSingleWordInOperand(i); } } return 0; } uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand( Instruction* condition) const { for (uint32_t i = 0; i < condition->NumInOperands(); i++) { BasicBlock* bb = context_->get_instr_block(condition->GetSingleWordInOperand(i)); if (!bb || !loop_->IsInsideLoop(bb)) { return condition->GetSingleWordInOperand(i); } } return 0; } static bool IsHandledCondition(SpvOp opcode) { switch (opcode) { case SpvOpIEqual: case SpvOpINotEqual: case SpvOpUGreaterThan: case SpvOpSGreaterThan: case SpvOpUGreaterThanEqual: case SpvOpSGreaterThanEqual: case SpvOpULessThan: case SpvOpSLessThan: case SpvOpULessThanEqual: case SpvOpSLessThanEqual: return true; default: return false; } } LoopPeelingPass::LoopPeelingInfo::Direction LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const { if (bb->terminator()->opcode() != SpvOpBranchConditional) { return GetNoneDirection(); } analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); Instruction* condition = def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0)); if (!IsHandledCondition(condition->opcode())) { return GetNoneDirection(); } if (!GetFirstLoopInvariantOperand(condition)) { // No loop invariant, it cannot be peeled by this pass. return GetNoneDirection(); } if (!GetFirstNonLoopInvariantOperand(condition)) { // Seems to be a job for the unswitch pass. return GetNoneDirection(); } // Left hand-side. SExpression lhs = scev_analysis_->AnalyzeInstruction( def_use_mgr->GetDef(condition->GetSingleWordInOperand(0))); if (lhs->GetType() == SENode::CanNotCompute) { // Can't make any conclusion. return GetNoneDirection(); } // Right hand-side. SExpression rhs = scev_analysis_->AnalyzeInstruction( def_use_mgr->GetDef(condition->GetSingleWordInOperand(1))); if (rhs->GetType() == SENode::CanNotCompute) { // Can't make any conclusion. return GetNoneDirection(); } // Only take into account recurrent expression over the current loop. bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs); bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs); if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) { return GetNoneDirection(); } if (is_lhs_rec) { if (!lhs->AsSERecurrentNode() || lhs->AsSERecurrentNode()->GetLoop() != loop_) { return GetNoneDirection(); } } if (is_rhs_rec) { if (!rhs->AsSERecurrentNode() || rhs->AsSERecurrentNode()->GetLoop() != loop_) { return GetNoneDirection(); } } // If the op code is ==, then we try a peel before or after. // If opcode is not <, >, <= or >=, we bail out. // // For the remaining cases, we canonicalize the expression so that the // constant expression is on the left hand side and the recurring expression // is on the right hand side. If we swap hand side, then < becomes >, <= // becomes >= etc. // If the opcode is <=, then we add 1 to the right hand side and do the peel // check on <. // If the opcode is >=, then we add 1 to the left hand side and do the peel // check on >. CmpOperator cmp_operator; switch (condition->opcode()) { default: return GetNoneDirection(); case SpvOpIEqual: case SpvOpINotEqual: return HandleEquality(lhs, rhs); case SpvOpUGreaterThan: case SpvOpSGreaterThan: { cmp_operator = CmpOperator::kGT; break; } case SpvOpULessThan: case SpvOpSLessThan: { cmp_operator = CmpOperator::kLT; break; } // We add one to transform >= into > and <= into <. case SpvOpUGreaterThanEqual: case SpvOpSGreaterThanEqual: { cmp_operator = CmpOperator::kGE; break; } case SpvOpULessThanEqual: case SpvOpSLessThanEqual: { cmp_operator = CmpOperator::kLE; break; } } // Force the left hand side to be the non recurring expression. if (is_lhs_rec) { std::swap(lhs, rhs); switch (cmp_operator) { case CmpOperator::kLT: { cmp_operator = CmpOperator::kGT; break; } case CmpOperator::kGT: { cmp_operator = CmpOperator::kLT; break; } case CmpOperator::kLE: { cmp_operator = CmpOperator::kGE; break; } case CmpOperator::kGE: { cmp_operator = CmpOperator::kLE; break; } } } return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode()); } SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration( SERecurrentNode* rec) const { return rec->GetOffset(); } SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration( SERecurrentNode* rec, int64_t iteration) const { SExpression coeff = rec->GetCoefficient(); SExpression offset = rec->GetOffset(); return (coeff * iteration) + offset; } SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration( SERecurrentNode* rec) const { return GetValueAtIteration(rec, loop_max_iterations_ - 1); } bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, bool* result) const { assert(scev_analysis_->IsLoopInvariant(loop_, lhs)); assert(scev_analysis_->IsLoopInvariant(loop_, rhs)); // We perform the test: 0 cmp_op rhs - lhs // What is left is then to determine the sign of the expression. switch (cmp_op) { case CmpOperator::kLT: { return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result); } case CmpOperator::kGT: { return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result); } case CmpOperator::kLE: { return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result); } case CmpOperator::kGE: { return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result); } } return false; } LoopPeelingPass::LoopPeelingInfo::Direction LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs, SExpression rhs) const { { // Try peel before opportunity. SExpression lhs_cst = lhs; if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) { lhs_cst = rec_node->GetOffset(); } SExpression rhs_cst = rhs; if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) { rhs_cst = rec_node->GetOffset(); } if (lhs_cst == rhs_cst) { return Direction{LoopPeelingPass::PeelDirection::kBefore, 1}; } } { // Try peel after opportunity. SExpression lhs_cst = lhs; if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) { // rec_node(x) = a * x + b // assign to lhs: a * (loop_max_iterations_ - 1) + b lhs_cst = GetValueAtLastIteration(rec_node); } SExpression rhs_cst = rhs; if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) { // rec_node(x) = a * x + b // assign to lhs: a * (loop_max_iterations_ - 1) + b rhs_cst = GetValueAtLastIteration(rec_node); } if (lhs_cst == rhs_cst) { return Direction{LoopPeelingPass::PeelDirection::kAfter, 1}; } } return GetNoneDirection(); } LoopPeelingPass::LoopPeelingInfo::Direction LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op, SExpression lhs, SERecurrentNode* rhs) const { SExpression offset = rhs->GetOffset(); SExpression coefficient = rhs->GetCoefficient(); // Compute (cst - B) / A. std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient; if (!flip_iteration.first->AsSEConstantNode()) { return GetNoneDirection(); } // note: !!flip_iteration.second normalize to 0/1 (via bool cast). int64_t iteration = flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() + !!flip_iteration.second; if (iteration <= 0 || loop_max_iterations_ <= static_cast<uint64_t>(iteration)) { // Always true or false within the loop bounds. return GetNoneDirection(); } // If this is a <= or >= operator and the iteration, make sure |iteration| is // the one flipping the condition. // If (cst - B) and A are not divisible, this equivalent to a < or > check, so // we skip this test. if (!flip_iteration.second && (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) { bool first_iteration; bool current_iteration; if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) || !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration), ¤t_iteration)) { return GetNoneDirection(); } // If the condition did not flip the next will. if (first_iteration == current_iteration) { iteration++; } } uint32_t cast_iteration = 0; // sanity check: can we fit |iteration| in a uint32_t ? if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) { cast_iteration = static_cast<uint32_t>(iteration); } if (cast_iteration) { // Peel before if we are closer to the start, after if closer to the end. if (loop_max_iterations_ / 2 > cast_iteration) { return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration}; } else { return Direction{ LoopPeelingPass::PeelDirection::kAfter, static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)}; } } return GetNoneDirection(); } } // namespace opt } // namespace spvtools