// Copyright (c) 2018 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "source/opt/loop_fusion.h" #include <algorithm> #include <vector> #include "source/opt/ir_context.h" #include "source/opt/loop_dependence.h" #include "source/opt/loop_descriptor.h" namespace spvtools { namespace opt { namespace { // Append all the loops nested in |loop| to |loops|. void CollectChildren(Loop* loop, std::vector<const Loop*>* loops) { for (auto child : *loop) { loops->push_back(child); if (child->NumImmediateChildren() != 0) { CollectChildren(child, loops); } } } // Return the set of locations accessed by |stores| and |loads|. std::set<Instruction*> GetLocationsAccessed( const std::map<Instruction*, std::vector<Instruction*>>& stores, const std::map<Instruction*, std::vector<Instruction*>>& loads) { std::set<Instruction*> locations{}; for (const auto& kv : stores) { locations.insert(std::get<0>(kv)); } for (const auto& kv : loads) { locations.insert(std::get<0>(kv)); } return locations; } // Append all dependences from |sources| to |destinations| to |dependences|. void GetDependences(std::vector<DistanceVector>* dependences, LoopDependenceAnalysis* analysis, const std::vector<Instruction*>& sources, const std::vector<Instruction*>& destinations, size_t num_entries) { for (auto source : sources) { for (auto destination : destinations) { DistanceVector dist(num_entries); if (!analysis->GetDependence(source, destination, &dist)) { dependences->push_back(dist); } } } } // Apped all instructions in |block| to |instructions|. void AddInstructionsInBlock(std::vector<Instruction*>* instructions, BasicBlock* block) { for (auto& inst : *block) { instructions->push_back(&inst); } instructions->push_back(block->GetLabelInst()); } } // namespace bool LoopFusion::UsedInContinueOrConditionBlock(Instruction* phi_instruction, Loop* loop) { auto condition_block = loop->FindConditionBlock()->id(); auto continue_block = loop->GetContinueBlock()->id(); auto not_used = context_->get_def_use_mgr()->WhileEachUser( phi_instruction, [this, condition_block, continue_block](Instruction* instruction) { auto block_id = context_->get_instr_block(instruction)->id(); return block_id != condition_block && block_id != continue_block; }); return !not_used; } void LoopFusion::RemoveIfNotUsedContinueOrConditionBlock( std::vector<Instruction*>* instructions, Loop* loop) { instructions->erase( std::remove_if(std::begin(*instructions), std::end(*instructions), [this, loop](Instruction* instruction) { return !UsedInContinueOrConditionBlock(instruction, loop); }), std::end(*instructions)); } bool LoopFusion::AreCompatible() { // Check that the loops are in the same function. if (loop_0_->GetHeaderBlock()->GetParent() != loop_1_->GetHeaderBlock()->GetParent()) { return false; } // Check that both loops have pre-header blocks. if (!loop_0_->GetPreHeaderBlock() || !loop_1_->GetPreHeaderBlock()) { return false; } // Check there are no breaks. if (context_->cfg()->preds(loop_0_->GetMergeBlock()->id()).size() != 1 || context_->cfg()->preds(loop_1_->GetMergeBlock()->id()).size() != 1) { return false; } // Check there are no continues. if (context_->cfg()->preds(loop_0_->GetContinueBlock()->id()).size() != 1 || context_->cfg()->preds(loop_1_->GetContinueBlock()->id()).size() != 1) { return false; } // |GetInductionVariables| returns all OpPhi in the header. Check that both // loops have exactly one that is used in the continue and condition blocks. std::vector<Instruction*> inductions_0{}, inductions_1{}; loop_0_->GetInductionVariables(inductions_0); RemoveIfNotUsedContinueOrConditionBlock(&inductions_0, loop_0_); if (inductions_0.size() != 1) { return false; } induction_0_ = inductions_0.front(); loop_1_->GetInductionVariables(inductions_1); RemoveIfNotUsedContinueOrConditionBlock(&inductions_1, loop_1_); if (inductions_1.size() != 1) { return false; } induction_1_ = inductions_1.front(); if (!CheckInit()) { return false; } if (!CheckCondition()) { return false; } if (!CheckStep()) { return false; } // Check adjacency, |loop_0_| should come just before |loop_1_|. // There is always at least one block between loops, even if it's empty. // We'll check at most 2 preceeding blocks. auto pre_header_1 = loop_1_->GetPreHeaderBlock(); std::vector<BasicBlock*> block_to_check{}; block_to_check.push_back(pre_header_1); if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) { // Follow CFG for one more block. auto preds = context_->cfg()->preds(pre_header_1->id()); if (preds.size() == 1) { auto block = &*containing_function_->FindBlock(preds.front()); if (block == loop_0_->GetMergeBlock()) { block_to_check.push_back(block); } else { return false; } } else { return false; } } // Check that the separating blocks are either empty or only contains a store // to a local variable that is never read (left behind by // '--eliminate-local-multi-store'). Also allow OpPhi, since the loop could be // in LCSSA form. for (auto block : block_to_check) { for (auto& inst : *block) { if (inst.opcode() == SpvOpStore) { // Get the definition of the target to check it's function scope so // there are no observable side effects. auto variable = context_->get_def_use_mgr()->GetDef(inst.GetSingleWordInOperand(0)); if (variable->opcode() != SpvOpVariable || variable->GetSingleWordInOperand(0) != SpvStorageClassFunction) { return false; } // Check the target is never loaded. auto is_used = false; context_->get_def_use_mgr()->ForEachUse( inst.GetSingleWordInOperand(0), [&is_used](Instruction* use_inst, uint32_t) { if (use_inst->opcode() == SpvOpLoad) { is_used = true; } }); if (is_used) { return false; } } else if (inst.opcode() == SpvOpPhi) { if (inst.NumInOperands() != 2) { return false; } } else if (inst.opcode() != SpvOpBranch) { return false; } } } return true; } // namespace opt bool LoopFusion::ContainsBarriersOrFunctionCalls(Loop* loop) { for (const auto& block : loop->GetBlocks()) { for (const auto& inst : *containing_function_->FindBlock(block)) { auto opcode = inst.opcode(); if (opcode == SpvOpFunctionCall || opcode == SpvOpControlBarrier || opcode == SpvOpMemoryBarrier || opcode == SpvOpTypeNamedBarrier || opcode == SpvOpNamedBarrierInitialize || opcode == SpvOpMemoryNamedBarrier) { return true; } } } return false; } bool LoopFusion::CheckInit() { int64_t loop_0_init; if (!loop_0_->GetInductionInitValue(induction_0_, &loop_0_init)) { return false; } int64_t loop_1_init; if (!loop_1_->GetInductionInitValue(induction_1_, &loop_1_init)) { return false; } if (loop_0_init != loop_1_init) { return false; } return true; } bool LoopFusion::CheckCondition() { auto condition_0 = loop_0_->GetConditionInst(); auto condition_1 = loop_1_->GetConditionInst(); if (!loop_0_->IsSupportedCondition(condition_0->opcode()) || !loop_1_->IsSupportedCondition(condition_1->opcode())) { return false; } if (condition_0->opcode() != condition_1->opcode()) { return false; } for (uint32_t i = 0; i < condition_0->NumInOperandWords(); ++i) { auto arg_0 = context_->get_def_use_mgr()->GetDef( condition_0->GetSingleWordInOperand(i)); auto arg_1 = context_->get_def_use_mgr()->GetDef( condition_1->GetSingleWordInOperand(i)); if (arg_0 == induction_0_ && arg_1 == induction_1_) { continue; } if (arg_0 == induction_0_ && arg_1 != induction_1_) { return false; } if (arg_1 == induction_1_ && arg_0 != induction_0_) { return false; } if (arg_0 != arg_1) { return false; } } return true; } bool LoopFusion::CheckStep() { auto scalar_analysis = context_->GetScalarEvolutionAnalysis(); SENode* induction_node_0 = scalar_analysis->SimplifyExpression( scalar_analysis->AnalyzeInstruction(induction_0_)); if (!induction_node_0->AsSERecurrentNode()) { return false; } SENode* induction_step_0 = induction_node_0->AsSERecurrentNode()->GetCoefficient(); if (!induction_step_0->AsSEConstantNode()) { return false; } SENode* induction_node_1 = scalar_analysis->SimplifyExpression( scalar_analysis->AnalyzeInstruction(induction_1_)); if (!induction_node_1->AsSERecurrentNode()) { return false; } SENode* induction_step_1 = induction_node_1->AsSERecurrentNode()->GetCoefficient(); if (!induction_step_1->AsSEConstantNode()) { return false; } if (*induction_step_0 != *induction_step_1) { return false; } return true; } std::map<Instruction*, std::vector<Instruction*>> LoopFusion::LocationToMemOps( const std::vector<Instruction*>& mem_ops) { std::map<Instruction*, std::vector<Instruction*>> location_map{}; for (auto instruction : mem_ops) { auto access_location = context_->get_def_use_mgr()->GetDef( instruction->GetSingleWordInOperand(0)); while (access_location->opcode() == SpvOpAccessChain) { access_location = context_->get_def_use_mgr()->GetDef( access_location->GetSingleWordInOperand(0)); } location_map[access_location].push_back(instruction); } return location_map; } std::pair<std::vector<Instruction*>, std::vector<Instruction*>> LoopFusion::GetLoadsAndStoresInLoop(Loop* loop) { std::vector<Instruction*> loads{}; std::vector<Instruction*> stores{}; for (auto block_id : loop->GetBlocks()) { if (block_id == loop->GetContinueBlock()->id()) { continue; } for (auto& instruction : *containing_function_->FindBlock(block_id)) { if (instruction.opcode() == SpvOpLoad) { loads.push_back(&instruction); } else if (instruction.opcode() == SpvOpStore) { stores.push_back(&instruction); } } } return std::make_pair(loads, stores); } bool LoopFusion::IsUsedInLoop(Instruction* instruction, Loop* loop) { auto not_used = context_->get_def_use_mgr()->WhileEachUser( instruction, [this, loop](Instruction* user) { auto block_id = context_->get_instr_block(user)->id(); return !loop->IsInsideLoop(block_id); }); return !not_used; } bool LoopFusion::IsLegal() { assert(AreCompatible() && "Fusion can't be legal, loops are not compatible."); // Bail out if there are function calls as they could have side-effects that // cause dependencies or if there are any barriers. if (ContainsBarriersOrFunctionCalls(loop_0_) || ContainsBarriersOrFunctionCalls(loop_1_)) { return false; } std::vector<Instruction*> phi_instructions{}; loop_0_->GetInductionVariables(phi_instructions); // Check no OpPhi in |loop_0_| is used in |loop_1_|. for (auto phi_instruction : phi_instructions) { if (IsUsedInLoop(phi_instruction, loop_1_)) { return false; } } // Check no LCSSA OpPhi in merge block of |loop_0_| is used in |loop_1_|. auto phi_used = false; loop_0_->GetMergeBlock()->ForEachPhiInst( [this, &phi_used](Instruction* phi_instruction) { phi_used |= IsUsedInLoop(phi_instruction, loop_1_); }); if (phi_used) { return false; } // Grab loads & stores from both loops. auto loads_stores_0 = GetLoadsAndStoresInLoop(loop_0_); auto loads_stores_1 = GetLoadsAndStoresInLoop(loop_1_); // Build memory location to operation maps. auto load_locs_0 = LocationToMemOps(std::get<0>(loads_stores_0)); auto store_locs_0 = LocationToMemOps(std::get<1>(loads_stores_0)); auto load_locs_1 = LocationToMemOps(std::get<0>(loads_stores_1)); auto store_locs_1 = LocationToMemOps(std::get<1>(loads_stores_1)); // Get the locations accessed in both loops. auto locations_0 = GetLocationsAccessed(store_locs_0, load_locs_0); auto locations_1 = GetLocationsAccessed(store_locs_1, load_locs_1); std::vector<Instruction*> potential_clashes{}; std::set_intersection(std::begin(locations_0), std::end(locations_0), std::begin(locations_1), std::end(locations_1), std::back_inserter(potential_clashes)); // If the loops don't access the same variables, the fusion is legal. if (potential_clashes.empty()) { return true; } // Find variables that have at least one store. std::vector<Instruction*> potential_clashes_with_stores{}; for (auto location : potential_clashes) { if (store_locs_0.find(location) != std::end(store_locs_0) || store_locs_1.find(location) != std::end(store_locs_1)) { potential_clashes_with_stores.push_back(location); } } // If there are only loads to the same variables, the fusion is legal. if (potential_clashes_with_stores.empty()) { return true; } // Else if loads and at least one store (across loops) to the same variable // there is a potential dependence and we need to check the dependence // distance. // Find all the loops in this loop nest for the dependency analysis. std::vector<const Loop*> loops{}; // Find the parents. for (auto current_loop = loop_0_; current_loop != nullptr; current_loop = current_loop->GetParent()) { loops.push_back(current_loop); } auto this_loop_position = loops.size() - 1; std::reverse(std::begin(loops), std::end(loops)); // Find the children. CollectChildren(loop_0_, &loops); CollectChildren(loop_1_, &loops); // Check that any dependes created are legal. That means the fused loops do // not have any dependencies with dependence distance greater than 0 that did // not exist in the original loops. LoopDependenceAnalysis analysis(context_, loops); analysis.GetScalarEvolution()->AddLoopsToPretendAreTheSame( {loop_0_, loop_1_}); for (auto location : potential_clashes_with_stores) { // Analyse dependences from |loop_0_| to |loop_1_|. std::vector<DistanceVector> dependences; // Read-After-Write. GetDependences(&dependences, &analysis, store_locs_0[location], load_locs_1[location], loops.size()); // Write-After-Read. GetDependences(&dependences, &analysis, load_locs_0[location], store_locs_1[location], loops.size()); // Write-After-Write. GetDependences(&dependences, &analysis, store_locs_0[location], store_locs_1[location], loops.size()); // Check that the induction variables either don't appear in the subscripts // or the dependence distance is negative. for (const auto& dependence : dependences) { const auto& entry = dependence.GetEntries()[this_loop_position]; if ((entry.dependence_information == DistanceEntry::DependenceInformation::DISTANCE && entry.distance < 1) || (entry.dependence_information == DistanceEntry::DependenceInformation::IRRELEVANT)) { continue; } else { return false; } } } return true; } void ReplacePhiParentWith(Instruction* inst, uint32_t orig_block, uint32_t new_block) { if (inst->GetSingleWordInOperand(1) == orig_block) { inst->SetInOperand(1, {new_block}); } else { inst->SetInOperand(3, {new_block}); } } void LoopFusion::Fuse() { assert(AreCompatible() && "Can't fuse, loops aren't compatible"); assert(IsLegal() && "Can't fuse, illegal"); // Save the pointers/ids, won't be found in the middle of doing modifications. auto header_1 = loop_1_->GetHeaderBlock()->id(); auto condition_1 = loop_1_->FindConditionBlock()->id(); auto continue_1 = loop_1_->GetContinueBlock()->id(); auto continue_0 = loop_0_->GetContinueBlock()->id(); auto condition_block_of_0 = loop_0_->FindConditionBlock(); // Find the blocks whose branches need updating. auto first_block_of_1 = &*(++containing_function_->FindBlock(condition_1)); auto last_block_of_1 = &*(--containing_function_->FindBlock(continue_1)); auto last_block_of_0 = &*(--containing_function_->FindBlock(continue_0)); // Update the branch for |last_block_of_loop_0| to go to |first_block_of_1|. last_block_of_0->ForEachSuccessorLabel( [first_block_of_1](uint32_t* succ) { *succ = first_block_of_1->id(); }); // Update the branch for the |last_block_of_loop_1| to go to the continue // block of |loop_0_|. last_block_of_1->ForEachSuccessorLabel( [this](uint32_t* succ) { *succ = loop_0_->GetContinueBlock()->id(); }); // Update merge block id in the header of |loop_0_| to the merge block of // |loop_1_|. loop_0_->GetHeaderBlock()->ForEachInst([this](Instruction* inst) { if (inst->opcode() == SpvOpLoopMerge) { inst->SetInOperand(0, {loop_1_->GetMergeBlock()->id()}); } }); // Update condition branch target in |loop_0_| to the merge block of // |loop_1_|. condition_block_of_0->ForEachInst([this](Instruction* inst) { if (inst->opcode() == SpvOpBranchConditional) { auto loop_0_merge_block_id = loop_0_->GetMergeBlock()->id(); if (inst->GetSingleWordInOperand(1) == loop_0_merge_block_id) { inst->SetInOperand(1, {loop_1_->GetMergeBlock()->id()}); } else { inst->SetInOperand(2, {loop_1_->GetMergeBlock()->id()}); } } }); // Move OpPhi instructions not corresponding to the induction variable from // the header of |loop_1_| to the header of |loop_0_|. std::vector<Instruction*> instructions_to_move{}; for (auto& instruction : *loop_1_->GetHeaderBlock()) { if (instruction.opcode() == SpvOpPhi && &instruction != induction_1_) { instructions_to_move.push_back(&instruction); } } for (auto& it : instructions_to_move) { it->RemoveFromList(); it->InsertBefore(induction_0_); } // Update the OpPhi parents to the correct blocks in |loop_0_|. loop_0_->GetHeaderBlock()->ForEachPhiInst([this](Instruction* i) { ReplacePhiParentWith(i, loop_1_->GetPreHeaderBlock()->id(), loop_0_->GetPreHeaderBlock()->id()); ReplacePhiParentWith(i, loop_1_->GetContinueBlock()->id(), loop_0_->GetContinueBlock()->id()); }); // Update instruction to block mapping & DefUseManager. for (auto& phi_instruction : instructions_to_move) { context_->set_instr_block(phi_instruction, loop_0_->GetHeaderBlock()); context_->get_def_use_mgr()->AnalyzeInstUse(phi_instruction); } // Replace the uses of the induction variable of |loop_1_| with that the // induction variable of |loop_0_|. context_->ReplaceAllUsesWith(induction_1_->result_id(), induction_0_->result_id()); // Replace LCSSA OpPhi in merge block of |loop_0_|. loop_0_->GetMergeBlock()->ForEachPhiInst([this](Instruction* instruction) { context_->ReplaceAllUsesWith(instruction->result_id(), instruction->GetSingleWordInOperand(0)); }); // Update LCSSA OpPhi in merge block of |loop_1_|. loop_1_->GetMergeBlock()->ForEachPhiInst( [condition_block_of_0](Instruction* instruction) { instruction->SetInOperand(1, {condition_block_of_0->id()}); }); // Move the continue block of |loop_0_| after the last block of |loop_1_|. containing_function_->MoveBasicBlockToAfter(continue_0, last_block_of_1); // Gather all instructions to be killed from |loop_1_| (induction variable // initialisation, header, condition and continue blocks). std::vector<Instruction*> instr_to_delete{}; AddInstructionsInBlock(&instr_to_delete, loop_1_->GetPreHeaderBlock()); AddInstructionsInBlock(&instr_to_delete, loop_1_->GetHeaderBlock()); AddInstructionsInBlock(&instr_to_delete, loop_1_->FindConditionBlock()); AddInstructionsInBlock(&instr_to_delete, loop_1_->GetContinueBlock()); // There was an additional empty block between the loops, kill that too. if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) { AddInstructionsInBlock(&instr_to_delete, loop_0_->GetMergeBlock()); } // Update the CFG, so it wouldn't need invalidating. auto cfg = context_->cfg(); cfg->ForgetBlock(loop_1_->GetPreHeaderBlock()); cfg->ForgetBlock(loop_1_->GetHeaderBlock()); cfg->ForgetBlock(loop_1_->FindConditionBlock()); cfg->ForgetBlock(loop_1_->GetContinueBlock()); if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) { cfg->ForgetBlock(loop_0_->GetMergeBlock()); } cfg->RemoveEdge(last_block_of_0->id(), loop_0_->GetContinueBlock()->id()); cfg->AddEdge(last_block_of_0->id(), first_block_of_1->id()); cfg->AddEdge(last_block_of_1->id(), loop_0_->GetContinueBlock()->id()); cfg->AddEdge(loop_0_->GetContinueBlock()->id(), loop_1_->GetHeaderBlock()->id()); cfg->AddEdge(condition_block_of_0->id(), loop_1_->GetMergeBlock()->id()); // Update DefUseManager. auto def_use_mgr = context_->get_def_use_mgr(); // Uses of labels that are in updated branches need analysing. def_use_mgr->AnalyzeInstUse(last_block_of_0->terminator()); def_use_mgr->AnalyzeInstUse(last_block_of_1->terminator()); def_use_mgr->AnalyzeInstUse(loop_0_->GetHeaderBlock()->GetLoopMergeInst()); def_use_mgr->AnalyzeInstUse(condition_block_of_0->terminator()); // Update the LoopDescriptor, so it wouldn't need invalidating. auto ld = context_->GetLoopDescriptor(containing_function_); // Create a copy, so the iterator wouldn't be invalidated. std::vector<Loop*> loops_to_add_remove{}; for (auto child_loop : *loop_1_) { loops_to_add_remove.push_back(child_loop); } for (auto child_loop : loops_to_add_remove) { loop_1_->RemoveChildLoop(child_loop); loop_0_->AddNestedLoop(child_loop); } auto loop_1_blocks = loop_1_->GetBlocks(); for (auto block : loop_1_blocks) { loop_1_->RemoveBasicBlock(block); if (block != header_1 && block != condition_1 && block != continue_1) { loop_0_->AddBasicBlock(block); if ((*ld)[block] == loop_1_) { ld->SetBasicBlockToLoop(block, loop_0_); } } if ((*ld)[block] == loop_1_) { ld->ForgetBasicBlock(block); } } loop_1_->RemoveBasicBlock(loop_1_->GetPreHeaderBlock()->id()); ld->ForgetBasicBlock(loop_1_->GetPreHeaderBlock()->id()); if (loop_0_->GetMergeBlock() != loop_1_->GetPreHeaderBlock()) { loop_0_->RemoveBasicBlock(loop_0_->GetMergeBlock()->id()); ld->ForgetBasicBlock(loop_0_->GetMergeBlock()->id()); } loop_0_->SetMergeBlock(loop_1_->GetMergeBlock()); loop_1_->ClearBlocks(); ld->RemoveLoop(loop_1_); // Kill unnessecary instructions and remove all empty blocks. for (auto inst : instr_to_delete) { context_->KillInst(inst); } containing_function_->RemoveEmptyBlocks(); // Invalidate analyses. context_->InvalidateAnalysesExceptFor( IRContext::Analysis::kAnalysisInstrToBlockMapping | IRContext::Analysis::kAnalysisLoopAnalysis | IRContext::Analysis::kAnalysisDefUse | IRContext::Analysis::kAnalysisCFG); } } // namespace opt } // namespace spvtools