// Copyright (c) 2018 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef SOURCE_OPT_LOOP_UTILS_H_ #define SOURCE_OPT_LOOP_UTILS_H_ #include <list> #include <memory> #include <unordered_map> #include <vector> #include "source/opt/ir_context.h" #include "source/opt/loop_descriptor.h" namespace spvtools { namespace opt { // Class to gather some metrics about a Region Of Interest (ROI). // So far it counts the number of instructions in a ROI (excluding debug // and label instructions) per basic block and in total. struct CodeMetrics { void Analyze(const Loop& loop); // The number of instructions per basic block in the ROI. std::unordered_map<uint32_t, size_t> block_sizes_; // Number of instruction in the ROI. size_t roi_size_; }; // LoopUtils is used to encapsulte loop optimizations and from the passes which // use them. Any pass which needs a loop optimization should do it through this // or through a pass which is using this. class LoopUtils { public: // Holds a auxiliary results of the loop cloning procedure. struct LoopCloningResult { using ValueMapTy = std::unordered_map<uint32_t, uint32_t>; using BlockMapTy = std::unordered_map<uint32_t, BasicBlock*>; using PtrMap = std::unordered_map<Instruction*, Instruction*>; PtrMap ptr_map_; // Mapping between the original loop ids and the new one. ValueMapTy value_map_; // Mapping between original loop blocks to the cloned one. BlockMapTy old_to_new_bb_; // Mapping between the cloned loop blocks to original one. BlockMapTy new_to_old_bb_; // List of cloned basic block. std::vector<std::unique_ptr<BasicBlock>> cloned_bb_; }; LoopUtils(IRContext* context, Loop* loop) : context_(context), loop_desc_( context->GetLoopDescriptor(loop->GetHeaderBlock()->GetParent())), loop_(loop), function_(*loop_->GetHeaderBlock()->GetParent()) {} // The converts the current loop to loop closed SSA form. // In the loop closed SSA, all loop exiting values go through a dedicated Phi // instruction. For instance: // // for (...) { // A1 = ... // if (...) // A2 = ... // A = phi A1, A2 // } // ... = op A ... // // Becomes // // for (...) { // A1 = ... // if (...) // A2 = ... // A = phi A1, A2 // } // C = phi A // ... = op C ... // // This makes some loop transformations (such as loop unswitch) simpler // (removes the needs to take care of exiting variables). void MakeLoopClosedSSA(); // Create dedicate exit basic block. This ensure all exit basic blocks has the // loop as sole predecessors. // By construction, structured control flow already has a dedicated exit // block. // Preserves: CFG, def/use and instruction to block mapping. void CreateLoopDedicatedExits(); // Clone |loop_| and remap its instructions. Newly created blocks // will be added to the |cloning_result.cloned_bb_| list, correctly ordered to // be inserted into a function. // It is assumed that |ordered_loop_blocks| is compatible with the result of // |Loop::ComputeLoopStructuredOrder|. If the preheader and merge block are in // the list they will also be cloned. If not, the resulting loop will share // them with the original loop. // The function preserves the def/use, cfg and instr to block analyses. // The cloned loop nest will be added to the loop descriptor and will have // ownership. Loop* CloneLoop(LoopCloningResult* cloning_result, const std::vector<BasicBlock*>& ordered_loop_blocks) const; // Clone |loop_| and remap its instructions, as above. Overload to compute // loop block ordering within method rather than taking in as parameter. Loop* CloneLoop(LoopCloningResult* cloning_result) const; // Clone the |loop_| and make the new loop branch to the second loop on exit. Loop* CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result); // Perfom a partial unroll of |loop| by given |factor|. This will copy the // body of the loop |factor| times. So a |factor| of one would give a new loop // with the original body plus one unrolled copy body. bool PartiallyUnroll(size_t factor); // Fully unroll |loop|. bool FullyUnroll(); // This function validates that |loop| meets the assumptions made by the // implementation of the loop unroller. As the implementation accommodates // more types of loops this function can reduce its checks. // // The conditions checked to ensure the loop can be unrolled are as follows: // 1. That the loop is in structured order. // 2. That the continue block is a branch to the header. // 3. That the only phi used in the loop is the induction variable. // TODO(stephen@codeplay.com): This is a temporary mesure, after the loop is // converted into LCSAA form and has a single entry and exit we can rewrite // the other phis. // 4. That this is an inner most loop, or that loops contained within this // loop have already been fully unrolled. // 5. That each instruction in the loop is only used within the loop. // (Related to the above phi condition). bool CanPerformUnroll(); // Maintains the loop descriptor object after the unroll functions have been // called, otherwise the analysis should be invalidated. void Finalize(); // Returns the context associate to |loop_|. IRContext* GetContext() { return context_; } // Returns the loop descriptor owning |loop_|. LoopDescriptor* GetLoopDescriptor() { return loop_desc_; } // Returns the loop on which the object operates on. Loop* GetLoop() const { return loop_; } // Returns the function that |loop_| belong to. Function* GetFunction() const { return &function_; } private: IRContext* context_; LoopDescriptor* loop_desc_; Loop* loop_; Function& function_; // Populates the loop nest of |new_loop| according to |loop_| nest. void PopulateLoopNest(Loop* new_loop, const LoopCloningResult& cloning_result) const; // Populates |new_loop| descriptor according to |old_loop|'s one. void PopulateLoopDesc(Loop* new_loop, Loop* old_loop, const LoopCloningResult& cloning_result) const; }; } // namespace opt } // namespace spvtools #endif // SOURCE_OPT_LOOP_UTILS_H_