/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
#include "induction_var_range.h"
#include "nodes.h"
#include "optimization.h"
namespace art {
class CompilerDriver;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
* the detected nested loops, such as removal of dead induction and empty loops
* and inner loop vectorization.
*/
class HLoopOptimization : public HOptimization {
public:
HLoopOptimization(HGraph* graph,
CompilerDriver* compiler_driver,
HInductionVarAnalysis* induction_analysis);
void Run() OVERRIDE;
static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
private:
/**
* A single loop inside the loop hierarchy representation.
*/
struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
explicit LoopNode(HLoopInformation* lp_info)
: loop_info(lp_info),
outer(nullptr),
inner(nullptr),
previous(nullptr),
next(nullptr) {}
HLoopInformation* loop_info;
LoopNode* outer;
LoopNode* inner;
LoopNode* previous;
LoopNode* next;
};
/*
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
kNone = 0, // no restrictions
kNoMul = 1, // no multiplication
kNoDiv = 2, // no division
kNoShift = 4, // no shift
kNoShr = 8, // no arithmetic shift right
kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
kNoSignedHAdd = 32, // no signed halving add
kNoUnroundedHAdd = 64, // no unrounded halving add
kNoAbs = 128, // no absolute value
};
/*
* Vectorization mode during synthesis
* (sequential peeling/cleanup loop or vector loop).
*/
enum VectorMode {
kSequential,
kVector
};
/*
* Representation of a unit-stride array reference.
*/
struct ArrayReference {
ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
: base(b), offset(o), type(t), lhs(l) { }
bool operator<(const ArrayReference& other) const {
return
(base < other.base) ||
(base == other.base &&
(offset < other.offset || (offset == other.offset &&
(type < other.type ||
(type == other.type && lhs < other.lhs)))));
}
HInstruction* base; // base address
HInstruction* offset; // offset + i
Primitive::Type type; // component type
bool lhs; // def/use
};
// Loop setup and traversal.
void LocalRun();
void AddLoop(HLoopInformation* loop_info);
void RemoveLoop(LoopNode* node);
void TraverseLoopsInnerToOuter(LoopNode* node);
// Optimization.
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
void OptimizeInnerLoop(LoopNode* node);
// Vectorization analysis and synthesis.
bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
void GenerateNewLoop(LoopNode* node,
HBasicBlock* block,
HBasicBlock* new_preheader,
HInstruction* lo,
HInstruction* hi,
HInstruction* step);
bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
bool VectorizeUse(LoopNode* node,
HInstruction* instruction,
bool generate_code,
Primitive::Type type,
uint64_t restrictions);
bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
bool TrySetVectorLength(uint32_t length);
void GenerateVecInv(HInstruction* org, Primitive::Type type);
void GenerateVecSub(HInstruction* org, HInstruction* off);
void GenerateVecMem(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
Primitive::Type type);
void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
// Vectorization idioms.
bool VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* instruction,
bool generate_code,
Primitive::Type type,
uint64_t restrictions);
// Helpers.
bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
bool TrySetSimpleLoopHeader(HBasicBlock* block);
bool IsEmptyBody(HBasicBlock* block);
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
bool collect_loop_uses,
/*out*/ int32_t* use_count);
bool IsUsedOutsideLoop(HLoopInformation* loop_info,
HInstruction* instruction);
bool TryReplaceWithLastValue(HLoopInformation* loop_info,
HInstruction* instruction,
HBasicBlock* block);
bool TryAssignLastValue(HLoopInformation* loop_info,
HInstruction* instruction,
HBasicBlock* block,
bool collect_loop_uses);
void RemoveDeadInstructions(const HInstructionList& list);
bool CanRemoveCycle(); // Whether the current 'iset_' is removable.
// Compiler driver (to query ISA features).
const CompilerDriver* compiler_driver_;
// Range information based on prior induction variable analysis.
InductionVarRange induction_range_;
// Phase-local heap memory allocator for the loop optimizer. Storage obtained
// through this allocator is immediately released when the loop optimizer is done.
ArenaAllocator* loop_allocator_;
// Global heap memory allocator. Used to build HIR.
ArenaAllocator* global_allocator_;
// Entries into the loop hierarchy representation. The hierarchy resides
// in phase-local heap memory.
LoopNode* top_loop_;
LoopNode* last_loop_;
// Temporary bookkeeping of a set of instructions.
// Contents reside in phase-local heap memory.
ArenaSet<HInstruction*>* iset_;
// Counter that tracks how many induction cycles have been simplified. Useful
// to trigger incremental updates of induction variable analysis of outer loops
// when the induction of inner loops has changed.
uint32_t induction_simplication_count_;
// Flag that tracks if any simplifications have occurred.
bool simplified_;
// Number of "lanes" for selected packed type.
uint32_t vector_length_;
// Set of array references in the vector loop.
// Contents reside in phase-local heap memory.
ArenaSet<ArrayReference>* vector_refs_;
// Mapping used during vectorization synthesis for both the scalar peeling/cleanup
// loop (simd_ is false) and the actual vector loop (simd_ is true). The data
// structure maps original instructions into the new instructions.
// Contents reside in phase-local heap memory.
ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
// Temporary vectorization bookkeeping.
HBasicBlock* vector_preheader_; // preheader of the new loop
HBasicBlock* vector_header_; // header of the new loop
HBasicBlock* vector_body_; // body of the new loop
HInstruction* vector_runtime_test_a_;
HInstruction* vector_runtime_test_b_; // defines a != b runtime test
HPhi* vector_phi_; // the Phi representing the normalized loop index
VectorMode vector_mode_; // selects synthesis mode
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
};
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_