/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gvn.h"

#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
#include "side_effects_analysis.h"
#include "utils.h"

namespace art {

/**
 * A ValueSet holds instructions that can replace other instructions. It is updated
 * through the `Add` method, and the `Kill` method. The `Kill` method removes
 * instructions that are affected by the given side effect.
 *
 * The `Lookup` method returns an equivalent instruction to the given instruction
 * if there is one in the set. In GVN, we would say those instructions have the
 * same "number".
 *
 * Internally the set is a fixed-size chained hash table. A bucket is either
 * "owned" (its nodes were allocated for this set) or still points at nodes of
 * the set it was copied from; non-owned buckets are cloned before being modified.
 */
class ValueSet : public ArenaObject<kArenaAllocGvn> {
 public:
  // Constructs an empty ValueSet which owns all its buckets.
  explicit ValueSet(ArenaAllocator* allocator)
      : allocator_(allocator),
        num_buckets_(kMinimumNumberOfBuckets),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
    DCHECK(IsPowerOfTwo(num_buckets_));
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Copy constructor. Depending on the load factor, it will either make a deep
  // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
  ValueSet(ArenaAllocator* allocator, const ValueSet& other)
      : allocator_(allocator),
        num_buckets_(other.IdealBucketCount()),
        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
        buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
        num_entries_(0u) {
    // ArenaAllocator returns zeroed memory, so entries of buckets_ and
    // buckets_owned_ are initialized to null and false, respectively.
    DCHECK(IsPowerOfTwo(num_buckets_));
    PopulateFromInternal(other, /* is_dirty */ false);
  }

  // Erases all values in this set and populates it with values from `other`.
  // Populating a set from itself is a no-op.
  void PopulateFrom(const ValueSet& other) {
    if (this == &other) {
      return;
    }
    PopulateFromInternal(other, /* is_dirty */ true);
  }

  // Returns true if `this` has enough buckets so that if `other` is copied into
  // it, the load factor will not cross the upper threshold.
  // If `exact_match` is set, true is returned only if `this` has the ideal
  // number of buckets. Larger number of buckets is allowed otherwise.
  bool CanHoldCopyOf(const ValueSet& other, bool exact_match) const {
    if (exact_match) {
      return other.IdealBucketCount() == num_buckets_;
    } else {
      return other.IdealBucketCount() <= num_buckets_;
    }
  }

  // Adds an instruction in the set. The instruction must not have an
  // equivalent in the set already (checked in debug builds).
  void Add(HInstruction* instruction) {
    DCHECK(Lookup(instruction) == nullptr);
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    // The bucket may still be shared with the parent set; take a private copy
    // before prepending to it.
    if (!buckets_owned_.IsBitSet(index)) {
      CloneBucket(index);
    }
    buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
    ++num_entries_;
  }

  // If in the set, returns an equivalent instruction to the given instruction.
  // Returns null otherwise.
  HInstruction* Lookup(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      // Compare the cached hash codes first to avoid calling Equals() needlessly.
      if (node->GetHashCode() == hash_code) {
        HInstruction* existing = node->GetInstruction();
        if (existing->Equals(instruction)) {
          return existing;
        }
      }
    }
    return nullptr;
  }

  // Returns whether instruction is in the set. Unlike `Lookup`, this tests for
  // pointer identity rather than equivalence.
  bool Contains(HInstruction* instruction) const {
    size_t hash_code = HashCode(instruction);
    size_t index = BucketIndex(hash_code);

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      if (node->GetInstruction() == instruction) {
        return true;
      }
    }
    return false;
  }

  // Removes all instructions in the set affected by the given side effects.
  void Kill(SideEffects side_effects) {
    DeleteAllImpureWhich([side_effects](Node* node) {
      return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
    });
  }

  // Removes every entry and marks all buckets as owned.
  void Clear() {
    num_entries_ = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      buckets_[i] = nullptr;
    }
    buckets_owned_.SetInitialBits(num_buckets_);
  }

  // Updates this set by intersecting with instructions in a predecessor's set.
  void IntersectWith(ValueSet* predecessor) {
    if (IsEmpty()) {
      return;
    } else if (predecessor->IsEmpty()) {
      Clear();
    } else {
      // Pure instructions do not need to be tested because only impure
      // instructions can be killed.
      DeleteAllImpureWhich([predecessor](Node* node) {
        return !predecessor->Contains(node->GetInstruction());
      });
    }
  }

  bool IsEmpty() const { return num_entries_ == 0; }
  size_t GetNumberOfEntries() const { return num_entries_; }

 private:
  // Copies all entries from `other` to `this`.
  // If `is_dirty` is set to true, existing data will be wiped first. It is
  // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
  void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
    DCHECK_NE(this, &other);
    DCHECK_GE(num_buckets_, other.IdealBucketCount());

    if (num_buckets_ == other.num_buckets_) {
      // Hash table remains the same size. We copy the bucket pointers and leave
      // all buckets_owned_ bits false.
      if (is_dirty) {
        buckets_owned_.ClearAllBits();
      } else {
        DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
      }
      memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
    } else {
      // Hash table size changes. We copy and rehash all entries, and set all
      // buckets_owned_ bits to true.
      if (is_dirty) {
        memset(buckets_, 0, num_buckets_ * sizeof(Node*));
      } else {
        if (kIsDebugBuild) {
          for (size_t i = 0; i < num_buckets_; ++i) {
            DCHECK(buckets_[i] == nullptr) << i;
          }
        }
      }
      for (size_t i = 0; i < other.num_buckets_; ++i) {
        for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
          size_t new_index = BucketIndex(node->GetHashCode());
          buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
        }
      }
      buckets_owned_.SetInitialBits(num_buckets_);
    }

    num_entries_ = other.num_entries_;
  }

  // Singly-linked list node of a bucket. Stores the instruction together with
  // its hash code so that rehashing and lookups do not need to recompute it.
  class Node : public ArenaObject<kArenaAllocGvn> {
   public:
    Node(HInstruction* instruction, size_t hash_code, Node* next)
        : instruction_(instruction), hash_code_(hash_code), next_(next) {}

    size_t GetHashCode() const { return hash_code_; }
    HInstruction* GetInstruction() const { return instruction_; }
    Node* GetNext() const { return next_; }
    void SetNext(Node* node) { next_ = node; }

    // Allocates a copy of this node whose successor is `new_next`.
    Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) {
      return new (allocator) Node(instruction_, hash_code_, new_next);
    }

   private:
    HInstruction* const instruction_;
    const size_t hash_code_;
    Node* next_;

    DISALLOW_COPY_AND_ASSIGN(Node);
  };

  // Creates our own copy of a bucket that is currently pointing to a parent.
  // This algorithm can be called while iterating over the bucket because it
  // preserves the order of entries in the bucket and will return the clone of
  // the given 'iterator'.
  Node* CloneBucket(size_t index, Node* iterator = nullptr) {
    DCHECK(!buckets_owned_.IsBitSet(index));

    Node* clone_current = nullptr;
    Node* clone_previous = nullptr;
    Node* clone_iterator = nullptr;

    for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
      clone_current = node->Dup(allocator_, nullptr);
      if (node == iterator) {
        clone_iterator = clone_current;
      }

      if (clone_previous == nullptr) {
        // First clone becomes the new head of the bucket.
        buckets_[index] = clone_current;
      } else {
        clone_previous->SetNext(clone_current);
      }
      clone_previous = clone_current;
    }

    buckets_owned_.SetBit(index);
    return clone_iterator;
  }

  // Iterates over buckets with impure instructions (even indices) and deletes
  // the ones on which 'cond' returns true. `num_entries_` is decremented for
  // every deleted entry so that IsEmpty(), GetNumberOfEntries() and
  // IdealBucketCount() reflect the actual contents of the set.
  //
  // Note that for a non-owned bucket 'cond' is evaluated twice on the first
  // matching entry: once on the parent's node and once on its clone.
  template<typename Functor>
  void DeleteAllImpureWhich(Functor cond) {
    for (size_t i = 0; i < num_buckets_; i += 2) {
      Node* node = buckets_[i];
      Node* previous = nullptr;

      if (node == nullptr) {
        continue;
      }

      if (!buckets_owned_.IsBitSet(i)) {
        // Bucket is not owned but maybe we won't need to change it at all.
        // Iterate as long as the entries don't satisfy 'cond'.
        while (node != nullptr) {
          if (cond(node)) {
            // We do need to delete an entry but we do not own the bucket.
            // Clone the bucket, make sure 'previous' and 'node' point to
            // the cloned entries and break.
            previous = CloneBucket(i, previous);
            node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
            break;
          }
          previous = node;
          node = node->GetNext();
        }
      }

      // By this point we either own the bucket and can start deleting entries,
      // or we do not own it but no entries matched 'cond'.
      DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);

      // We iterate over the remainder of entries and delete those that match
      // the given condition.
      while (node != nullptr) {
        Node* next = node->GetNext();
        if (cond(node)) {
          // We own the bucket and can unlink the entry.
          if (previous == nullptr) {
            // The entry is the first in the bucket.
            buckets_[i] = next;
          } else {
            previous->SetNext(next);
          }
          // Keep the entry count in sync with the contents of the table.
          DCHECK_NE(num_entries_, 0u);
          --num_entries_;
        } else {
          previous = node;
        }
        node = next;
      }
    }
  }

  // Computes a bucket count such that the load factor is reasonable.
  // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2.
  // The result is never smaller than kMinimumNumberOfBuckets.
  size_t IdealBucketCount() const {
    size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
    if (bucket_count > kMinimumNumberOfBuckets) {
      return bucket_count;
    } else {
      return kMinimumNumberOfBuckets;
    }
  }

  // Generates a hash code for an instruction.
  size_t HashCode(HInstruction* instruction) const {
    size_t hash_code = instruction->ComputeHashCode();
    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
    // need to delete them when entering the loop.
    if (instruction->GetSideEffects().HasDependencies() ||
        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
      return (hash_code << 1) | 0;
    } else {
      return (hash_code << 1) | 1;
    }
  }

  // Converts a hash code to a bucket index. Relies on `num_buckets_` being a
  // power of two.
  size_t BucketIndex(size_t hash_code) const {
    return hash_code & (num_buckets_ - 1);
  }

  ArenaAllocator* const allocator_;

  // The internal bucket implementation of the set.
  size_t const num_buckets_;
  Node** const buckets_;

  // Flags specifying which buckets were copied into the set from its parent.
  // If a flag is not set, the corresponding bucket points to entries in the
  // parent and must be cloned prior to making changes.
  ArenaBitVector buckets_owned_;

  // The number of entries in the set.
  size_t num_entries_;

  static constexpr size_t kMinimumNumberOfBuckets = 8;

  DISALLOW_COPY_AND_ASSIGN(ValueSet);
};

/**
 * Optimization phase that removes redundant instructions.
*/ class GlobalValueNumberer : public ValueObject { public: GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph, const SideEffectsAnalysis& side_effects) : graph_(graph), allocator_(allocator), side_effects_(side_effects), sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)), visited_blocks_( allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {} void Run(); private: // Per-block GVN. Will also update the ValueSet of the dominated and // successor blocks. void VisitBasicBlock(HBasicBlock* block); HGraph* graph_; ArenaAllocator* const allocator_; const SideEffectsAnalysis& side_effects_; ValueSet* FindSetFor(HBasicBlock* block) const { ValueSet* result = sets_[block->GetBlockId()]; DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId(); return result; } void AbandonSetFor(HBasicBlock* block) { DCHECK(sets_[block->GetBlockId()] != nullptr) << "Block B" << block->GetBlockId() << " expected to have a set"; sets_[block->GetBlockId()] = nullptr; } // Returns false if the GlobalValueNumberer has already visited all blocks // which may reference `block`. bool WillBeReferencedAgain(HBasicBlock* block) const; // Iterates over visited blocks and finds one which has a ValueSet such that: // (a) it will not be referenced in the future, and // (b) it can hold a copy of `reference_set` with a reasonable load factor. HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block, const ValueSet& reference_set) const; // ValueSet for blocks. Initially null, but for an individual block they // are allocated and populated by the dominator, and updated by all blocks // in the path from the dominator to the block. ArenaVector<ValueSet*> sets_; // BitVector which serves as a fast-access map from block id to // visited/unvisited Boolean. 
ArenaBitVector visited_blocks_; DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); }; void GlobalValueNumberer::Run() { DCHECK(side_effects_.HasRun()); sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_); // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. for (HBasicBlock* block : graph_->GetReversePostOrder()) { VisitBasicBlock(block); } } void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { ValueSet* set = nullptr; const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) { // The entry block should only accumulate constant instructions, and // the builder puts constants only in the entry block. // Therefore, there is no need to propagate the value set to the next block. set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); ValueSet* dominator_set = FindSetFor(dominator); if (dominator->GetSuccessors().size() == 1) { // `block` is a direct successor of its dominator. No need to clone the // dominator's set, `block` can take over its ownership including its buckets. DCHECK_EQ(dominator->GetSingleSuccessor(), block); AbandonSetFor(dominator); set = dominator_set; } else { // Try to find a basic block which will never be referenced again and whose // ValueSet can therefore be recycled. We will need to copy `dominator_set` // into the recycled set, so we pass `dominator_set` as a reference for size. HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set); if (recyclable == nullptr) { // No block with a suitable ValueSet found. Allocate a new one and // copy `dominator_set` into it. set = new (allocator_) ValueSet(allocator_, *dominator_set); } else { // Block with a recyclable ValueSet found. Clone `dominator_set` into it. 
set = FindSetFor(recyclable); AbandonSetFor(recyclable); set->PopulateFrom(*dominator_set); } } if (!set->IsEmpty()) { if (block->IsLoopHeader()) { if (block->GetLoopInformation()->ContainsIrreducibleLoop()) { // To satisfy our linear scan algorithm, no instruction should flow in an irreducible // loop header. We clear the set at entry of irreducible loops and any loop containing // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction // across the irreducible loop. // Note that, if we're not compiling OSR, we could still do GVN and introduce // phis at irreducible loop headers. We decided it was not worth the complexity. set->Clear(); } else { DCHECK(!block->GetLoopInformation()->IsIrreducible()); DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader()); set->Kill(side_effects_.GetLoopEffects(block)); } } else if (predecessors.size() > 1) { for (HBasicBlock* predecessor : predecessors) { set->IntersectWith(FindSetFor(predecessor)); if (set->IsEmpty()) { break; } } } } } sets_[block->GetBlockId()] = set; HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); // Do not kill the set with the side effects of the instruction just now: if // the instruction is GVN'ed, we don't need to kill. if (current->CanBeMoved()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) // after fixed ordering. current->AsBinaryOperation()->OrderInputs(); } HInstruction* existing = set->Lookup(current); if (existing != nullptr) { // This replacement doesn't make more OrderInputs() necessary since // current is either used by an instruction that it dominates, // which hasn't been visited yet due to the order we visit instructions. 
// Or current is used by a phi, and we don't do OrderInputs() on a phi anyway. current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { set->Kill(current->GetSideEffects()); set->Add(current); } } else { set->Kill(current->GetSideEffects()); } current = next; } visited_blocks_.SetBit(block->GetBlockId()); } bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const { DCHECK(visited_blocks_.IsBitSet(block->GetBlockId())); for (auto dominated_block : block->GetDominatedBlocks()) { if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) { return true; } } for (auto successor : block->GetSuccessors()) { if (!visited_blocks_.IsBitSet(successor->GetBlockId())) { return true; } } return false; } HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( HBasicBlock* block, const ValueSet& reference_set) const { HBasicBlock* secondary_match = nullptr; for (size_t block_id : visited_blocks_.Indexes()) { ValueSet* current_set = sets_[block_id]; if (current_set == nullptr) { // Set was already recycled. continue; } HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id]; // We test if `current_set` has enough buckets to store a copy of // `reference_set` with a reasonable load factor. If we find a set whose // number of buckets matches perfectly, we return right away. If we find one // that is larger, we return it if no perfectly-matching set is found. // Note that we defer testing WillBeReferencedAgain until all other criteria // have been satisfied because it might be expensive. 
if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) { if (!WillBeReferencedAgain(current_block)) { return current_block; } } else if (secondary_match == nullptr && current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) { if (!WillBeReferencedAgain(current_block)) { secondary_match = current_block; } } } return secondary_match; } void GVNOptimization::Run() { GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_); gvn.Run(); } } // namespace art